mirror of
https://git.bakhai.co.in/FbIN/4Get.git
synced 2025-11-08 13:00:07 +05:30
commit
c6e404d2af
132 changed files with 34951 additions and 0 deletions
178
lib/backend.php
Normal file
178
lib/backend.php
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
<?php
|
||||
/**
 * Shared helpers for scrapers: proxy selection/assignment and the
 * encrypted "next page" token store kept in APCu.
 */
class backend{
	
	/**
	 * Scraper name given to the constructor. Used to build the
	 * config::PROXY_* constant name and the APCu cache keys.
	 */
	public $scraper;
	
	public function __construct($scraper){
		
		$this->scraper = $scraper;
	}
	
	/*
		Proxy stuff
	*/
	
	/**
	 * Pick the next proxy line for this scraper, round-robin.
	 *
	 * @return string A "type:address:port:username:password" line, or
	 *                'raw_ip::::' when config::PROXY_<SCRAPER> is false.
	 * @throws Exception When the proxy list cannot be read or has no usable entry.
	 */
	public function get_ip(){
		
		$pool = constant("config::PROXY_" . strtoupper($this->scraper));
		
		if($pool === false){
			
			// no proxy configured for this scraper
			return 'raw_ip::::';
		}
		
		// counter used to rotate through the list
		$proxy_index_raw = apcu_inc("p." . $this->scraper);
		
		$raw = file_get_contents("data/proxies/" . $pool . ".txt");
		
		if($raw === false){
			
			throw new Exception("Could not read proxy list \"" . $pool . "\"");
		}
		
		// ignore empty or commented lines. Entries are trimmed so that
		// indentation or CRLF line endings don't end up inside the
		// proxy type or the password
		$proxylist = [];
		
		foreach(explode("\n", $raw) as $entry){
			
			$entry = trim($entry);
			
			if(
				$entry === "" ||
				$entry[0] === "#"
			){
				
				continue;
			}
			
			$proxylist[] = $entry;
		}
		
		if(count($proxylist) === 0){
			
			// avoids a modulo by zero below
			throw new Exception("Proxy list \"" . $pool . "\" is empty");
		}
		
		// apcu_inc returns false on failure, which casts to index 0
		return $proxylist[(int)$proxy_index_raw % count($proxylist)];
	}
	
	/**
	 * Apply a proxy line (as returned by get_ip()) to a curl handle.
	 * This function is also called directly on nextpage.
	 *
	 * @param mixed  $curlproc Curl handle to configure.
	 * @param string $ip       "type:address:port[:username[:password]]"
	 * @throws Exception When the proxy type is not recognised. Falling
	 *                   through silently would send the request without
	 *                   any proxy.
	 */
	public function assign_proxy(&$curlproc, string $ip){
		
		// parse proxy line, padding missing trailing fields
		[
			$type,
			$address,
			$port,
			$username,
			$password
		] = array_pad(explode(":", $ip, 5), 5, "");
		
		switch($type){
			
			case "raw_ip":
				return;
			
			case "http":
			case "https":
				$proxytype = CURLPROXY_HTTP;
				$address = $type . "://" . $address;
				break;
			
			case "socks4":
				$proxytype = CURLPROXY_SOCKS4;
				break;
			
			case "socks5":
				$proxytype = CURLPROXY_SOCKS5;
				break;
			
			case "socks4a":
				$proxytype = CURLPROXY_SOCKS4A;
				break;
			
			case "socks5_hostname":
			case "socks5h":
			case "socks5a":
				$proxytype = CURLPROXY_SOCKS5_HOSTNAME;
				break;
			
			default:
				throw new Exception("Unknown proxy type \"" . $type . "\"");
		}
		
		curl_setopt($curlproc, CURLOPT_PROXYTYPE, $proxytype);
		curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
		
		if($username !== ""){
			
			curl_setopt($curlproc, CURLOPT_PROXYUSERPWD, $username . ":" . $password);
		}
	}
	
	/*
		Next page stuff
	*/
	
	/**
	 * Compress, encrypt and cache a payload for 15 minutes.
	 *
	 * @param string $payload Data to keep for the next page request.
	 * @param string $page    Page name; only its first letter is used in the cache key.
	 * @param string $proxy   Proxy line to reuse on the next page.
	 * @return string Token "<scraper><requestid>.<base64url key>". The
	 *                key is only returned here, it is not cached.
	 */
	public function store(string $payload, string $page, string $proxy){
		
		$key = sodium_crypto_secretbox_keygen();
		$nonce = random_bytes(SODIUM_CRYPTO_SECRETBOX_NONCEBYTES);
		
		$requestid = apcu_inc("requestid");
		
		apcu_store(
			$page[0] . "." . // first letter of page name
			$this->scraper . // scraper name
			$requestid,
			[
				$nonce,
				$proxy,
				// compress and encrypt
				sodium_crypto_secretbox(
					gzdeflate($payload),
					$nonce,
					$key
				)
			],
			900 // cache information for 15 minutes
		);
		
		return
			$this->scraper . $requestid . "." .
			rtrim(strtr(base64_encode($key), '+/', '-_'), '=');
	}
	
	/**
	 * Fetch, decrypt and delete a payload cached by store().
	 *
	 * @param string $npt  Token returned by store().
	 * @param string $page Page name; only its first letter is used.
	 * @return array [data, proxy]
	 * @throws Exception When the token is malformed, unknown, expired or
	 *                   carries the wrong key.
	 */
	public function get(string $npt, string $page){
		
		$page = $page[0];
		$explode = explode(".", $npt, 2);
		
		if(count($explode) !== 2){
			
			throw new Exception("Malformed nextPageToken!");
		}
		
		$apcu = $page . "." . $explode[0];
		
		$payload = apcu_fetch($apcu);
		
		if($payload === false){
			
			throw new Exception("The next page token is invalid or has expired!");
		}
		
		// base64url -> base64, restoring the "=" padding stripped by store()
		$b64 = strtr($explode[1], '-_', '+/');
		$b64 .= str_repeat("=", (4 - strlen($b64) % 4) % 4);
		
		$key = base64_decode($b64, true);
		
		if(
			$key === false ||
			strlen($key) !== SODIUM_CRYPTO_SECRETBOX_KEYBYTES
		){
			
			// a key of the wrong size would make libsodium throw instead
			throw new Exception("The next page token is invalid or has expired!");
		}
		
		// decrypt and decompress data
		$decrypted =
			sodium_crypto_secretbox_open(
				$payload[2], // data
				$payload[0], // nonce
				$key
			);
		
		if($decrypted === false){
			
			throw new Exception("The next page token is invalid or has expired!");
		}
		
		$data = gzinflate($decrypted);
		
		if($data === false){
			
			throw new Exception("The next page token is invalid or has expired!");
		}
		
		// remove the key after using successfully
		apcu_delete($apcu);
		
		return [
			$data, // data
			$payload[1] // proxy
		];
	}
}
|
||||
144
lib/bingcache-todo-fix.php
Normal file
144
lib/bingcache-todo-fix.php
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
<?php
|
||||
|
||||
// https://www.bing.com/search?q=url%3Ahttps%3A%2F%2Fflossboxin.org.in
|
||||
// https://cc.bingj.com/cache.aspx?q=url%3ahttps%3a%2f%2f4g.flossboxin.org.in&d=4769685974291356&mkt=en-DE&setlang=en-GB&w=tEsWuE7HW3Z5AIPQMVkDH4WaotS4LrK-
|
||||
// <div class="b_attribution" u="0N|5119|4769685974291356|tEsWuE7HW3Z5AIPQMVkDH4WaotS4LrK-" tabindex="0">
|
||||
|
||||
// Handle the request right away: the bingcache constructor does all of the work.
new bingcache();
|
||||
|
||||
/**
 * Looks up the Bing cache entry for the URL given in the "s" query
 * parameter and redirects the client to it.
 */
class bingcache{
	
	/**
	 * Validate $_GET["s"], search Bing for it, extract the cache keys
	 * from the first "b_attribution" element and redirect to the cached
	 * copy. Every failure ends the request through do404().
	 */
	public function __construct(){
		
		if(
			!isset($_GET["s"]) ||
			!is_string($_GET["s"]) || // ?s[]=x would hand an array to parse_url
			$this->validate_url($_GET["s"]) === false
		){
			
			$this->do404("Please provide a valid URL.");
		}
		
		$url = $_GET["s"];
		
		$curlproc = curl_init();
		
		curl_setopt(
			$curlproc,
			CURLOPT_URL,
			"https://www.bing.com/search?q=url%3A" .
			urlencode($url)
		);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt(
			$curlproc,
			CURLOPT_HTTPHEADER,
			["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 5);
		
		$data = curl_exec($curlproc);
		$failed = $data === false || curl_errno($curlproc) !== 0;
		
		curl_close($curlproc);
		
		if($failed){
			
			$this->do404("Failed to connect to bing servers. Please try again later.");
		}
		
		// u="0N|5119|<d>|<w>": [^"]* keeps the match inside the attribute
		$found =
			preg_match(
				'/<div class="b_attribution" u="([^"]*)" tabindex="0">/',
				$data,
				$match
			);
		
		if($found !== 1){
			
			$this->do404("Bing has not archived this URL.");
		}
		
		$keys = explode("|", $match[1]);
		$count = count($keys);
		
		if($count < 2){
			
			// the last two fields are required to build the cache URL
			$this->do404("Bing has not archived this URL.");
		}
		
		header(
			"Location: https://cc.bingj.com/cache.aspx?d=" .
			rawurlencode($keys[$count - 2]) .
			"&w=" .
			rawurlencode($keys[$count - 1])
		);
	}
	
	/**
	 * Render the error page with the given message and stop the script.
	 *
	 * @param string $text Message shown to the user.
	 */
	public function do404($text){
		
		// include_once: avoid redeclaring the class if it is already loaded
		include_once "lib/frontend.php";
		$frontend = new frontend();
		
		echo
			$frontend->load(
				"error.html",
				[
					"title" => "Shit",
					"text" => $text
				]
			);
		
		die();
	}
	
	/**
	 * Check that $url is an http(s) URL whose host is, or resolves to,
	 * an address outside the private and reserved ranges.
	 *
	 * @param string $url
	 * @return string|false The checked IP address, or false when the URL
	 *                      is rejected.
	 */
	public function validate_url($url){
		
		$url_parts = parse_url($url);
		
		// check if required parts are there
		if(
			!isset($url_parts["scheme"]) ||
			!in_array($url_parts["scheme"], ["http", "https"], true) ||
			!isset($url_parts["host"])
		){
			
			return false;
		}
		
		// if its not an RFC-valid URL
		if(!filter_var($url, FILTER_VALIDATE_URL)){
			
			return false;
		}
		
		$ip =
			str_replace(
				["[", "]"], // handle ipv6
				"",
				$url_parts["host"]
			);
		
		// if its not an IP
		if(!filter_var($ip, FILTER_VALIDATE_IP)){
			
			// resolve domain's IP. gethostbyname returns its input when
			// the lookup fails, which the filter below then rejects
			$ip = gethostbyname($url_parts["host"] . ".");
		}
		
		// check if its localhost
		return filter_var(
			$ip,
			FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
		);
	}
}
|
||||
281
lib/bot_protection.php
Normal file
281
lib/bot_protection.php
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
<?php
|
||||
|
||||
/**
 * Captcha gate. Constructing the object either lets the request
 * through (returns) or prints the captcha / an error and ends the script.
 */
class bot_protection{
	
	// number of checkboxes in the captcha form; valid answers are 0..15
	private const captcha_cells = 16;
	
	/**
	 * @param object $frontend Object providing loadheader() and load().
	 * @param mixed  $get      Passed through to $frontend->loadheader().
	 * @param mixed  $filters  Passed through to $frontend->loadheader().
	 * @param mixed  $page     Passed through to $frontend->loadheader().
	 * @param bool   $output   true: print the page header / captcha form.
	 *                         false: answer with a JSON error when the
	 *                         "pass" cookie is missing or exhausted.
	 */
	public function __construct($frontend, $get, $filters, $page, $output){
		
		// check if we want captcha
		if(config::BOT_PROTECTION !== 1){
			
			$this->grant($frontend, $get, $filters, $page, $output);
			return;
		}
		
		/*
			Validate cookie, if it exists
		*/
		if(
			isset($_COOKIE["pass"]) &&
			is_string($_COOKIE["pass"]) && // pass[]=x would hand an array to preg_match
			// check if key is not malformed
			preg_match(
				'/^k[0-9]+\.[A-Za-z0-9_]{20}$/',
				$_COOKIE["pass"]
			) &&
			// does key exist
			apcu_exists($_COOKIE["pass"])
		){
			
			// exists, increment counter
			$inc = apcu_inc($_COOKIE["pass"]);
			
			// we start counting from 1
			// when it has been incremented to 102, it has reached
			// 100 reqs
			if($inc >= config::MAX_SEARCHES + 2){
				
				// reached limit, delete and give captcha
				apcu_delete($_COOKIE["pass"]);
			}else{
				
				// the cookie is OK! dont die() and give results
				$this->grant($frontend, $get, $filters, $page, $output);
				return;
			}
		}
		
		if($output === false){
			
			http_response_code(401); // unauthorized
			echo json_encode([
				"status" => "The \"pass\" token in your cookies is missing or has expired!!"
			]);
			die();
		}
		
		/*
			Validate form data
			The form is sent as text/plain: one "name=value" per line
		*/
		$lines =
			explode(
				"\r\n",
				file_get_contents("php://input")
			);
		
		$invalid = false;
		$answers = [];
		$key = false;
		$error = "";
		
		foreach($lines as $line){
			
			$line = explode("=", $line, 2);
			
			if(count($line) !== 2){
				
				$invalid = true;
				break;
			}
			
			$is_checkbox =
				preg_match(
					'/^c\[([0-9]+)\]$/',
					$line[0],
					$regex
				) === 1;
			
			if(
				$line[1] !== "on" ||
				!$is_checkbox
			){
				
				// check if its the v key
				if(
					$line[0] === "v" &&
					preg_match(
						'/^c[0-9]+\.[A-Za-z0-9_]{20}$/',
						$line[1]
					)
				){
					
					// a captcha can only be answered once
					$key = apcu_fetch($line[1]);
					apcu_delete($line[1]);
				}
				
				// anything after the first non-checkbox line is ignored
				break;
			}
			
			$cell = (int)$regex[1];
			
			if(
				$cell >= self::captcha_cells ||
				$cell <= -1
			){
				
				$invalid = true;
				break;
			}
			
			$answers[] = $cell;
		}
		
		if(
			!$invalid &&
			is_array($key) // has captcha been gen'd?
		){
			
			// count every cell once: otherwise repeating a single
			// correct cell count($key) times would pass the check
			$answers = array_unique($answers);
			
			$check = count($key);
			
			// validate answer
			foreach($answers as $answer){
				
				// loose comparison kept on purpose: the type of the
				// cached values is not visible from this file
				if(!in_array($answer, $key)){
					
					$check = -1;
					break;
				}
				
				$check--;
			}
			
			if($check === 0){
				
				// we passed the captcha
				// set cookie
				$inc = apcu_inc("cookie");
				
				$key = "k" . $inc . "." . $this->randomchars();
				
				// create the request counter, kept for 24 hours
				apcu_inc($key, 1, $success, 86400);
				
				setcookie(
					"pass",
					$key,
					[
						"expires" => time() + 86400, // expires in 24 hours
						"samesite" => "Lax",
						"path" => "/"
					]
				);
				
				// $output is true here, so this also prints the header
				$this->grant($frontend, $get, $filters, $page, true);
				return;
			}
			
			$error = "<div class=\"quote\">You were <a href=\"https://www.youtube.com/watch?v=e1d7fkQx2rk\" target=\"_BLANK\" rel=\"noreferrer nofollow\">kicked out of Mensa.</a> Please try again.</div>";
		}
		
		/*
			Show a new captcha
		*/
		$key = "c" . apcu_inc("captcha_gen", 1) . "." . $this->randomchars();
		
		$checkboxes = "";
		
		for($i=0; $i<self::captcha_cells; $i++){
			
			$checkboxes .=
				'<input type="checkbox" name="c[' . $i . ']" id="c' . $i . '">' .
				'<label for="c' . $i . '"></label>';
		}
		
		$payload = [
			"timetaken" => microtime(true),
			"class" => "",
			"right-left" => "",
			"right-right" => "",
			"left" =>
				'<div class="infobox">' .
					'<h1>IQ test</h1>' .
					'IQ test has been enabled due to bot abuse on the network.<br>' .
					'Solving this IQ test will let you make 100 searches today. I will add an invite system to bypass this soon...' .
					$error .
					'<form method="POST" enctype="text/plain" autocomplete="off">' .
						'<div class="captcha-wrapper">' .
							'<div class="captcha">' .
								'<img src="captcha?v=' . $key . '" alt="Captcha image">' .
								'<div class="captcha-controls">' .
									$checkboxes .
								'</div>' .
							'</div>' .
						'</div>' .
						'<input type="hidden" name="v" value="' . $key . '">' .
						'<input type="submit" value="Check IQ" class="captcha-submit">' .
					'</form>' .
				'</div>'
		];
		
		$frontend->loadheader(
			$get,
			$filters,
			$page
		);
		
		echo $frontend->load("search.html", $payload);
		die();
	}
	
	/**
	 * Count the request as a real one and, when $output is true, print
	 * the page header.
	 */
	private function grant($frontend, $get, $filters, $page, $output){
		
		apcu_inc("real_requests");
		
		if($output === true){
			
			$frontend->loadheader(
				$get,
				$filters,
				$page
			);
		}
	}
	
	/**
	 * @return string 20 random characters out of A-Z, a-z, 0-9 and "_".
	 */
	private function randomchars(){
		
		$chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
		$last = strlen($chars) - 1;
		
		$key = "";
		
		for($i=0; $i<20; $i++){
			
			$key .= $chars[random_int(0, $last)];
		}
		
		return $key;
	}
}
|
||||
660
lib/curlproxy.php
Normal file
660
lib/curlproxy.php
Normal file
|
|
@ -0,0 +1,660 @@
|
|||
<?php
|
||||
|
||||
/**
 * Fetches remote web pages, images and audio on behalf of the client.
 * Every URL is checked with validateurl() before it is requested.
 */
class proxy{
	
	public const req_web = 0;
	public const req_image = 1;
	
	// get() aborts a transfer once it goes past this many bytes
	private const max_download_size = 100000000;
	
	// whether clientcache() answers conditional requests with a 304
	public $cache;
	
	// state shared with the curl callbacks of stream()
	public $url;
	public $format;
	public $headers = [];
	
	/**
	 * @param bool $cache false disables clientcache().
	 */
	public function __construct($cache = true){
		
		$this->cache = $cache;
	}
	
	/**
	 * Answer with the placeholder image and a 404 status, then stop the script.
	 */
	public function do404(){
		
		http_response_code(404);
		header("Content-Type: image/png");
		
		readfile("lib/img404.png");
		
		die();
	}
	
	/**
	 * Resolve $path against the URL it was found on.
	 *
	 * @param string $path     Absolute URL, protocol-relative URL ("//host/x"),
	 *                         root-relative path ("/x") or relative path ("x").
	 * @param string $relative URL of the document $path comes from.
	 * @return string
	 */
	public function getabsoluteurl($path, $relative){
		
		if($this->validateurl($path)){
			
			return $path;
		}
		
		if(substr($path, 0, 2) == "//"){
			
			return "https:" . $path;
		}
		
		$base = parse_url($relative);
		
		$url = $base["scheme"] . "://";
		
		if(
			isset($base["user"]) &&
			isset($base["pass"])
		){
			
			$url .= $base["user"] . ":" . $base["pass"] . "@";
		}
		
		$url .= $base["host"];
		
		if(isset($base["port"])){
			
			// keep non-default ports
			$url .= ":" . $base["port"];
		}
		
		if(
			strlen($path) !== 0 &&
			$path[0] === "/"
		){
			
			// root-relative: resolves against the host, not against
			// the directory of the base document
			return $url . $path;
		}
		
		if(isset($base["path"])){
			
			// directory of the base document: drop the last segment
			$segments = explode("/", $base["path"]);
			array_pop($segments);
			
			$url .= implode("/", $segments);
		}
		
		if(strlen($path) !== 0){
			
			$url .= "/";
		}
		
		return $url . $path;
	}
	
	/**
	 * Check that $url is an http(s) URL whose host is, or resolves to,
	 * an address outside the private and reserved ranges.
	 *
	 * NOTE(review): the host is resolved again by curl when the request
	 * is made, so a DNS answer that changes in between is not covered.
	 *
	 * @param string $url
	 * @return bool
	 */
	public function validateurl($url){
		
		$url_parts = parse_url($url);
		
		// check if required parts are there
		if(
			!isset($url_parts["scheme"]) ||
			!in_array($url_parts["scheme"], ["http", "https"], true) ||
			!isset($url_parts["host"])
		){
			
			return false;
		}
		
		$ip =
			str_replace(
				["[", "]"], // handle ipv6
				"",
				$url_parts["host"]
			);
		
		// if its not an IP
		if(!filter_var($ip, FILTER_VALIDATE_IP)){
			
			// resolve domain's IP. gethostbyname returns its input when
			// the lookup fails, which the filter below then rejects
			$ip = gethostbyname($url_parts["host"] . ".");
		}
		
		// check if its localhost
		return
			filter_var(
				$ip,
				FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
			) !== false;
	}
	
	/**
	 * "scheme://host" part of $url, used as Referer when none is given.
	 */
	private function default_referer($url){
		
		// ["https:", "", "host", "rest"]: keep the first three pieces
		// so that URLs without a path keep their host as well
		return implode("/", array_slice(explode("/", $url, 4), 0, 3));
	}
	
	/**
	 * Download $url into memory, following up to 5 redirects. Each
	 * redirect target goes through validateurl() again.
	 *
	 * @param string      $url
	 * @param int         $reqtype        self::req_web or self::req_image; selects the request headers.
	 * @param bool        $acceptallcodes false: throw on HTTP codes above 300.
	 * @param string|null $referer        Referer for image requests; derived from $url when null.
	 * @param int         $redirectcount  Redirects followed so far (internal).
	 * @return array "http" (version, code), "headers" (lowercase names), "body",
	 *               plus "format" (image subtype or false) for req_image.
	 * @throws Exception On invalid URL, transfer error, bad redirect or error code.
	 */
	public function get($url, $reqtype = self::req_web, $acceptallcodes = false, $referer = null, $redirectcount = 0){
		
		if($redirectcount >= 5){
			
			throw new Exception("Too many redirects");
		}
		
		if($url == "https://i.imgur.com/removed.png"){
			
			throw new Exception("Encountered imgur 404");
		}
		
		// sanitize URL
		if($this->validateurl($url) === false){
			
			throw new Exception("Invalid URL");
		}
		
		$this->clientcache();
		
		$curl = curl_init();
		
		curl_setopt($curl, CURLOPT_URL, $url);
		curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curl, CURLOPT_HEADER, 1);
		
		switch($reqtype){
			
			case self::req_web:
				curl_setopt(
					$curl,
					CURLOPT_HTTPHEADER,
					[
						"User-Agent: " . config::USER_AGENT,
						"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
						"Accept-Language: en-US,en;q=0.5",
						"Accept-Encoding: gzip, deflate",
						"DNT: 1",
						"Connection: keep-alive",
						"Upgrade-Insecure-Requests: 1",
						"Sec-Fetch-Dest: document",
						"Sec-Fetch-Mode: navigate",
						"Sec-Fetch-Site: none",
						"Sec-Fetch-User: ?1"
					]
				);
				break;
			
			case self::req_image:
				
				if($referer === null){
					
					$referer = $this->default_referer($url);
				}
				
				curl_setopt(
					$curl,
					CURLOPT_HTTPHEADER,
					[
						"User-Agent: " . config::USER_AGENT,
						"Accept: image/avif,image/webp,*/*",
						"Accept-Language: en-US,en;q=0.5",
						"Accept-Encoding: gzip, deflate",
						"DNT: 1",
						"Connection: keep-alive",
						"Referer: {$referer}"
					]
				);
				break;
		}
		
		curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curl, CURLOPT_TIMEOUT, 30);
		
		// limit size of payloads
		$limit = self::max_download_size;
		
		curl_setopt($curl, CURLOPT_BUFFERSIZE, 1024);
		curl_setopt($curl, CURLOPT_NOPROGRESS, false);
		curl_setopt(
			$curl,
			CURLOPT_PROGRESSFUNCTION,
			// PHP passes the handle first, then expected/received
			// download bytes, then expected/sent upload bytes
			function($handle, $downloadsize, $downloaded, $uploadsize, $uploaded) use ($limit){
				
				// a non-zero return aborts the transfer. $downloadsize is
				// 0 when the server doesn't announce a size, so the bytes
				// received so far are checked too
				return
					(
						$downloadsize > $limit ||
						$downloaded > $limit
					) ? 1 : 0;
			}
		);
		
		$body = curl_exec($curl);
		
		if(
			$body === false ||
			curl_errno($curl)
		){
			
			$message = curl_error($curl);
			curl_close($curl);
			
			throw new Exception($message);
		}
		
		curl_close($curl);
		
		/*
			Split the status line and headers from the body
		*/
		$headers = [];
		$http = null;
		
		while(true){
			
			$split = explode("\n", $body, 2);
			$line = $split[0];
			$body = isset($split[1]) ? $split[1] : "";
			
			if($http === null){
				
				// http/1.1 200 ok
				if(preg_match('/^HTTP\/([0-9.]+)\s+([0-9]+)/i', $line, $status) !== 1){
					
					throw new Exception("Malformed HTTP response");
				}
				
				$http = [
					"version" => (float)$status[1],
					"code" => (int)$status[2]
				];
				
				continue;
			}
			
			if(trim($line) == ""){
				
				// reached end of headers
				break;
			}
			
			$header = explode(":", $line, 2);
			
			// malformed headers
			if(count($header) !== 2){ continue; }
			
			$headers[strtolower(trim($header[0]))] = trim($header[1]);
		}
		
		// check http code
		if(
			$http["code"] >= 300 &&
			$http["code"] <= 309
		){
			
			// redirect
			if(!isset($headers["location"])){
				
				throw new Exception("Broken redirect");
			}
			
			return
				$this->get(
					$this->getabsoluteurl($headers["location"], $url),
					$reqtype,
					$acceptallcodes,
					$referer,
					$redirectcount + 1
				);
		}
		
		if(
			$acceptallcodes === false &&
			$http["code"] > 300
		){
			
			throw new Exception("Remote server returned an error code! ({$http["code"]})");
		}
		
		if($reqtype !== self::req_image){
			
			return [
				"http" => $http,
				"headers" => $headers,
				"body" => $body
			];
		}
		
		// check if data is okay
		$format = false;
		
		if(isset($headers["content-type"])){
			
			if(stripos($headers["content-type"], "text/html") !== false){
				
				throw new Exception("Server returned html");
			}
			
			if(
				preg_match(
					'/image\/([^ ]+)/i',
					$headers["content-type"],
					$match
				)
			){
				
				$format = strtolower($match[1]);
				
				// image/x-icon -> icon
				if(substr($format, 0, 2) == "x-"){
					
					$format = substr($format, 2);
				}
			}
		}
		
		return [
			"http" => $http,
			"format" => $format,
			"headers" => $headers,
			"body" => $body
		];
	}
	
	/**
	 * Stream an image to the client. See stream().
	 */
	public function stream_linear_image($url, $referer = null){
		
		$this->stream($url, $referer, "image");
	}
	
	/**
	 * Stream an audio file to the client. See stream().
	 */
	public function stream_linear_audio($url, $referer = null){
		
		$this->stream($url, $referer, "audio");
	}
	
	/**
	 * Pipe a remote resource to the client as it downloads.
	 *
	 * NOTE(review): redirects are followed by curl here, so only the
	 * first URL goes through validateurl(). Redirect targets are limited
	 * to http(s) but their hosts are not checked.
	 *
	 * @param string      $url
	 * @param string|null $referer Derived from $url when null.
	 * @param string      $format  "image" or "audio"; must appear in the
	 *                             upstream Content-Type.
	 * @throws Exception On invalid URL, transfer error, non-200 response
	 *                   or unexpected Content-Type.
	 */
	private function stream($url, $referer, $format){
		
		$this->clientcache();
		
		$this->url = $url;
		$this->format = $format;
		
		// sanitize URL
		if($this->validateurl($url) === false){
			
			throw new Exception("Invalid URL");
		}
		
		$curl = curl_init();
		
		// set headers
		if($referer === null){
			
			$referer = $this->default_referer($url);
		}
		
		$accept = null;
		
		switch($format){
			
			case "image":
				$accept = "image/avif,image/webp,*/*";
				break;
			
			case "audio":
				$accept = "audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5";
				break;
		}
		
		if($accept !== null){
			
			curl_setopt(
				$curl,
				CURLOPT_HTTPHEADER,
				[
					"User-Agent: " . config::USER_AGENT,
					"Accept: " . $accept,
					"Accept-Language: en-US,en;q=0.5",
					"Accept-Encoding: gzip, deflate, br",
					"DNT: 1",
					"Connection: keep-alive",
					"Referer: {$referer}"
				]
			);
		}
		
		// follow redirects, http(s) targets only
		curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
		curl_setopt($curl, CURLOPT_MAXREDIRS, 5);
		curl_setopt($curl, CURLOPT_AUTOREFERER, true);
		curl_setopt($curl, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
		
		// set url
		curl_setopt($curl, CURLOPT_URL, $url);
		curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding
		
		// timeout + ssl verification
		curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
		curl_setopt($curl, CURLOPT_TIMEOUT, 30);
		
		curl_setopt(
			$curl,
			CURLOPT_WRITEFUNCTION,
			function($c, $data){
				
				if(curl_getinfo($c, CURLINFO_HTTP_CODE) !== 200){
					
					throw new Exception("Serber returned a non-200 code");
				}
				
				echo $data;
				return strlen($data);
			}
		);
		
		$this->headers = [];
		
		curl_setopt(
			$curl,
			CURLOPT_HEADERFUNCTION,
			function($c, $header){
				
				$line = trim($header);
				
				if($line === ""){
					
					// end of a header block. Only the HTTP 200 response
					// is relayed; redirects are skipped
					if(curl_getinfo($c, CURLINFO_HTTP_CODE) === 200){
						
						$this->send_stream_headers();
					}
					
					return strlen($header);
				}
				
				if(strncasecmp($line, "HTTP/", 5) === 0){
					
					// status line: a new response starts, forget the
					// headers of the previous redirect hop
					$this->headers = [];
					return strlen($header);
				}
				
				// parse headers
				$h = explode(":", $line, 2);
				
				if(count($h) === 2){
					
					$this->headers[strtolower(trim($h[0]))] = trim($h[1]);
				}
				
				return strlen($header);
			}
		);
		
		curl_exec($curl);
		
		if(curl_errno($curl)){
			
			$message = curl_error($curl);
			curl_close($curl);
			
			throw new Exception($message);
		}
		
		curl_close($curl);
	}
	
	/**
	 * Validate the upstream Content-Type collected in $this->headers and
	 * send Content-Type, Content-Length and Content-Disposition to the client.
	 *
	 * @throws Exception When Content-Type is missing, malformed or does
	 *                   not contain $this->format (octet-stream is let through).
	 */
	private function send_stream_headers(){
		
		// get content type
		if(!isset($this->headers["content-type"])){
			
			throw new Exception("Resource is not an {$this->format} (no Content-Type)");
		}
		
		$contenttype = $this->headers["content-type"];
		$is_octet = stripos($contenttype, "octet-stream") !== false;
		
		if(
			stripos($contenttype, $this->format) === false &&
			$is_octet === false
		){
			
			throw new Exception("Resource reported invalid Content-Type");
		}
		
		$filetype = explode("/", $contenttype);
		
		if(!isset($filetype[1])){
			
			throw new Exception("Malformed Content-Type header");
		}
		
		if($is_octet){
			
			$filetype[1] = "jpeg";
		}
		
		header("Content-Type: {$this->format}/{$filetype[1]}");
		
		// give payload size. When the upstream body is compressed curl
		// decodes it (CURLOPT_ENCODING), so the upstream length would
		// not match what is sent to the client
		if(
			isset($this->headers["content-length"]) &&
			!isset($this->headers["content-encoding"])
		){
			
			header("Content-Length: {$this->headers["content-length"]}");
		}
		
		// give filename, without Content-Type parameters in the extension
		$extension = trim(explode(";", $filetype[1], 2)[0]);
		
		$this->getfilenameheader($this->headers, $this->url, $extension);
	}
	
	/**
	 * Send a Content-Disposition header with a filename taken from the
	 * upstream Content-Disposition, the URL path or the host name.
	 *
	 * @param array  $headers  Upstream headers, lowercase names.
	 * @param string $url      URL of the resource.
	 * @param string $filetype Extension appended to the filename.
	 */
	public function getfilenameheader($headers, $url, $filetype = "jpg"){
		
		// get filename from content-disposition header
		if(
			isset($headers["content-disposition"]) &&
			preg_match(
				'/filename=([^;]+)/',
				$headers["content-disposition"],
				$match
			)
		){
			
			$this->send_filename(trim($match[1], "\"'"), $filetype);
			return;
		}
		
		// get filename from URL
		$path = parse_url($url, PHP_URL_PATH);
		$filename = "";
		
		if(is_string($path)){
			
			// remove extension from filename
			$pieces = explode(".", basename($path));
			
			if(count($pieces) > 1){
				
				array_pop($pieces);
			}
			
			$filename = implode(".", $pieces);
		}
		
		if($filename === ""){
			
			// everything failed! rename file to domain name
			$filename = (string)parse_url($url, PHP_URL_HOST);
		}
		
		$this->send_filename($filename, $filetype);
	}
	
	/**
	 * Emit 'Content-Disposition: inline; filename="<name>.<type>"'.
	 */
	private function send_filename($filename, $filetype){
		
		// a double quote would end the quoted filename early
		$filename = str_replace("\"", "", $filename . "." . $filetype);
		
		header("Content-Disposition: inline; filename=\"" . $filename . "\"");
	}
	
	/**
	 * Work out the image format of a payload returned by get().
	 *
	 * @param array $payload  Uses "body" and, as fallback, "format".
	 * @param mixed $imagick  Receives a new Imagick instance.
	 * @return string|false Lowercase format name known to Imagick, or false.
	 */
	public function getimageformat($payload, &$imagick){
		
		$finfo = new finfo(FILEINFO_MIME_TYPE);
		$format = $finfo->buffer($payload["body"]);
		
		if($format === false){
			
			// fall back to the format announced by the server
			$announced = isset($payload["format"]) ? $payload["format"] : false;
			
			if($announced === false){
				
				header("X-Error: Could not parse format");
				
				// favicon404() is not defined in this class: use it when
				// a subclass provides it, otherwise the local 404 image
				if(method_exists($this, "favicon404")){
					
					$this->favicon404();
				}else{
					
					$this->do404();
				}
			}
			
			$format = $announced;
		}else{
			
			$mime = explode("/", $format, 2);
			
			if($mime[0] == "image"){
				
				$format = strtolower($mime[1]);
				
				if(substr($format, 0, 2) == "x-"){
					
					$format = substr($format, 2);
				}
			}
		}
		
		switch($format){
			
			case "tiff": $format = "gif"; break;
			case "vnd.microsoft.icon": $format = "ico"; break;
			case "icon": $format = "ico"; break;
			case "svg+xml": $format = "svg"; break;
		}
		
		$imagick = new Imagick();
		
		if(
			!in_array(
				$format,
				array_map("strtolower", $imagick->queryFormats())
			)
		){
			
			// format could not be found, but imagemagick can
			// sometimes detect it
			$format = false;
		}
		
		return $format;
	}
	
	/**
	 * Send a fixed Last-Modified date and answer conditional requests
	 * with "304 Not Modified", ending the script. Does nothing when the
	 * object was created with $cache = false.
	 */
	public function clientcache(){
		
		if($this->cache === false){
			
			return;
		}
		
		header("Last-Modified: Thu, 01 Oct 1970 00:00:00 GMT");
		
		// $_SERVER doesn't depend on the SAPI or on header name casing
		if(
			isset($_SERVER["HTTP_IF_MODIFIED_SINCE"]) ||
			isset($_SERVER["HTTP_IF_UNMODIFIED_SINCE"])
		){
			
			http_response_code(304); // 304: Not Modified
			die();
		}
	}
}
|
||||
BIN
lib/favicon404.png
Normal file
BIN
lib/favicon404.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 744 B |
1342
lib/frontend.php
Normal file
1342
lib/frontend.php
Normal file
File diff suppressed because it is too large
Load diff
610
lib/fuckhtml.php
Normal file
610
lib/fuckhtml.php
Normal file
|
|
@ -0,0 +1,610 @@
|
|||
<?php
|
||||
class fuckhtml{
|
||||
|
||||
/**
 * Create a parser and, when markup is supplied, load it immediately.
 *
 * @param mixed $html   Handed to load() unchanged; skipped when null
 * @param bool  $isfile Handed to load() unchanged
 */
public function __construct($html = null, $isfile = false){

    if($html === null){

        // nothing to parse yet
        return;
    }

    $this->load($html, $isfile);
}
|
||||
|
||||
/**
 * Load the markup that the getElement* methods will search.
 *
 * @param mixed $html   Markup string, an element array carrying an
 *                      "innerHTML" index, or (with $isfile) a file path
 * @param bool  $isfile When true, $html is a path whose contents are read
 * @return void
 * @throws Exception When an array without "innerHTML" is supplied or the
 *                   file cannot be read
 */
public function load($html, $isfile = false){

    if(is_array($html)){

        if(!isset($html["innerHTML"])){

            throw new Exception("(load) Supplied array doesn't contain an innerHTML index");
        }

        $html = $html["innerHTML"];
    }

    if($isfile){

        // file_get_contents() copes with empty files (fread() refuses a
        // zero length on PHP 8) and reports an unreadable path as false
        // instead of handing a bad handle to fread()
        $fetch = file_get_contents($html);

        if($fetch === false){

            throw new Exception("(load) Could not read file " . $html);
        }

        $this->html = $fetch;
    }else{

        $this->html = $html;
    }

    $this->strlen = strlen($this->html);
}
|
||||
|
||||
/**
 * Return the markup currently held by this parser.
 *
 * @return mixed Whatever load() last stored in $this->html
 */
public function getloadedhtml(){

    $loaded = $this->html;

    return $loaded;
}
|
||||
|
||||
/**
 * Collect every element with the given tag name from the loaded markup.
 *
 * Opening and closing tags are located with regular expressions and
 * paired up by a per-tag-name nesting counter; no DOM tree is built.
 *
 * Each returned element is an array with the keys:
 *   tagName    lowercased tag name
 *   startPos   offset of the opening tag inside the loaded markup
 *   endPos     offset just past the closing tag (0 when none was paired)
 *   attributes lowercased attribute name => value; attributes without a
 *              value are stored as the string "true"
 *   innerHTML  markup between the two tags (null when none was paired)
 *   level      value of the nesting counter for this tag name
 *   outerHTML  opening tag through closing tag (only set when paired)
 *
 * @param string $tagname Tag name to look for, or "*" for every tag
 * @return array List of element arrays ordered by position
 */
public function getElementsByTagName(string $tagname){

    $out = [];

    /*
        Scrape start of the tag. Example
        <div class="mydiv"> ...
    */
    if($tagname == "*"){

        $tagname = '[A-Za-z0-9._-]+';
    }else{

        // "/" is the pattern delimiter below, so have it quoted as well
        $tagname = preg_quote(strtolower($tagname), "/");
    }

    preg_match_all(
        '/<\s*(' . $tagname . ')(\s(?:[^>\'"]*|"[^"]*"|\'[^\']*\')+)?\s*>/i',
        $this->html,
        $starting_tags,
        PREG_OFFSET_CAPTURE
    );

    $opening_count = count($starting_tags[0]);

    for($i=0; $i<$opening_count; $i++){

        /*
            Parse attributes
        */
        $attributes = [];

        preg_match_all(
            '/([^\/\s\\=]+)(?:\s*=\s*("[^"]*"|\'[^\']*\'|[^\s]*))?/i',
            $starting_tags[2][$i][0],
            $regex_attributes
        );

        $attribute_count = count($regex_attributes[0]);

        for($k=0; $k<$attribute_count; $k++){

            // names are stored lowercased whether or not a value follows,
            // so lookups behave the same for <a HREF=x> and <input DISABLED>
            $attribute_name = strtolower($regex_attributes[1][$k]);

            if(trim($regex_attributes[2][$k]) == ""){

                // attribute without a value
                $attributes[$attribute_name] = "true";
                continue;
            }

            $attributes[$attribute_name] =
                trim($regex_attributes[2][$k], "'\" \n\r\t\v\x00");
        }

        $out[] = [
            "tagName" => strtolower($starting_tags[1][$i][0]),
            "startPos" => $starting_tags[0][$i][1],
            "endPos" => 0,
            "startTag" => $starting_tags[0][$i][0],
            "attributes" => $attributes,
            "innerHTML" => null
        ];
    }

    /*
        Get innerHTML
    */
    // get closing tag positions
    preg_match_all(
        '/<\s*\/\s*(' . $tagname . ')\s*>/i',
        $this->html,
        $regex_closing_tags,
        PREG_OFFSET_CAPTURE
    );

    // merge opening and closing tags together
    $closing_count = count($regex_closing_tags[1]);

    for($i=0; $i<$closing_count; $i++){

        $out[] = [
            "tagName" => strtolower($regex_closing_tags[1][$i][0]),
            "endTag" => $regex_closing_tags[0][$i][0],
            "startPos" => $regex_closing_tags[0][$i][1]
        ];
    }

    // order by position; the comparator has to return an integer
    // (a boolean result is deprecated since PHP 8)
    usort(
        $out,
        function($a, $b){

            return $a["startPos"] <=> $b["startPos"];
        }
    );

    // compute the indent level for each element
    $level = [];
    $count = count($out);

    for($i=0; $i<$count; $i++){

        $name = $out[$i]["tagName"];

        if(!isset($level[$name])){

            $level[$name] = 0;
        }

        if(isset($out[$i]["startTag"])){

            // encountered starting tag
            $level[$name]++;
            $out[$i]["level"] = $level[$name];
        }else{

            // encountered closing tag
            $out[$i]["level"] = $level[$name];
            $level[$name]--;
        }
    }

    // if the indent level is the same for a div,
    // we encountered _THE_ closing tag
    for($i=0; $i<$count; $i++){

        if(!isset($out[$i]["startTag"])){

            continue;
        }

        for($k=$i; $k<$count; $k++){

            if(
                isset($out[$k]["endTag"]) &&
                $out[$i]["tagName"] == $out[$k]["tagName"] &&
                $out[$i]["level"] === $out[$k]["level"]
            ){

                $startlen = strlen($out[$i]["startTag"]);
                $endlen = strlen($out[$k]["endTag"]);

                $out[$i]["endPos"] = $out[$k]["startPos"] + $endlen;

                $out[$i]["innerHTML"] =
                    substr(
                        $this->html,
                        $out[$i]["startPos"] + $startlen,
                        $out[$k]["startPos"] - ($out[$i]["startPos"] + $startlen)
                    );

                $out[$i]["outerHTML"] =
                    substr(
                        $this->html,
                        $out[$i]["startPos"],
                        $out[$k]["startPos"] - $out[$i]["startPos"] + $endlen
                    );

                break;
            }
        }
    }

    // filter out ending divs and the helper key used for pairing
    $elements = [];

    foreach($out as $entry){

        if(isset($entry["endTag"])){

            continue;
        }

        unset($entry["startTag"]);
        $elements[] = $entry;
    }

    return $elements;
}
|
||||
|
||||
/**
 * Keep only the elements that carry an attribute with the given name.
 *
 * @param string            $name       Attribute name to look for
 * @param array|string|null $collection Element list to filter, a tag name
 *                                      to fetch first, or null for all tags
 * @return array Matching elements, in their original order
 */
public function getElementsByAttributeName(string $name, $collection = null){

    if($collection === null || is_string($collection)){

        // no ready-made list was given: fetch one by tag name ("*" = any)
        $collection = $this->getElementsByTagName($collection ?? "*");
    }

    $matches = [];

    foreach($collection as $element){

        foreach(array_keys($element["attributes"]) as $attribute){

            if($attribute == $name){

                $matches[] = $element;
                break;
            }
        }
    }

    return $matches;
}
|
||||
|
||||
/**
 * Find elements whose attribute $name contains every space-separated
 * token of $value, in any order (the way class lists are matched).
 *
 * Only the attribute called $name is inspected, and each requested token
 * has to be present in it; repeated tokens on either side do not change
 * the outcome.
 *
 * @param string            $name       Attribute to inspect
 * @param string            $value      Space-separated tokens to require
 * @param array|string|null $collection Forwarded to
 *                                      getElementsByAttributeName()
 * @return array Matching elements, in their original order
 */
public function getElementsByFuzzyAttributeValue(string $name, string $value, $collection = null){

    $elems = $this->getElementsByAttributeName($name, $collection);

    // collapse runs of spaces, then split into the tokens we require
    $wanted =
        explode(
            " ",
            trim(
                preg_replace(
                    '/ +/',
                    " ",
                    $value
                )
            )
        );

    $return = [];

    foreach($elems as $elem){

        foreach($elem["attributes"] as $attrib_name => $attrib_value){

            // other attributes must not satisfy the search
            // (eg. id="foo" when looking for class "foo")
            if($attrib_name != $name){

                continue;
            }

            $tokens = explode(" ", $attrib_value);

            // every wanted token has to appear among the attribute's tokens
            if(count(array_diff($wanted, $tokens)) === 0){

                $return[] = $elem;
                continue 2;
            }
        }
    }

    return $return;
}
|
||||
|
||||
/**
 * Find elements whose attribute $name is exactly $value.
 *
 * Only the attribute called $name is compared, and the comparison is a
 * strict string match so numeric-looking values are not coerced.
 *
 * @param string            $name       Attribute to inspect
 * @param string            $value      Value the attribute must equal
 * @param array|string|null $collection Forwarded to
 *                                      getElementsByAttributeName()
 * @return array Matching elements, in their original order
 */
public function getElementsByAttributeValue(string $name, string $value, $collection = null){

    $elems = $this->getElementsByAttributeName($name, $collection);

    $return = [];

    foreach($elems as $elem){

        foreach($elem["attributes"] as $attrib_name => $attrib_value){

            if(
                $attrib_name == $name &&
                (string)$attrib_value === $value
            ){

                $return[] = $elem;
                continue 2;
            }
        }
    }

    return $return;
}
|
||||
|
||||
/**
 * Return the first element whose id attribute matches.
 *
 * @param string            $idname     Id to look for
 * @param array|string|null $collection Forwarded to
 *                                      getElementsByAttributeValue()
 * @return array|false The first match, or false when there is none
 */
public function getElementById(string $idname, $collection = null){

    $matches = $this->getElementsByAttributeValue("id", $idname, $collection);

    return $matches[0] ?? false;
}
|
||||
|
||||
/**
 * Find elements by class name(s); a thin wrapper that runs the fuzzy
 * attribute search on the "class" attribute.
 *
 * @param string            $classname  Class name(s) to look for
 * @param array|string|null $collection Forwarded to
 *                                      getElementsByFuzzyAttributeValue()
 * @return array Matching elements
 */
public function getElementsByClassName(string $classname, $collection = null){

    $matches = $this->getElementsByFuzzyAttributeValue("class", $classname, $collection);

    return $matches;
}
|
||||
|
||||
/**
 * Reduce markup to its plain text.
 *
 * The input is cut at newlines and at <br> tags (including the <br/>,
 * <br /> and attribute-carrying spellings), tags are stripped from each
 * piece, entities are decoded, and the pieces are joined again.
 *
 * @param mixed $html       Markup string or an element array carrying an
 *                          "innerHTML" index
 * @param bool  $whitespace true joins the pieces with "\n", anything else
 *                          with a single space
 * @param bool  $trim       Trim every piece and the final result
 * @return string
 * @throws Exception When an array without "innerHTML" is supplied
 */
public function getTextContent($html, $whitespace = false, $trim = true){

    if(is_array($html)){

        if(!isset($html["innerHTML"])){

            throw new Exception("(getTextContent) Supplied array doesn't contain an innerHTML index");
        }

        $html = $html["innerHTML"];
    }

    // every <br> spelling has to count as a break here; a variant that
    // slips through is removed by strip_tags() and glues two words together
    $pieces = preg_split('/\n|<\/?br(?:\s[^>]*)?\/?>/i', $html);

    $separator = $whitespace === true ? "\n" : " ";

    $out = "";

    foreach($pieces as $piece){

        $tmp =
            html_entity_decode(
                strip_tags($piece),
                ENT_QUOTES | ENT_XML1, "UTF-8"
            );

        if($trim){

            $tmp = trim($tmp);
        }

        $out .= $tmp . $separator;
    }

    if($trim){

        return trim($out);
    }

    return $out;
}
|
||||
|
||||
/**
 * Best-effort conversion of a JavaScript object literal into a PHP array.
 *
 * The input is walked character by character. Text inside quotes is
 * copied through untouched; outside quotes, bare tokens are wrapped in
 * double quotes so json_decode() accepts them (which means unquoted
 * values come back as strings), and a token containing "function",
 * "array" or "return" that precedes a ":" or "{" is removed from the
 * output.
 *
 * @param string $json JavaScript object/array literal
 * @return mixed Result of json_decode(..., true); null when the rewritten
 *               text is still not valid JSON
 */
public function parseJsObject(string $json){

    $bracket = false;           // quote char of the string we are inside, or false
    $is_close_bracket = false;  // true while the current char is a closing quote
    $escape = false;            // true when the previous char was an unescaped backslash
    $json_out = "";
    $last_char = null;

    $keyword_check = "";

    $len = strlen($json);

    for($i=0; $i<$len; $i++){

        switch($json[$i]){

            case "\"":
            case "'":
                if($escape === true){

                    // escaped quote: part of the string, not a delimiter
                    break;
                }

                if(
                    $bracket !== false &&
                    $json[$i] === $bracket
                ){

                    $bracket = false;
                    $is_close_bracket = true;

                }else{

                    if($bracket === false){

                        $bracket = $json[$i];
                    }
                }
                break;

            default:
                $is_close_bracket = false;
                break;
        }

        // a backslash escapes the next character unless it is itself
        // escaped, so "\\" leaves us unescaped again and a quote right
        // after it really closes the string
        $escape = ($json[$i] === "\\" && $escape === false);

        if(
            $bracket === false &&
            $is_close_bracket === false
        ){

            // do keyword check
            $keyword_check .= $json[$i];

            if(in_array($json[$i], [":", "{"], true)){

                $keyword_check = substr($keyword_check, 0, -1);

                if(
                    preg_match(
                        '/function|array|return/i',
                        $keyword_check
                    )
                ){

                    $json_out =
                        preg_replace(
                            '/[{"]*' . preg_quote($keyword_check, "/") . '$/',
                            "",
                            $json_out
                        );
                }

                $keyword_check = "";
            }

            // here we know we're not iterating over a quoted string
            switch($json[$i]){

                case "[":
                case "{":
                    $json_out .= $json[$i];
                    break;

                case "]":
                case "}":
                case ",":
                case ":":
                    // close the quote opened around a bare token
                    if(!in_array($last_char, ["[", "{", "}", "]", "\""], true)){

                        $json_out .= "\"";
                    }

                    $json_out .= $json[$i];
                    break;

                default:
                    // open a quote in front of a bare token
                    if(in_array($last_char, ["{", "[", ",", ":"], true)){

                        $json_out .= "\"";
                    }

                    $json_out .= $json[$i];
                    break;
            }
        }else{

            $json_out .= $json[$i];
        }

        $last_char = $json[$i];
    }

    return json_decode($json_out, true);
}
|
||||
|
||||
/**
 * Decode the escape sequences of a JavaScript string literal.
 *
 * Handles \uXXXX (including UTF-16 surrogate pairs written as two
 * consecutive \uXXXX escapes), \xXX (the byte is converted from
 * windows-1252 to UTF-8) and turns \n / \r into a single space.
 * Everything else is left as it is.
 *
 * @param string $string Raw contents of the JavaScript string
 * @return string|null Decoded text (null if preg_replace_callback fails)
 */
public function parseJsString($string){

    return
        preg_replace_callback(
            // the surrogate-pair alternative comes first so both halves
            // are decoded together; decoded one by one they are invalid
            '/\\\u[Dd][89ABab][A-Fa-f0-9]{2}\\\u[Dd][C-Fc-f][A-Fa-f0-9]{2}|\\\u[A-Fa-f0-9]{4}|\\\x[A-Fa-f0-9]{2}|\\\n|\\\r/',
            function($match){

                switch($match[0][1]){

                    case "u":
                        // an unpaired surrogate cannot be decoded and is dropped
                        return json_decode('"' . $match[0] . '"') ?? "";

                    case "x":
                        return mb_convert_encoding(
                            stripcslashes($match[0]),
                            "utf-8",
                            "windows-1252"
                        );

                    default:
                        return " ";
                }
            },
            $string
        );
}
|
||||
|
||||
/**
 * Cut the first balanced {...} or [...] literal out of a larger string.
 *
 * Scanning starts at the beginning of the input. Brackets that sit
 * inside a single- or double-quoted string are ignored, and a quote
 * only counts as a delimiter when it is preceded by an even number of
 * backslashes.
 *
 * @param string $json Text that contains a JSON/JS literal somewhere
 * @return string|null The literal including its outer brackets, or null
 *                     when no balanced literal is found
 */
public function extract_json($json){

    $len = strlen($json);
    $array_level = 0;
    $object_level = 0;
    $in_quote = null;
    $start = null;

    for($i=0; $i<$len; $i++){

        switch($json[$i]){

            case "[":
                if($in_quote === null){

                    $array_level++;
                    if($start === null){

                        $start = $i;
                    }
                }
                break;

            case "]":
                if($in_quote === null){

                    $array_level--;
                }
                break;

            case "{":
                if($in_quote === null){

                    $object_level++;
                    if($start === null){

                        $start = $i;
                    }
                }
                break;

            case "}":
                if($in_quote === null){

                    $object_level--;
                }
                break;

            case "\"":
            case "'":
                // count the backslashes right in front of the quote: an
                // odd run escapes it, an even run (eg. "\\") does not
                $backslashes = 0;

                for($k=$i-1; $k>=0 && $json[$k] === "\\"; $k--){

                    $backslashes++;
                }

                if($backslashes % 2 === 0){

                    // found a non-escaped quote
                    if($in_quote === null){

                        // open quote
                        $in_quote = $json[$i];
                    }elseif($in_quote === $json[$i]){

                        // close quote
                        $in_quote = null;
                    }
                }
                break;
        }

        if(
            $start !== null &&
            $array_level === 0 &&
            $object_level === 0
        ){

            return substr($json, $start, $i - $start + 1);
        }
    }

    // ran out of input before the literal was closed (or never found one)
    return null;
}
|
||||
}
|
||||
BIN
lib/img404.png
Normal file
BIN
lib/img404.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 216 B |
132
lib/type-todo.php
Normal file
132
lib/type-todo.php
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
|
||||
/**
 * Build autocomplete suggestions for the search box.
 *
 * - Nothing typed and no bang: a fixed list of popular bangs.
 * - Only a (partial) bang: up to 8 bangs from the database that start
 *   with it.
 * - Search text: up to 8 phrases from DuckDuckGo's /ac/ endpoint,
 *   prefixed with the bang when that bang exists in the database.
 *
 * @param array $get Request parameters; "s" is the typed search text and
 *                   "bang" the bang name (it is prefixed with "!" here)
 * @return array List of suggestions; "s" is the suggested text, "n" an
 *               optional display name and "u" an optional URL template
 * @throws Exception When the database or the /ac/ endpoint cannot be used
 */
public function type($get){

    // missing parameters are treated like empty ones
    $search = $get["s"] ?? "";
    $bang = $get["bang"] ?? "";

    if(empty($search)){

        if(!empty($bang)){

            // !youtube
            // NOTE(review): connection string (credentials included) is
            // hard-coded here and below; should come from configuration
            $conn = pg_connect("host=localhost dbname=4get user=postgres password=postgres");

            if($conn === false){

                throw new Exception("Failed to connect to the bang database");
            }

            pg_prepare($conn, "bang_get", "SELECT bang,name FROM bangs WHERE bang LIKE $1 ORDER BY bang ASC LIMIT 8");

            // escape LIKE wildcards so a typed "%" or "_" is matched
            // literally instead of widening the search
            $q = pg_execute($conn, "bang_get", [addcslashes($bang, "\\%_") . "%"]);

            if($q === false){

                throw new Exception("Failed to query the bang database");
            }

            $results = [];
            while($row = pg_fetch_array($q, null, PGSQL_ASSOC)){

                $results[] = [
                    "s" => "!" . $row["bang"],
                    "n" => $row["name"]
                ];
            }

            return $results;
        }else{

            // everything is empty
            // lets just return a bang list
            return [
                [
                    "s" => "!w",
                    "n" => "Wikipedia",
                    "u" => "https://en.wikipedia.org/wiki/Special:Search?search={%q%}"
                ],
                [
                    "s" => "!4ch",
                    "n" => "4chan Board",
                    "u" => "https://find.4chan.org/?q={%q%}"
                ],
                [
                    "s" => "!a",
                    "n" => "Amazon",
                    "u" => "https://www.amazon.com/s?k={%q%}"
                ],
                [
                    "s" => "!e",
                    "n" => "eBay",
                    "u" => "https://www.ebay.com/sch/items/?_nkw={%q%}"
                ],
                [
                    "s" => "!so",
                    "n" => "Stack Overflow",
                    "u" => "http://stackoverflow.com/search?q={%q%}"
                ],
                [
                    "s" => "!gh",
                    "n" => "GitHub",
                    "u" => "https://github.com/search?utf8=%E2%9C%93&q={%q%}"
                ],
                [
                    "s" => "!tw",
                    "n" => "Twitter",
                    "u" => "https://twitter.com/search?q={%q%}"
                ],
                [
                    "s" => "!r",
                    "n" => "Reddit",
                    "u" => "https://www.reddit.com/search?q={%q%}"
                ],
            ];
        }
    }

    // now we know search isnt empty
    $row = false;

    if(!empty($bang)){

        // check if the bang exists
        $conn = pg_connect("host=localhost dbname=4get user=postgres password=postgres");

        if($conn === false){

            throw new Exception("Failed to connect to the bang database");
        }

        pg_prepare($conn, "bang_get_single", "SELECT bang,name FROM bangs WHERE bang = $1 LIMIT 1");
        $q = pg_execute($conn, "bang_get_single", [$bang]);

        if($q === false){

            throw new Exception("Failed to query the bang database");
        }

        $row = pg_fetch_array($q, null, PGSQL_ASSOC);

        if(isset($row["bang"])){

            $bang = "!$bang ";
        }else{

            // unknown bang: suggest plain phrases
            $bang = "";
        }
    }

    try{
        $res = $this->get(
            "https://duckduckgo.com/ac/",
            [
                "q" => strtolower($search)
            ],
            ddg::req_xhr
        );

        $res = json_decode($res, true);

    }catch(Exception $e){

        // keep the original failure attached for debugging
        throw new Exception("Failed to get /ac/", 0, $e);
    }

    if(!is_array($res)){

        // json_decode() gives null for an undecodable body; counting
        // that would be a TypeError on PHP 8
        throw new Exception("Failed to decode /ac/ response");
    }

    $arr = [];

    // at most 8 suggestions
    foreach(array_slice($res, 0, 8) as $suggestion){

        if(!isset($suggestion["phrase"])){

            continue;
        }

        if(empty($bang)){

            $arr[] = [
                "s" => $suggestion["phrase"]
            ];
        }else{

            $arr[] = [
                "s" => $bang . $suggestion["phrase"],
                "n" => $row["name"]
            ];
        }
    }

    return $arr;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue