From 92ac79b4c6cdd37e63bd58008377f1052da01f45 Mon Sep 17 00:00:00 2001 From: cedric Date: Tue, 9 Dec 2025 20:27:03 +0000 Subject: [PATCH 1/5] php oriented objet --- index.php | 461 +++++++++++++++++++++--------------------------------- 1 file changed, 180 insertions(+), 281 deletions(-) diff --git a/index.php b/index.php index e4c420a..02f58df 100644 --- a/index.php +++ b/index.php @@ -1,291 +1,190 @@ CACHE_MAX_AGE; + } + + public function downloadInstances(): bool { + $data = file_get_contents(INSTANCES_URL); + if ($data === false) return false; + return $this->safeWrite(INSTANCES_JSON, $data); + } + + public function extractValidUrls(): bool { + if (!file_exists(INSTANCES_JSON)) return false; + + $json = json_decode(file_get_contents(INSTANCES_JSON), true); + if (!isset($json['instances']) || !is_array($json['instances'])) return false; + + $valid = array_filter(array_keys($json['instances']), function($url) use ($json) { + $data = $json['instances'][$url] ?? []; + return ($data['network_type'] ?? '') === 'normal' + && ($data['http']['status_code'] ?? 0) === 200 + && ($data['timing']['search']['success_percentage'] ?? 0) === 100.0 + && ($data['timing']['initial']['success_percentage'] ?? 0) === 100.0 + && filter_var($url, FILTER_VALIDATE_URL) + && in_array(parse_url($url, PHP_URL_SCHEME), ['http','https'], true); + }); + + if (empty($valid)) return false; + + return $this->safeWrite(URLS_TXT, implode("\n", array_map('rtrim', $valid))); + } + + private function safeWrite(string $file, string $data): bool { + $written = file_put_contents($file, $data, LOCK_EX); + return $written !== false && $written === strlen($data); + } + + public function loadUrls(): array { + if (empty($this->urlsCache)) { + if (!file_exists(URLS_TXT)) return []; + $urls = file(URLS_TXT, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + $this->urlsCache = array_filter($urls, fn($url) => filter_var($url, FILTER_VALIDATE_URL)); + } + return $this->urlsCache; + } + + public function getRandomUrl(): string { + $urls = $this->loadUrls(); + if (empty($urls)) throw new RuntimeException("Aucune URL disponible"); + return $urls[array_rand($urls)]; + } +} + +class BangManager +{ + private array $bangs = []; + + public function __construct(private string $file = BANG_FILE) { + $this->loadBangs(); + } + + private function loadBangs(): void { + if (!file_exists($this->file)) return; + $data = json_decode(file_get_contents($this->file), true); + $this->bangs = is_array($data) ? $data : []; + } + + public function tryRedirect(string $query): bool { + if (!str_starts_with($query, '!')) return false; + if (!preg_match('/^!(\w+)\s?(.*)$/u', $query, $matches)) return false; + + [$bangCode, $searchTerm] = [$matches[1], trim($matches[2])]; + if ($searchTerm === '') return false; + + foreach ($this->bangs as $bang) { + if (($bang['bang'] ?? '') === $bangCode) { + header("Location: " . rtrim($bang['url'], '/') . '?q=' . rawurlencode($searchTerm)); + exit; + } + } + return false; + } +} + +class RequestHandler +{ + public function __construct(private CacheManager $cache, private BangManager $bang) {} + + public function handle(): void { + $method = $_SERVER['REQUEST_METHOD'] ?? 'GET'; + $query = $_REQUEST['q'] ?? null; + + if ($query !== null && !$this->bang->tryRedirect($query)) { + $instance = rtrim($this->cache->getRandomUrl(), '/'); + + if ($method === 'GET') { + $this->redirectGet($instance, $query); + } elseif ($method === 'POST') { + $this->proxyPost($instance, $_POST); + } else { + http_response_code(405); + exit('Méthode non autorisée'); + } + } elseif ($query === null) { + $instance = rtrim($this->cache->getRandomUrl(), '/'); + if ($method === 'GET') { + header("Location: $instance"); + exit; + } elseif ($method === 'POST') { + $this->proxyPost($instance, $_POST); + } else { + http_response_code(405); + exit('Méthode non autorisée'); + } + } + } + + private function redirectGet(string $instance, string $query): void { + $url = $instance . '/search?' . http_build_query(['q' => $query]); + header("Location: $url"); + exit; + } + + private function proxyPost(string $urlBase, array $postData): void { + $url = $urlBase . '/search'; + $ch = curl_init($url); + curl_setopt_array($ch, [ + CURLOPT_RETURNTRANSFER => true, + CURLOPT_POST => true, + CURLOPT_POSTFIELDS => http_build_query($postData), + CURLOPT_HEADER => false, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_TIMEOUT => 10, + CURLOPT_SSL_VERIFYPEER => true, + ]); + $response = curl_exec($ch); + $code = curl_getinfo($ch, CURLINFO_HTTP_CODE); + curl_close($ch); + + http_response_code($code); + echo $response; + exit; + } +} + +// -------------------- EXECUTION -------------------- +try { + $cache = new CacheManager(); + if ($cache->isExpired()) { + if (!$cache->downloadInstances() || !$cache->extractValidUrls()) { + http_response_code(500); + exit('Erreur interne : impossible de générer le cache.'); + } + } + + $bang = new BangManager(); + $handler = new RequestHandler($cache, $bang); + $handler->handle(); + +} catch (\Throwable $e) { http_response_code(500); - exit('Erreur interne'); -} - -// Mise à jour du cache si trop vieux ou absent -if (cache_expired()) { - if (!download_instances_json() || !extract_valid_urls()) { - error_log('Erreur lors de la génération du cache'); - http_response_code(500); - exit('Erreur interne'); - } -} - -// Récupérer la requête -$query = $_GET['q'] ?? null; - -if ($query !== null) { - if (!is_valid_query($query)) { - http_response_code(400); - exit('Paramètre q invalide'); - } - - // Gestion bang : si retourne true, redirection faite et script termine - if (!try_redirect_bang($query)) { - redirect_search($query); - } -} else { - redirect_random_url(); -} - -// — Fonctions — // - -function cache_expired(): bool -{ - if (!file_exists(URLS_TXT)) { - return true; - } - $age = time() - filemtime(URLS_TXT); - return $age > CACHE_MAX_AGE; -} - -function download_instances_json(): bool -{ - $ch = curl_init(INSTANCES_URL); - curl_setopt_array($ch, [ - CURLOPT_RETURNTRANSFER => true, - CURLOPT_FOLLOWLOCATION => true, - CURLOPT_TIMEOUT => 10, - CURLOPT_SSL_VERIFYPEER => true, - ]); - $data = curl_exec($ch); - $code = curl_getinfo($ch, CURLINFO_HTTP_CODE); - if ($data === false) { - error_log('curl_exec failed: ' . curl_error($ch)); - } - curl_close($ch); - - if ($code === 200 && $data !== false) { - return safe_file_put_contents(INSTANCES_JSON, $data); - } - error_log("Erreur téléchargement instances.json, HTTP $code"); - return false; -} - -function extract_valid_urls(): bool -{ - if (!file_exists(INSTANCES_JSON)) { - error_log("instances.json introuvable"); - return false; - } - - $content = file_get_contents(INSTANCES_JSON); - if ($content === false) { - error_log("Impossible de lire instances.json"); - return false; - } - - $j = json_decode($content, true); - if (!isset($j['instances']) || !is_array($j['instances'])) { - error_log("JSON mal formé"); - return false; - } - - $valid = []; - foreach ($j['instances'] as $url => $data) { - if ( - ($data['network_type'] ?? '') === 'normal' - && ($data['http']['status_code'] ?? 0) === 200 - && ($data['timing']['search']['success_percentage'] ?? 0) === 100.0 - && ($data['timing']['initial']['success_percentage'] ?? 0) === 100.0 - && filter_var($url, FILTER_VALIDATE_URL) - && in_array(parse_url($url, PHP_URL_SCHEME), ['http', 'https'], true) - ) { - $valid[] = rtrim($url, '/'); - } - } - - if (empty($valid)) { - error_log("Aucune URL valide"); - return false; - } - - return safe_file_put_contents(URLS_TXT, implode("\n", $valid)); -} - -function safe_file_put_contents(string $filename, string $data): bool -{ - $fp = fopen($filename, 'c'); - if (!$fp) { - error_log("Impossible d'ouvrir $filename en écriture"); - return false; - } - if (!flock($fp, LOCK_EX)) { - fclose($fp); - error_log("Impossible de verrouiller $filename"); - return false; - } - ftruncate($fp, 0); - $written = fwrite($fp, $data); - fflush($fp); - flock($fp, LOCK_UN); - fclose($fp); - - return $written === strlen($data); -} - -function get_all_urls(): array -{ - if (!file_exists(URLS_TXT)) { - return []; - } - - $fp = fopen(URLS_TXT, 'r'); - if (!$fp) { - error_log("Impossible d'ouvrir urls.txt"); - return []; - } - - if (!flock($fp, LOCK_SH)) { - fclose($fp); - error_log("Impossible de verrouiller urls.txt en lecture"); - return []; - } - - $urls = []; - while (($line = fgets($fp)) !== false) { - $line = trim($line); - if ($line !== '' && filter_var($line, FILTER_VALIDATE_URL)) { - $urls[] = $line; - } - } - flock($fp, LOCK_UN); - fclose($fp); - - return $urls; -} - -function redirect_random_url(): void -{ - $urls = get_all_urls(); - if (empty($urls)) { - http_response_code(500); - exit('Instances indisponibles'); - } - $target = $urls[array_rand($urls)]; - header("Location: $target"); - exit; -} - -function redirect_search(string $query): void -{ - $urls = get_all_urls(); - if (empty($urls)) { - http_response_code(500); - exit('Instances indisponibles pour recherche'); - } - $base = $urls[array_rand($urls)]; - - $parsed = parse_url($base); - if ($parsed === false) { - error_log("Base URL invalide dans urls.txt"); - http_response_code(500); - exit('Erreur interne'); - } - - $path = $parsed['path'] ?? ''; - $path = rtrim($path, '/') . '/search'; - - $query_string = http_build_query(['q' => $query]); - - $target = $parsed['scheme'] . '://' . $parsed['host']; - if (isset($parsed['port'])) { - $target .= ':' . $parsed['port']; - } - $target .= $path . '?' . $query_string; - - header("Location: $target"); - exit; -} - -function is_valid_query(string $query): bool -{ - $query = trim($query); - if ($query === '') { - return false; - } - if (mb_strlen($query) > 200) { - return false; - } - - // Vérifier que la chaîne est en UTF-8 valide - if (!mb_check_encoding($query, 'UTF-8')) { - return false; - } - - return preg_match('/^[^\x00-\x1F\x7F]+$/u', $query) === 1; -} - -// --- Gestion bangs --- // - -function load_bangs(): array -{ - $file = __DIR__ . '/bang.json'; - if (!file_exists($file)) { - error_log("bang.json introuvable"); - return []; - } - $content = file_get_contents($file); - if ($content === false) { - error_log("Impossible de lire bang.json"); - return []; - } - $bangs = json_decode($content, true); - if (!is_array($bangs)) { - error_log("bang.json mal formé"); - return []; - } - return $bangs; -} - -function try_redirect_bang(string $query): bool -{ - if (substr($query, 0, 1) !== '!') { - return false; - } - - if (!preg_match('/^!(\w+)\s?(.*)$/u', $query, $matches)) { - return false; - } - $bang_code = $matches[1]; - $search_term = trim($matches[2]); - - if ($search_term === '') { - return false; - } - - $bangs = load_bangs(); - if (empty($bangs)) { - return false; - } - - foreach ($bangs as $bang) { - if (($bang['bang'] ?? '') === $bang_code) { - $base_url = rtrim($bang['url'], '/'); - $search_url = $base_url . '?q=' . rawurlencode($search_term); - header("Location: $search_url"); - exit; - } - } - return false; + exit('Erreur interne : ' . $e->getMessage()); } From 7f162fa616529d4d1f0cd8d9349fd7649875b4b6 Mon Sep 17 00:00:00 2001 From: cedric Date: Wed, 10 Dec 2025 21:20:06 +0000 Subject: [PATCH 2/5] Change CACHE_DIR path --- index.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.php b/index.php index 02f58df..ee293ff 100644 --- a/index.php +++ b/index.php @@ -2,7 +2,7 @@ declare(strict_types=1); // -------------------- CONSTANTES -------------------- -const CACHE_DIR = __DIR__ . '/cache'; +const CACHE_DIR = '/var/cache/homepage'; const INSTANCES_JSON = CACHE_DIR . '/instances.json'; const URLS_TXT = CACHE_DIR . '/urls.txt'; const INSTANCES_URL = 'https://searx.space/data/instances.json'; From 65f977918f0b764c8e73d54514816cee378c59a3 Mon Sep 17 00:00:00 2001 From: cedric Date: Thu, 11 Dec 2025 11:50:28 +0000 Subject: [PATCH 3/5] Actualiser index.php Disallow POST due to 429 --- index.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index.php b/index.php index ee293ff..fe3c6d9 100644 --- a/index.php +++ b/index.php @@ -123,7 +123,8 @@ class RequestHandler if ($method === 'GET') { $this->redirectGet($instance, $query); } elseif ($method === 'POST') { - $this->proxyPost($instance, $_POST); + $this->redirectGet($instance, $query); + // $this->proxyPost($instance, $_POST); } else { http_response_code(405); exit('Méthode non autorisée'); From dbd255da9664f001cd6b70b004eec821d5a6604f Mon Sep 17 00:00:00 2001 From: cedric Date: Thu, 11 Dec 2025 21:21:56 +0000 Subject: [PATCH 4/5] Use legit engines --- index.php | 150 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 106 insertions(+), 44 deletions(-) diff --git a/index.php b/index.php index fe3c6d9..5b91226 100644 --- a/index.php +++ b/index.php @@ -2,20 +2,23 @@ declare(strict_types=1); // -------------------- CONSTANTES -------------------- -const CACHE_DIR = '/var/cache/homepage'; -const INSTANCES_JSON = CACHE_DIR . '/instances.json'; -const URLS_TXT = CACHE_DIR . '/urls.txt'; -const INSTANCES_URL = 'https://searx.space/data/instances.json'; -const CACHE_MAX_AGE = 3600; // 1 heure -const BANG_FILE = __DIR__ . '/bang.json'; +const CACHE_DIR = '/var/cache/homepage'; +const INSTANCES_JSON = CACHE_DIR . '/instances.json'; +const URLS_TXT = __DIR__ . '/urls.txt'; // ← Hors du cache, à la racine +const INSTANCES_URL = 'https://searx.space/data/instances.json'; +const CACHE_MAX_AGE = 3600; // 1 heure +const BANG_FILE = __DIR__ . '/bang.json'; +const USE_SEARX_INSTANCES = true; // false → utilise engines.txt // -------------------- HEADERS DE SÉCURITÉ -------------------- header('X-Frame-Options: SAMEORIGIN'); header('X-Content-Type-Options: nosniff'); header('Referrer-Policy: no-referrer-when-downgrade'); header("Content-Security-Policy: default-src 'none'; frame-ancestors 'none'; sandbox"); +header('X-Search-Mode: ' . (USE_SEARX_INSTANCES ? 'searx' : 'engines')); // ← Point 6 // -------------------- CLASSES -------------------- + class CacheManager { private array $urlsCache = []; @@ -109,22 +112,58 @@ class BangManager } } -class RequestHandler +class EngineManager +{ + private static ?array $cachedEngines = null; + private array $engines = []; + + public function __construct(private string $file = __DIR__ . '/engines.txt') + { + if (self::$cachedEngines === null) { + $this->loadEngines(); + self::$cachedEngines = $this->engines; + } else { + $this->engines = self::$cachedEngines; + } + } + + private function loadEngines(): void + { + if (!file_exists($this->file)) { + throw new RuntimeException("Fichier engines.txt introuvable"); + } + $lines = file($this->file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + $this->engines = array_filter($lines, fn($line) => !empty(trim($line))); + } + + public function getRandomEngine(): string + { + if (empty($this->engines)) { + throw new RuntimeException("Aucun moteur de recherche configuré dans engines.txt"); + } + return $this->engines[array_rand($this->engines)]; + } +} + +class SearxHandler { public function __construct(private CacheManager $cache, private BangManager $bang) {} - public function handle(): void { + public function handle(): void + { $method = $_SERVER['REQUEST_METHOD'] ?? 'GET'; $query = $_REQUEST['q'] ?? null; if ($query !== null && !$this->bang->tryRedirect($query)) { $instance = rtrim($this->cache->getRandomUrl(), '/'); + $url = $instance . '/search?' . http_build_query(['q' => $query]); if ($method === 'GET') { - $this->redirectGet($instance, $query); + header("Location: $url"); + exit; } elseif ($method === 'POST') { - $this->redirectGet($instance, $query); - // $this->proxyPost($instance, $_POST); + header("Location: $url"); // Redirige en GET pour simplifier + exit; } else { http_response_code(405); exit('Méthode non autorisée'); @@ -135,57 +174,80 @@ class RequestHandler header("Location: $instance"); exit; } elseif ($method === 'POST') { - $this->proxyPost($instance, $_POST); + header("Location: $instance"); + exit; } else { http_response_code(405); exit('Méthode non autorisée'); } } } +} - private function redirectGet(string $instance, string $query): void { - $url = $instance . '/search?' . http_build_query(['q' => $query]); - header("Location: $url"); - exit; - } +class EngineHandler +{ + public function __construct(private EngineManager $engineManager, private BangManager $bang) {} - private function proxyPost(string $urlBase, array $postData): void { - $url = $urlBase . '/search'; - $ch = curl_init($url); - curl_setopt_array($ch, [ - CURLOPT_RETURNTRANSFER => true, - CURLOPT_POST => true, - CURLOPT_POSTFIELDS => http_build_query($postData), - CURLOPT_HEADER => false, - CURLOPT_FOLLOWLOCATION => true, - CURLOPT_TIMEOUT => 10, - CURLOPT_SSL_VERIFYPEER => true, - ]); - $response = curl_exec($ch); - $code = curl_getinfo($ch, CURLINFO_HTTP_CODE); - curl_close($ch); + public function handle(): void + { + $method = $_SERVER['REQUEST_METHOD'] ?? 'GET'; + $query = $_REQUEST['q'] ?? null; - http_response_code($code); - echo $response; - exit; + if ($query !== null && !$this->bang->tryRedirect($query)) { + $engineTemplate = $this->engineManager->getRandomEngine(); + $url = sprintf($engineTemplate, rawurlencode($query)); + + if ($method === 'GET') { + header("Location: $url"); + exit; + } elseif ($method === 'POST') { + header("Location: $url"); // Redirige en GET + exit; + } else { + http_response_code(405); + exit('Méthode non autorisée'); + } + } elseif ($query === null) { + // Rediriger vers la page d'accueil du premier moteur + $engineTemplate = $this->engineManager->getRandomEngine(); + $homepage = str_replace('?query=%s', '', str_replace('?q=%s', '', $engineTemplate)); + if ($method === 'GET') { + header("Location: $homepage"); + exit; + } elseif ($method === 'POST') { + header("Location: $homepage"); + exit; + } else { + http_response_code(405); + exit('Méthode non autorisée'); + } + } } } // -------------------- EXECUTION -------------------- try { - $cache = new CacheManager(); - if ($cache->isExpired()) { - if (!$cache->downloadInstances() || !$cache->extractValidUrls()) { - http_response_code(500); - exit('Erreur interne : impossible de générer le cache.'); + $bang = new BangManager(); + $engineManager = new EngineManager(); + + if (USE_SEARX_INSTANCES) { + $cache = new CacheManager(); + if ($cache->isExpired()) { + if (!$cache->downloadInstances() || !$cache->extractValidUrls()) { + http_response_code(500); + exit('Erreur interne : veuillez réessayer plus tard.'); + } } + $handler = new SearxHandler($cache, $bang); + } else { + $handler = new EngineHandler($engineManager, $bang); } - $bang = new BangManager(); - $handler = new RequestHandler($cache, $bang); $handler->handle(); } catch (\Throwable $e) { + // Ne pas exposer les détails techniques → Point 3 + error_log('Erreur interne : ' . $e->getMessage() . ' | Trace: ' . $e->getTraceAsString()); http_response_code(500); - exit('Erreur interne : ' . $e->getMessage()); -} + exit('Erreur interne : veuillez réessayer plus tard.'); +} \ No newline at end of file From f98a04c11f819d61dce429f6172631094ebec569 Mon Sep 17 00:00:00 2001 From: cedric Date: Thu, 11 Dec 2025 21:22:37 +0000 Subject: [PATCH 5/5] Add engines urls --- url.txt | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 url.txt diff --git a/url.txt b/url.txt new file mode 100644 index 0000000..31b7970 --- /dev/null +++ b/url.txt @@ -0,0 +1,8 @@ +https://marginalia-search.com/search?query=%s +https://swisscows.com/fr/web?query=%s +https://www.qwant.com/?q=%s +https://search.brave.com/search?q=%s +https://www.ecosia.org/search?q=%s +https://www.mojeek.com/search?q=%s +https://duckduckgo.com/?q=%s +https://www.startpage.com/do/search?q=%s \ No newline at end of file