Compare commits
5 commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f98a04c11f | |||
| dbd255da96 | |||
| 65f977918f | |||
| 7f162fa616 | |||
| 92ac79b4c6 |
2 changed files with 252 additions and 282 deletions
526
index.php
526
index.php
|
|
@ -1,291 +1,253 @@
|
|||
<?php
|
||||
// index.php
|
||||
// Gère génération du cache, redirection classique et gestion des bangs (!code)
|
||||
declare(strict_types=1);
|
||||
|
||||
// -------------------- CONSTANTS --------------------
// Each constant is declared exactly once. Declaring the same name with both
// define() and const makes the second declaration fail with an
// "already defined" warning, so the earlier value would silently win.
const CACHE_DIR = '/var/cache/homepage';
const INSTANCES_JSON = CACHE_DIR . '/instances.json';
const URLS_TXT = __DIR__ . '/urls.txt'; // kept next to this script, outside the cache directory
const INSTANCES_URL = 'https://searx.space/data/instances.json';
const CACHE_MAX_AGE = 3600; // seconds (1 hour)
const BANG_FILE = __DIR__ . '/bang.json';
const USE_SEARX_INSTANCES = true; // false → use engines.txt instead

// -------------------- SECURITY HEADERS --------------------
header('X-Frame-Options: SAMEORIGIN');
header('X-Content-Type-Options: nosniff');
header('Referrer-Policy: no-referrer-when-downgrade');
header("Content-Security-Policy: default-src 'none'; frame-ancestors 'none'; sandbox");
// Exposes which backend mode is active.
header('X-Search-Mode: ' . (USE_SEARX_INSTANCES ? 'searx' : 'engines'));
|
||||
|
||||
// Création dossier cache si nécessaire
|
||||
if (!is_dir(CACHE_DIR) && !mkdir(CACHE_DIR, 0755, true)) {
|
||||
error_log('Erreur création dossier cache');
|
||||
// -------------------- CLASSES --------------------
|
||||
|
||||
/**
 * Maintains the on-disk list of usable SearX instance URLs.
 *
 * The instance catalogue is fetched from INSTANCES_URL into INSTANCES_JSON,
 * filtered, and the surviving URLs are written one per line to URLS_TXT.
 */
class CacheManager
{
    /** Seconds to wait for the catalogue download before giving up. */
    private const DOWNLOAD_TIMEOUT = 10;

    /** @var list<string> URLs read from URLS_TXT, memoised once non-empty. */
    private array $urlsCache = [];

    /**
     * @param string $cacheDir Directory that is created on construction when missing.
     *
     * @throws RuntimeException When the directory does not exist and cannot be created.
     */
    public function __construct(private string $cacheDir = CACHE_DIR)
    {
        // The trailing is_dir() tolerates another request creating the
        // directory between the first check and mkdir().
        if (!is_dir($cacheDir) && !mkdir($cacheDir, 0755, true) && !is_dir($cacheDir)) {
            throw new RuntimeException("Impossible de créer le dossier cache");
        }
    }

    /**
     * Tells whether URLS_TXT is missing, unreadable, or older than CACHE_MAX_AGE seconds.
     */
    public function isExpired(): bool
    {
        if (!file_exists(URLS_TXT)) {
            return true;
        }

        $modifiedAt = filemtime(URLS_TXT);

        return $modifiedAt === false || (time() - $modifiedAt) > CACHE_MAX_AGE;
    }

    /**
     * Downloads the instance catalogue and stores it in INSTANCES_JSON.
     *
     * @return bool False when the download fails, is empty, or cannot be written.
     */
    public function downloadInstances(): bool
    {
        // Without an explicit timeout a slow upstream would block the request
        // for the whole default socket timeout.
        $context = stream_context_create([
            'http' => ['timeout' => self::DOWNLOAD_TIMEOUT],
        ]);

        $data = file_get_contents(INSTANCES_URL, false, $context);
        if ($data === false || $data === '') {
            return false;
        }

        return $this->safeWrite(INSTANCES_JSON, $data);
    }

    /**
     * Filters INSTANCES_JSON and writes the accepted URLs to URLS_TXT.
     *
     * An instance is accepted when its network type is "normal", its HTTP
     * status code is 200, both timing success percentages equal 100, and its
     * key is a valid http(s) URL.
     *
     * @return bool False when the catalogue is missing, unreadable, malformed,
     *              yields no URL, or the result cannot be written.
     */
    public function extractValidUrls(): bool
    {
        if (!file_exists(INSTANCES_JSON)) {
            return false;
        }

        $raw = file_get_contents(INSTANCES_JSON);
        if ($raw === false) {
            return false;
        }

        $json = json_decode($raw, true);
        if (!is_array($json) || !isset($json['instances']) || !is_array($json['instances'])) {
            return false;
        }

        $valid = [];
        foreach ($json['instances'] as $key => $data) {
            // PHP turns numeric-looking array keys into ints; the URL helpers need a string.
            $url = (string) $key;

            if (
                is_array($data)
                && ($data['network_type'] ?? '') === 'normal'
                && ($data['http']['status_code'] ?? 0) === 200
                && self::isFullSuccess($data['timing']['search']['success_percentage'] ?? null)
                && self::isFullSuccess($data['timing']['initial']['success_percentage'] ?? null)
                && filter_var($url, FILTER_VALIDATE_URL) !== false
                && in_array(parse_url($url, PHP_URL_SCHEME), ['http', 'https'], true)
            ) {
                $valid[] = rtrim($url);
            }
        }

        if ($valid === []) {
            return false;
        }

        if (!$this->safeWrite(URLS_TXT, implode("\n", $valid))) {
            return false;
        }

        // Drop the memoised list so the next loadUrls() sees the fresh file.
        $this->urlsCache = [];

        return true;
    }

    /**
     * Returns the valid URLs stored in URLS_TXT, reading the file at most once
     * per object as long as the result is non-empty.
     *
     * @return list<string> Empty when the file is missing or unreadable.
     */
    public function loadUrls(): array
    {
        if ($this->urlsCache !== []) {
            return $this->urlsCache;
        }

        if (!file_exists(URLS_TXT)) {
            return [];
        }

        $lines = file(URLS_TXT, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) {
            return [];
        }

        $this->urlsCache = array_values(array_filter(
            $lines,
            static fn (string $line): bool => filter_var($line, FILTER_VALIDATE_URL) !== false,
        ));

        return $this->urlsCache;
    }

    /**
     * Picks one cached instance URL at random.
     *
     * @throws RuntimeException When no URL is available.
     */
    public function getRandomUrl(): string
    {
        $urls = $this->loadUrls();
        if ($urls === []) {
            throw new RuntimeException("Aucune URL disponible");
        }

        return $urls[array_rand($urls)];
    }

    /**
     * Writes $data to $file under an exclusive lock.
     *
     * @return bool True only when every byte was written.
     */
    private function safeWrite(string $file, string $data): bool
    {
        $written = file_put_contents($file, $data, LOCK_EX);

        return $written !== false && $written === strlen($data);
    }

    /**
     * Accepts a success percentage of 100 whether it was decoded as int or float.
     */
    private static function isFullSuccess(mixed $percentage): bool
    {
        return is_numeric($percentage) && (float) $percentage === 100.0;
    }
}
|
||||
|
||||
/**
 * Resolves "!code term" queries against the bang definitions of a JSON file.
 *
 * Each usable definition is an array with a "bang" code and a target "url".
 */
class BangManager
{
    /** @var array<int|string, mixed> Decoded content of the bang file. */
    private array $bangs = [];

    /**
     * @param string $file Path of the JSON file holding the bang definitions.
     */
    public function __construct(private string $file = BANG_FILE)
    {
        $this->loadBangs();
    }

    /**
     * Loads the bang file; a missing, unreadable or malformed file leaves the list empty.
     */
    private function loadBangs(): void
    {
        if (!is_file($this->file)) {
            return;
        }

        $content = file_get_contents($this->file);
        if ($content === false) {
            return;
        }

        $data = json_decode($content, true);
        $this->bangs = is_array($data) ? $data : [];
    }

    /**
     * Redirects to the bang target when $query is "!code term" and the code is known.
     *
     * On a match the Location header is sent and the script exits, so this
     * method only ever returns false.
     *
     * @return bool False when the query is not a bang, has no search term, or
     *              no usable definition matches the code.
     */
    public function tryRedirect(string $query): bool
    {
        if (!str_starts_with($query, '!')) {
            return false;
        }

        if (preg_match('/^!(\w+)\s?(.*)$/u', $query, $matches) !== 1) {
            return false;
        }

        $bangCode = $matches[1];
        $searchTerm = trim($matches[2]);
        if ($searchTerm === '') {
            return false;
        }

        foreach ($this->bangs as $bang) {
            if (!is_array($bang) || ($bang['bang'] ?? '') !== $bangCode) {
                continue;
            }

            // A definition without a usable target must not produce a redirect
            // to a bare "?q=..." location; keep looking instead.
            $target = $bang['url'] ?? null;
            if (!is_string($target) || $target === '') {
                continue;
            }

            header("Location: " . rtrim($target, '/') . '?q=' . rawurlencode($searchTerm));
            exit;
        }

        return false;
    }
}
|
||||
|
||||
/**
 * Provides search-engine URL templates read from a text file, one per line.
 */
class EngineManager
{
    /**
     * Templates already loaded during this request, keyed by file path so that
     * managers built on different files never share each other's list.
     *
     * @var array<string, list<string>>
     */
    private static array $cachedEngines = [];

    /** @var list<string> Templates available to this instance. */
    private array $engines = [];

    /**
     * @param string $file Path of the template list.
     *
     * @throws RuntimeException When the file is missing or unreadable.
     */
    public function __construct(private string $file = __DIR__ . '/engines.txt')
    {
        if (!isset(self::$cachedEngines[$this->file])) {
            $this->loadEngines();
            self::$cachedEngines[$this->file] = $this->engines;
        } else {
            $this->engines = self::$cachedEngines[$this->file];
        }
    }

    /**
     * Reads the template file, dropping blank lines and surrounding whitespace.
     *
     * @throws RuntimeException When the file is missing or unreadable.
     */
    private function loadEngines(): void
    {
        if (!file_exists($this->file)) {
            throw new RuntimeException("Fichier engines.txt introuvable");
        }

        $lines = file($this->file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) {
            throw new RuntimeException("Fichier engines.txt introuvable");
        }

        // Templates end up in a Location header, so stray whitespace is removed.
        $this->engines = array_values(array_filter(
            array_map(trim(...), $lines),
            static fn (string $line): bool => $line !== '',
        ));
    }

    /**
     * Picks one template at random.
     *
     * @throws RuntimeException When the list is empty.
     */
    public function getRandomEngine(): string
    {
        if ($this->engines === []) {
            throw new RuntimeException("Aucun moteur de recherche configuré dans engines.txt");
        }

        return $this->engines[array_rand($this->engines)];
    }
}
|
||||
|
||||
/**
 * Answers a request by redirecting to a random SearX instance.
 */
class SearxHandler
{
    public function __construct(private CacheManager $cache, private BangManager $bang) {}

    /**
     * Sends the redirect (or an error status) and terminates the script.
     *
     * - Methods other than GET and POST get a 405.
     * - A "q" parameter that is not a string (e.g. "q[]=x") gets a 400.
     * - A bang query is redirected by the bang manager.
     * - Any other query goes to "<instance>/search?q=...".
     * - Without a query the instance root is used.
     *
     * @throws RuntimeException When no instance URL is available.
     */
    public function handle(): void
    {
        $method = $_SERVER['REQUEST_METHOD'] ?? 'GET';
        if (!in_array($method, ['GET', 'POST'], true)) {
            http_response_code(405);
            header('Allow: GET, POST');
            exit('Méthode non autorisée');
        }

        $query = $_REQUEST['q'] ?? null;
        if ($query !== null && !is_string($query)) {
            // Array input would otherwise surface as a TypeError and a 500.
            http_response_code(400);
            exit('Paramètre q invalide');
        }

        if ($query !== null) {
            // Exits on a match, returns false otherwise.
            $this->bang->tryRedirect($query);
        }

        $instance = rtrim($this->cache->getRandomUrl(), '/');
        $target = $query === null
            ? $instance
            : $instance . '/search?' . http_build_query(['q' => $query]);

        // POST is answered with the same redirect, which the client follows as a GET.
        header("Location: $target");
        exit;
    }
}
|
||||
|
||||
/**
 * Answers a request by redirecting to a randomly chosen engine template.
 */
class EngineHandler
{
    public function __construct(private EngineManager $engineManager, private BangManager $bang) {}

    /**
     * Sends the redirect (or an error status) and terminates the script.
     *
     * - Methods other than GET and POST get a 405.
     * - A "q" parameter that is not a string gets a 400.
     * - A bang query is redirected by the bang manager.
     * - Any other query replaces the "%s" placeholder of a random template.
     * - Without a query the template is used with its "?q=%s" / "?query=%s"
     *   suffix removed.
     *
     * @throws RuntimeException When no engine template is configured.
     */
    public function handle(): void
    {
        $method = $_SERVER['REQUEST_METHOD'] ?? 'GET';
        if (!in_array($method, ['GET', 'POST'], true)) {
            http_response_code(405);
            header('Allow: GET, POST');
            exit('Méthode non autorisée');
        }

        $query = $_REQUEST['q'] ?? null;
        if ($query !== null && !is_string($query)) {
            // Array input would otherwise surface as a TypeError and a 500.
            http_response_code(400);
            exit('Paramètre q invalide');
        }

        if ($query !== null) {
            // Exits on a match, returns false otherwise.
            $this->bang->tryRedirect($query);
        }

        $engineTemplate = $this->engineManager->getRandomEngine();

        if ($query === null) {
            // Landing page of the randomly chosen engine: the template minus its query suffix.
            $target = str_replace(['?q=%s', '?query=%s'], '', $engineTemplate);
        } else {
            // Plain substitution instead of sprintf(): a template containing
            // any other "%" sequence (e.g. "%20") would make sprintf() throw.
            $target = str_replace('%s', rawurlencode($query), $engineTemplate);
        }

        // POST is answered with the same redirect, which the client follows as a GET.
        header("Location: $target");
        exit;
    }
}
|
||||
|
||||
// -------------------- EXECUTION --------------------
|
||||
try {
    $bang = new BangManager();

    if (USE_SEARX_INSTANCES) {
        $cache = new CacheManager();

        // Rebuild the URL list when it is missing or too old.
        if ($cache->isExpired() && (!$cache->downloadInstances() || !$cache->extractValidUrls())) {
            error_log('Erreur lors de la génération du cache');
            http_response_code(500);
            exit('Erreur interne : veuillez réessayer plus tard.');
        }

        $handler = new SearxHandler($cache, $bang);
    } else {
        // The engine list is only read in this mode, so a missing engines.txt
        // cannot break the SearX mode.
        $handler = new EngineHandler(new EngineManager(), $bang);
    }

    $handler->handle();
} catch (\Throwable $e) {
    // Technical details go to the log only; the client gets a generic message.
    error_log('Erreur interne : ' . $e->getMessage() . ' | Trace: ' . $e->getTraceAsString());
    http_response_code(500);
    exit('Erreur interne');
}
|
||||
|
||||
// Rebuild the cache when it is missing or too old; the download and the
// extraction only run when the cache is expired, in that order.
$cacheUsable = !cache_expired() || (download_instances_json() && extract_valid_urls());
if (!$cacheUsable) {
    error_log('Erreur lors de la génération du cache');
    http_response_code(500);
    exit('Erreur interne');
}

// Read the search query, if any.
$query = $_GET['q'] ?? null;

if ($query === null) {
    // No query: send the visitor to a random instance.
    redirect_random_url();
} else {
    if (!is_valid_query($query)) {
        http_response_code(400);
        exit('Paramètre q invalide');
    }

    // A matching bang redirects and ends the script inside try_redirect_bang().
    $bangHandled = try_redirect_bang($query);
    if (!$bangHandled) {
        redirect_search($query);
    }
}
|
||||
|
||||
// — Fonctions — //
|
||||
|
||||
/**
 * Tells whether URLS_TXT is missing or older than CACHE_MAX_AGE seconds.
 */
function cache_expired(): bool
{
    return !file_exists(URLS_TXT)
        || (time() - filemtime(URLS_TXT)) > CACHE_MAX_AGE;
}
|
||||
|
||||
/**
 * Fetches INSTANCES_URL with cURL and stores the body in INSTANCES_JSON.
 *
 * @return bool True only when the transfer succeeded with HTTP 200 and the
 *              body was written completely.
 */
function download_instances_json(): bool
{
    $handle = curl_init(INSTANCES_URL);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($handle, CURLOPT_TIMEOUT, 10);
    curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, true);

    $body = curl_exec($handle);
    $status = curl_getinfo($handle, CURLINFO_HTTP_CODE);
    $transferFailed = $body === false;

    if ($transferFailed) {
        error_log('curl_exec failed: ' . curl_error($handle));
    }
    curl_close($handle);

    if ($transferFailed || $status !== 200) {
        error_log("Erreur téléchargement instances.json, HTTP $status");

        return false;
    }

    return safe_file_put_contents(INSTANCES_JSON, $body);
}
|
||||
|
||||
/**
 * Filters INSTANCES_JSON and writes the accepted URLs to URLS_TXT.
 *
 * An instance is kept when its network type is "normal", its HTTP status code
 * is 200, both timing success percentages are exactly 100.0, and its key is a
 * valid http(s) URL. Trailing slashes are removed from the kept URLs.
 *
 * @return bool False (after logging) when the catalogue is missing,
 *              unreadable, malformed or yields no URL; otherwise the result
 *              of the write.
 */
function extract_valid_urls(): bool
{
    if (!file_exists(INSTANCES_JSON)) {
        error_log("instances.json introuvable");

        return false;
    }

    $raw = file_get_contents(INSTANCES_JSON);
    if ($raw === false) {
        error_log("Impossible de lire instances.json");

        return false;
    }

    $decoded = json_decode($raw, true);
    $instances = $decoded['instances'] ?? null;
    if (!is_array($instances)) {
        error_log("JSON mal formé");

        return false;
    }

    $kept = [];
    foreach ($instances as $address => $details) {
        // Guard clauses, evaluated in the same order as the acceptance rules.
        if (($details['network_type'] ?? '') !== 'normal') {
            continue;
        }
        if (($details['http']['status_code'] ?? 0) !== 200) {
            continue;
        }
        if (($details['timing']['search']['success_percentage'] ?? 0) !== 100.0) {
            continue;
        }
        if (($details['timing']['initial']['success_percentage'] ?? 0) !== 100.0) {
            continue;
        }
        if (!filter_var($address, FILTER_VALIDATE_URL)) {
            continue;
        }
        if (!in_array(parse_url($address, PHP_URL_SCHEME), ['http', 'https'], true)) {
            continue;
        }

        $kept[] = rtrim($address, '/');
    }

    if ($kept === []) {
        error_log("Aucune URL valide");

        return false;
    }

    return safe_file_put_contents(URLS_TXT, implode("\n", $kept));
}
|
||||
|
||||
/**
 * Replaces the content of $filename with $data while holding an exclusive lock.
 *
 * The file is opened without truncation and only emptied once the lock is held.
 *
 * @return bool True when exactly strlen($data) bytes were written; false
 *              (after logging) when the file cannot be opened or locked.
 */
function safe_file_put_contents(string $filename, string $data): bool
{
    $stream = fopen($filename, 'c');
    if ($stream === false) {
        error_log("Impossible d'ouvrir $filename en écriture");

        return false;
    }

    $locked = flock($stream, LOCK_EX);
    $bytes = null;
    if ($locked) {
        ftruncate($stream, 0);
        $bytes = fwrite($stream, $data);
        fflush($stream);
        flock($stream, LOCK_UN);
    }
    fclose($stream);

    if (!$locked) {
        error_log("Impossible de verrouiller $filename");

        return false;
    }

    return $bytes === strlen($data);
}
|
||||
|
||||
/**
 * Reads URLS_TXT under a shared lock and returns its valid URLs.
 *
 * Lines are trimmed; empty lines and lines rejected by FILTER_VALIDATE_URL
 * are dropped.
 *
 * @return array Empty when the file is missing, cannot be opened or cannot
 *               be locked (the last two cases are logged).
 */
function get_all_urls(): array
{
    if (!file_exists(URLS_TXT)) {
        return [];
    }

    $stream = fopen(URLS_TXT, 'r');
    if ($stream === false) {
        error_log("Impossible d'ouvrir urls.txt");

        return [];
    }

    if (!flock($stream, LOCK_SH)) {
        fclose($stream);
        error_log("Impossible de verrouiller urls.txt en lecture");

        return [];
    }

    $collected = [];
    for ($row = fgets($stream); $row !== false; $row = fgets($stream)) {
        $candidate = trim($row);
        if ($candidate === '' || !filter_var($candidate, FILTER_VALIDATE_URL)) {
            continue;
        }
        $collected[] = $candidate;
    }

    flock($stream, LOCK_UN);
    fclose($stream);

    return $collected;
}
|
||||
|
||||
/**
 * Redirects to a randomly chosen URL from get_all_urls() and ends the script.
 *
 * Answers with a 500 status when no URL is available.
 */
function redirect_random_url(): void
{
    $candidates = get_all_urls();
    if ($candidates === []) {
        http_response_code(500);
        exit('Instances indisponibles');
    }

    header('Location: ' . $candidates[array_rand($candidates)]);
    exit;
}
|
||||
|
||||
/**
 * Redirects to the "/search" endpoint of a random instance and ends the script.
 *
 * The target is rebuilt from the scheme, host, optional port and path of the
 * chosen URL, with "/search?q=<encoded query>" appended. Answers with a 500
 * status when no URL is available or the chosen URL cannot be parsed.
 */
function redirect_search(string $query): void
{
    $pool = get_all_urls();
    if ($pool === []) {
        http_response_code(500);
        exit('Instances indisponibles pour recherche');
    }

    $parts = parse_url($pool[array_rand($pool)]);
    if ($parts === false) {
        error_log("Base URL invalide dans urls.txt");
        http_response_code(500);
        exit('Erreur interne');
    }

    $searchPath = rtrim($parts['path'] ?? '', '/') . '/search';
    $encodedQuery = http_build_query(['q' => $query]);

    $origin = $parts['scheme'] . '://' . $parts['host'];
    $origin .= isset($parts['port']) ? ':' . $parts['port'] : '';

    header('Location: ' . $origin . $searchPath . '?' . $encodedQuery);
    exit;
}
|
||||
|
||||
/**
 * Validates a search query after trimming surrounding whitespace.
 *
 * The trimmed query must be non-empty, at most 200 characters long, valid
 * UTF-8, and free of ASCII control characters (0x00-0x1F and 0x7F).
 */
function is_valid_query(string $query): bool
{
    $candidate = trim($query);

    // Checks short-circuit in this order: emptiness, length, encoding, control characters.
    return $candidate !== ''
        && mb_strlen($candidate) <= 200
        && mb_check_encoding($candidate, 'UTF-8')
        && preg_match('/^[^\x00-\x1F\x7F]+$/u', $candidate) === 1;
}
|
||||
|
||||
// --- Gestion bangs --- //
|
||||
|
||||
/**
 * Loads the bang definitions from bang.json located next to this script.
 *
 * @return array The decoded JSON, or an empty array (after logging) when the
 *               file is missing, unreadable or does not decode to an array.
 */
function load_bangs(): array
{
    $path = __DIR__ . '/bang.json';

    if (!file_exists($path)) {
        error_log("bang.json introuvable");

        return [];
    }

    $raw = file_get_contents($path);
    if ($raw === false) {
        error_log("Impossible de lire bang.json");

        return [];
    }

    $decoded = json_decode($raw, true);
    if (is_array($decoded)) {
        return $decoded;
    }

    error_log("bang.json mal formé");

    return [];
}
|
||||
|
||||
/**
 * Redirects to a bang target when $query has the form "!code term".
 *
 * The first definition from load_bangs() whose "bang" equals the code is
 * used: its "url" (without trailing slashes) gets "?q=<encoded term>"
 * appended, the Location header is sent and the script exits.
 *
 * @return bool False when the query is not a bang, has no search term, or no
 *              definition matches; a match never returns.
 */
function try_redirect_bang(string $query): bool
{
    if (!str_starts_with($query, '!')) {
        return false;
    }

    if (!preg_match('/^!(\w+)\s?(.*)$/u', $query, $parts)) {
        return false;
    }

    [, $code, $rest] = $parts;
    $term = trim($rest);
    if ($term === '') {
        return false;
    }

    // An empty definition list simply makes the loop a no-op.
    foreach (load_bangs() as $definition) {
        if (($definition['bang'] ?? '') !== $code) {
            continue;
        }

        header('Location: ' . rtrim($definition['url'], '/') . '?q=' . rawurlencode($term));
        exit;
    }

    return false;
}
|
||||
exit('Erreur interne : veuillez réessayer plus tard.');
|
||||
}
|
||||
8
url.txt
Normal file
8
url.txt
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
https://marginalia-search.com/search?query=%s
|
||||
https://swisscows.com/fr/web?query=%s
|
||||
https://www.qwant.com/?q=%s
|
||||
https://search.brave.com/search?q=%s
|
||||
https://www.ecosia.org/search?q=%s
|
||||
https://www.mojeek.com/search?q=%s
|
||||
https://duckduckgo.com/?q=%s
|
||||
https://www.startpage.com/do/search?q=%s
|
||||
Loading…
Add table
Add a link
Reference in a new issue