291 lines
7.2 KiB
PHP
291 lines
7.2 KiB
PHP
<?php
|
|
// index.php
|
|
// Gère génération du cache, redirection classique et gestion des bangs (!code)
|
|
|
|
// Constants
const CACHE_DIR = __DIR__ . '/cache';                  // directory holding the cached files
const INSTANCES_JSON = CACHE_DIR . '/instances.json';  // raw copy of the downloaded instance list
const URLS_TXT = CACHE_DIR . '/urls.txt';              // validated instance URLs, one per line
const INSTANCES_URL = 'https://searx.space/data/instances.json'; // upstream instance list
const CACHE_MAX_AGE = 3600;                            // 1 hour, in seconds
|
|
|
|
// Security headers sent with every response
foreach ([
    'X-Frame-Options: SAMEORIGIN',
    'X-Content-Type-Options: nosniff',
    'Referrer-Policy: no-referrer-when-downgrade',
    "Content-Security-Policy: default-src 'none'; frame-ancestors 'none'; sandbox",
] as $security_header) {
    header($security_header);
}
// Do not leave the loop variable behind in the global scope.
unset($security_header);
|
|
|
|
// Create the cache directory if it does not exist yet.
// mkdir() also fails when a concurrent request created the directory between
// our is_dir() check and the mkdir() call, so the directory is re-checked
// before the failure is treated as fatal.
if (!is_dir(CACHE_DIR) && !mkdir(CACHE_DIR, 0755, true) && !is_dir(CACHE_DIR)) {
    error_log('Erreur création dossier cache');
    http_response_code(500);
    exit('Erreur interne');
}
|
|
|
|
// Refresh the cache when it is missing or older than CACHE_MAX_AGE.
if (cache_expired() && !(download_instances_json() && extract_valid_urls())) {
    error_log('Erreur lors de la génération du cache');

    // A failed refresh is only fatal when there is no earlier URL list to
    // fall back on; otherwise keep serving the stale list instead of turning
    // an upstream outage into an outage of this script.
    if (!file_exists(URLS_TXT)) {
        http_response_code(500);
        exit('Erreur interne');
    }
}
|
|
|
|
// Read the search query; null when the `q` parameter is absent.
$query = $_GET['q'] ?? null;

if ($query !== null) {
    // PHP turns `?q[]=x` into an array. is_valid_query() is typed `string`
    // and would raise an uncaught TypeError on it, so anything that is not a
    // string is rejected as a bad request before validation.
    if (!is_string($query) || !is_valid_query($query)) {
        http_response_code(400);
        exit('Paramètre q invalide');
    }

    // Bang handling: on a match try_redirect_bang() sends the redirect and
    // ends the script itself; a false return means no bang redirect happened.
    if (!try_redirect_bang($query)) {
        redirect_search($query);
    }
} else {
    // No query at all: send the visitor to a random instance.
    redirect_random_url();
}
|
|
|
|
// — Fonctions — //
|
|
|
|
/**
 * Tell whether the URL cache has to be regenerated.
 *
 * @return bool true when URLS_TXT does not exist or was last modified more
 *              than CACHE_MAX_AGE seconds ago, false otherwise.
 */
function cache_expired(): bool
{
    return !file_exists(URLS_TXT)
        || (time() - filemtime(URLS_TXT)) > CACHE_MAX_AGE;
}
|
|
|
|
/**
 * Download the instance list from INSTANCES_URL and store it in INSTANCES_JSON.
 *
 * The transfer is restricted to HTTPS (including any redirect target) and the
 * payload is only written when the server answered 200 with a non-empty body.
 *
 * @return bool true when the payload was downloaded and written, false on any
 *              cURL, HTTP or write failure (the reason is sent to error_log).
 */
function download_instances_json(): bool
{
    $ch = curl_init(INSTANCES_URL);
    if ($ch === false) {
        error_log('curl_init failed for ' . INSTANCES_URL);
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 5,
        // Never let the initial request or a redirect leave HTTPS.
        CURLOPT_PROTOCOLS => CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTPS,
        // Fail fast on an unreachable host instead of using the whole budget.
        CURLOPT_CONNECTTIMEOUT => 5,
        CURLOPT_TIMEOUT => 10,
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
    ]);

    $data = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    if ($data === false) {
        error_log('curl_exec failed: ' . curl_error($ch));
    }
    curl_close($ch);

    // An empty 200 response is as useless as a failed one: do not overwrite
    // the previous copy with it.
    if ($code === 200 && is_string($data) && $data !== '') {
        return safe_file_put_contents(INSTANCES_JSON, $data);
    }

    error_log("Erreur téléchargement instances.json, HTTP $code");
    return false;
}
|
|
|
|
/**
 * Build URLS_TXT from the instance list stored in INSTANCES_JSON.
 *
 * An instance is kept when its network type is "normal", its HTTP status code
 * is 200, both the "search" and "initial" success percentages equal 100, and
 * its key is a syntactically valid http(s) URL. Kept URLs are written one per
 * line, without trailing slash.
 *
 * @return bool true when at least one URL was kept and the file was written,
 *              false otherwise (the reason is sent to error_log).
 */
function extract_valid_urls(): bool
{
    if (!file_exists(INSTANCES_JSON)) {
        error_log("instances.json introuvable");
        return false;
    }

    $content = file_get_contents(INSTANCES_JSON);
    if ($content === false) {
        error_log("Impossible de lire instances.json");
        return false;
    }

    $j = json_decode($content, true);
    if (!is_array($j) || !isset($j['instances']) || !is_array($j['instances'])) {
        error_log("JSON mal formé");
        return false;
    }

    // json_decode() yields an int for `100` and a float for `100.0`. A strict
    // comparison against the float literal would silently reject the int form,
    // so the value is normalised to float before being compared.
    $is_full_success = static function ($value): bool {
        return (is_int($value) || is_float($value)) && (float) $value === 100.0;
    };

    $valid = [];
    foreach ($j['instances'] as $url => $data) {
        // Array keys that look like integers are decoded as ints.
        $url = (string) $url;

        if (
            is_array($data)
            && ($data['network_type'] ?? '') === 'normal'
            && ($data['http']['status_code'] ?? 0) === 200
            && $is_full_success($data['timing']['search']['success_percentage'] ?? null)
            && $is_full_success($data['timing']['initial']['success_percentage'] ?? null)
            && filter_var($url, FILTER_VALIDATE_URL)
            && in_array(parse_url($url, PHP_URL_SCHEME), ['http', 'https'], true)
        ) {
            $valid[] = rtrim($url, '/');
        }
    }

    if (empty($valid)) {
        error_log("Aucune URL valide");
        return false;
    }

    return safe_file_put_contents(URLS_TXT, implode("\n", $valid));
}
|
|
|
|
/**
 * Replace the content of a file while holding an exclusive lock on it.
 *
 * The file is opened in 'c' mode (created if missing, not truncated on open)
 * so that the truncation only happens once the exclusive lock is held.
 *
 * @param string $filename Path of the file to write.
 * @param string $data     New content of the file.
 *
 * @return bool true only when the file was truncated, every byte of $data was
 *              written and the stream was flushed; false otherwise.
 */
function safe_file_put_contents(string $filename, string $data): bool
{
    $fp = fopen($filename, 'c');
    if (!$fp) {
        error_log("Impossible d'ouvrir $filename en écriture");
        return false;
    }

    if (!flock($fp, LOCK_EX)) {
        fclose($fp);
        error_log("Impossible de verrouiller $filename");
        return false;
    }

    try {
        // Without a successful truncation a shorter payload would leave the
        // tail of the previous content behind it.
        if (!ftruncate($fp, 0)) {
            error_log("Impossible de tronquer $filename");
            return false;
        }

        $written = fwrite($fp, $data);

        // Success requires a complete write AND a successful flush.
        return $written === strlen($data) && fflush($fp);
    } finally {
        // Release the lock and the handle on every path.
        flock($fp, LOCK_UN);
        fclose($fp);
    }
}
|
|
|
|
/**
 * Read the cached instance URLs from URLS_TXT.
 *
 * The file is read line by line under a shared lock; each line is trimmed and
 * kept only when it is non-empty and passes FILTER_VALIDATE_URL.
 *
 * @return array List of URLs; empty when the file is missing, cannot be
 *               opened or locked, or contains no valid URL.
 */
function get_all_urls(): array
{
    if (!file_exists(URLS_TXT)) {
        return [];
    }

    $handle = fopen(URLS_TXT, 'r');
    if (!$handle) {
        error_log("Impossible d'ouvrir urls.txt");
        return [];
    }

    if (!flock($handle, LOCK_SH)) {
        fclose($handle);
        error_log("Impossible de verrouiller urls.txt en lecture");
        return [];
    }

    $collected = [];
    for ($raw = fgets($handle); $raw !== false; $raw = fgets($handle)) {
        $candidate = trim($raw);
        if ($candidate === '' || !filter_var($candidate, FILTER_VALIDATE_URL)) {
            continue;
        }
        $collected[] = $candidate;
    }

    flock($handle, LOCK_UN);
    fclose($handle);

    return $collected;
}
|
|
|
|
/**
 * Redirect the client to a randomly chosen cached instance and end the script.
 *
 * Answers with HTTP 500 and the message 'Instances indisponibles' when
 * get_all_urls() returns no URL. This function never returns to its caller.
 */
function redirect_random_url(): void
{
    $candidates = get_all_urls();

    if (count($candidates) === 0) {
        http_response_code(500);
        exit('Instances indisponibles');
    }

    $picked = array_rand($candidates);
    header('Location: ' . $candidates[$picked]);
    exit;
}
|
|
|
|
/**
 * Redirect the client to the search page of a randomly chosen cached instance
 * and end the script.
 *
 * The target is rebuilt from the scheme, host, optional port and path of the
 * instance URL, followed by '/search?q=<encoded query>'. Answers with
 * HTTP 500 when no instance is available or when the chosen URL cannot be
 * split into at least a scheme and a host. This function never returns.
 *
 * @param string $query Search terms, already validated by the caller.
 */
function redirect_search(string $query): void
{
    $urls = get_all_urls();
    if (empty($urls)) {
        http_response_code(500);
        exit('Instances indisponibles pour recherche');
    }
    $base = $urls[array_rand($urls)];

    // parse_url() can succeed without returning a scheme or a host; both are
    // required below, so their absence is treated like a parse failure.
    $parsed = parse_url($base);
    if ($parsed === false || !isset($parsed['scheme'], $parsed['host'])) {
        error_log("Base URL invalide dans urls.txt");
        http_response_code(500);
        exit('Erreur interne');
    }

    $target = $parsed['scheme'] . '://' . $parsed['host'];
    if (isset($parsed['port'])) {
        $target .= ':' . $parsed['port'];
    }

    // Keep a possible sub-path of the instance and append its search endpoint.
    $path = rtrim($parsed['path'] ?? '', '/') . '/search';
    $target .= $path . '?' . http_build_query(['q' => $query]);

    header("Location: $target");
    exit;
}
|
|
|
|
/**
 * Validate the raw `q` parameter.
 *
 * The string is checked exactly as received, because callers go on using the
 * value they passed in, not a trimmed copy. Validating a trimmed copy would
 * let leading or trailing control characters through.
 *
 * @param string $query Raw query string.
 *
 * @return bool true when $query contains something other than whitespace, is
 *              valid UTF-8, is at most 200 characters long and contains no
 *              control character (0x00-0x1F, 0x7F); false otherwise.
 */
function is_valid_query(string $query): bool
{
    // Whitespace-only input carries no search terms.
    if (trim($query) === '') {
        return false;
    }

    // Check that the string is valid UTF-8.
    if (!mb_check_encoding($query, 'UTF-8')) {
        return false;
    }

    if (mb_strlen($query, 'UTF-8') > 200) {
        return false;
    }

    // The D modifier makes `$` match only at the very end of the subject;
    // without it a trailing "\n" would be accepted.
    return preg_match('/^[^\x00-\x1F\x7F]+$/Du', $query) === 1;
}
|
|
|
|
// --- Gestion bangs --- //
|
|
|
|
/**
 * Load the bang definitions from bang.json, located next to this script.
 *
 * @return array The decoded JSON when it is an array; an empty array when the
 *               file is missing, unreadable or does not decode to an array
 *               (the reason is sent to error_log).
 */
function load_bangs(): array
{
    $path = __DIR__ . '/bang.json';
    $loaded = [];
    $failure = null;

    if (!file_exists($path)) {
        $failure = "bang.json introuvable";
    } else {
        $raw = file_get_contents($path);
        if ($raw === false) {
            $failure = "Impossible de lire bang.json";
        } else {
            $decoded = json_decode($raw, true);
            if (is_array($decoded)) {
                $loaded = $decoded;
            } else {
                $failure = "bang.json mal formé";
            }
        }
    }

    if ($failure !== null) {
        error_log($failure);
    }

    return $loaded;
}
|
|
|
|
/**
 * Handle a "!code terms" query by redirecting to the matching bang target.
 *
 * When a usable bang entry matches, the redirect is sent and the script ends
 * inside this function. Entries that are not arrays, or whose `url` is
 * missing, not a string or empty, are skipped instead of producing a broken
 * relative redirect.
 *
 * @param string $query Search query, already validated by the caller.
 *
 * @return bool Always false when it returns: the query is not a bang, has no
 *              search terms after the code, or no usable entry matched.
 */
function try_redirect_bang(string $query): bool
{
    if (substr($query, 0, 1) !== '!') {
        return false;
    }

    if (!preg_match('/^!(\w+)\s?(.*)$/u', $query, $matches)) {
        return false;
    }
    $bang_code = $matches[1];
    $search_term = trim($matches[2]);

    // A bang without search terms is left to the regular search.
    if ($search_term === '') {
        return false;
    }

    foreach (load_bangs() as $bang) {
        if (!is_array($bang) || ($bang['bang'] ?? '') !== $bang_code) {
            continue;
        }

        $url = $bang['url'] ?? null;
        $base_url = is_string($url) ? rtrim(trim($url), '/') : '';
        if ($base_url === '') {
            error_log("bang.json: URL manquante pour !$bang_code");
            continue;
        }

        // Extend an existing query string instead of starting a second one.
        $separator = strpos($base_url, '?') === false ? '?' : '&';
        $search_url = $base_url . $separator . 'q=' . rawurlencode($search_term);

        header("Location: $search_url");
        exit;
    }

    return false;
}
|