# Review notes (commentary only; the three patch lines below are unchanged).
#
# Purpose: lets bt/scrape.php answer one scrape request that carries several
# info_hash parameters, and adds $bb_cfg['max_scrapes'] (set to 20) to cap how
# many hashes are sent to the database lookup.
#
# bt/announce.php
# - $info_hash_hex is now bin2hex($info_hash) only when
#   mb_check_encoding($info_hash, 'UTF8') is false; otherwise it is the raw
#   $info_hash. NOTE(review): a binary hash whose bytes happen to form valid
#   UTF-8 is therefore left un-hexed despite the variable name - confirm that
#   this is intended.
# - 'peers' is added to $output only when $peers is non-empty, mirroring the
#   existing 'peers6' handling.
#
# bt/scrape.php
# - The same mb_check_encoding / bin2hex change is applied to $info_hash_hex.
# - Hashes are collected by running /info_hash=([^&]*)/i over
#   $_SERVER["QUERY_STRING"]; each capture is urldecode()d before use. The
#   pattern is unanchored, so it also matches parameter names that merely end
#   in "info_hash" (for example the "?info_hash" variant handled near the top
#   of the file).
# - For every hash the cache is checked under
#   SCRAPE_LIST_PREFIX . bin2hex(<decoded hash>). Hits are copied into
#   $torrents['files']; misses are passed through DB()->escape(), quoted, and
#   queued for a single "tor.info_hash IN ( ... )" query.
# - Only the list of misses is truncated to $bb_cfg['max_scrapes']; the number
#   of cache lookups per request is not capped.
# - $info_hash_count is taken before that truncation, so the LIMIT value can
#   exceed the number of hashes in the IN list. Presumably harmless if
#   info_hash values are unique - verify.
# - Each returned row is stored in $torrents['files'] under the row's
#   info_hash value and cached as a one-entry array:
#   array_slice($torrents['files'], -1, null, true) takes the last entry,
#   which is the one just written provided that key was not already present.
# - The removed query also matched tor.info_hash_v2 (exact match, or a prefix
#   LIKE for truncated hashes); the new query filters on tor.info_hash only.
#   NOTE(review): confirm whether v2 lookups are still needed on this path.
# - "$is_bt_v2 = null;" is removed, while the "Check info_hash version" block
#   is only partly visible in this hunk. NOTE(review): check whether that
#   block still assigns $is_bt_v2 and whether anything reads it afterwards.
# - The "Torrent not registered" message reports $info_hash_hex, which is
#   derived from $_GET['info_hash'], i.e. a single hash even when several
#   were requested.
# - The response is emitted with die(Bencode::encode($torrents)).
#
# library/config.php
# - scrape_interval changes from 60 to 100; max_scrapes = 20 is added.
From 3ba3b5a8db0db14c5a8fed529d76f9f479e1ec97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=B8nstantine=20Kovalensky?= <45331093+kovalensky@users.noreply.github.com> Date: Fri, 3 Nov 2023 12:38:54 +0400 Subject: [PATCH] Multiple Scrape (#1018) --- bt/announce.php | 6 ++-- bt/scrape.php | 76 ++++++++++++++++++++++++++++------------------ library/config.php | 3 +- 3 files changed, 52 insertions(+), 33 deletions(-) diff --git a/bt/announce.php b/bt/announce.php index 902eab511..94c9eb9bb 100644 --- a/bt/announce.php +++ b/bt/announce.php @@ -67,7 +67,7 @@ if (!isset($info_hash)) { } // Store info hash in hex format -$info_hash_hex = bin2hex($info_hash); +$info_hash_hex = mb_check_encoding($info_hash, 'UTF8') ? $info_hash : bin2hex($info_hash); // Store peer id $peer_id_sql = rtrim(DB()->escape(htmlCHR($peer_id)), ' '); @@ -436,9 +436,11 @@ if (!$output) { 'complete' => (int)$seeders, 'incomplete' => (int)$leechers, 'downloaded' => (int)$client_completed, - 'peers' => $peers, ]; + if (!empty($peers)) { + $output['peers'] = $peers; + } if (!empty($peers6)) { $output['peers6'] = $peers6; } diff --git a/bt/scrape.php b/bt/scrape.php index ee817da3c..eaaa90e59 100644 --- a/bt/scrape.php +++ b/bt/scrape.php @@ -22,16 +22,14 @@ if (isset($_GET['?info_hash']) && !isset($_GET['info_hash'])) { $_GET['info_hash'] = $_GET['?info_hash']; } -$is_bt_v2 = null; $info_hash = isset($_GET['info_hash']) ? (string)$_GET['info_hash'] : null; // Verify info_hash if (!isset($info_hash)) { msg_die('info_hash was not provided'); } - // Store info hash in hex format -$info_hash_hex = bin2hex($info_hash); +$info_hash_hex = mb_check_encoding($info_hash, 'UTF8') ? $info_hash : bin2hex($info_hash); // Check info_hash version if (strlen($info_hash) == 32) { @@ -42,37 +40,55 @@ if (strlen($info_hash) == 32) { msg_die('Invalid info_hash: ' . $info_hash_hex); } -if ($lp_scrape_info = CACHE('tr_cache')->get(SCRAPE_LIST_PREFIX . 
$info_hash_hex)) { - die(\Arokettu\Bencode\Bencode::encode($lp_scrape_info)); -} +// Handle multiple hashes -$info_hash_sql = rtrim(DB()->escape($info_hash), ' '); -/** - * Поскольку торрент-клиенты в настоящее время обрезают инфо-хэш до 20 символов (независимо от его типа, как известно v1 = 20 символов, а v2 = 32 символа), - * то результатов $is_bt_v2 (исходя из длины строки определяем тип инфо-хэша) проверки нам будет мало, именно поэтому происходит поиск v2 хэша, если торрент является v1 (по длине) и если в tor.info_hash столбце нету v1 хэша. - */ -$info_hash_where = $is_bt_v2 ? "WHERE tor.info_hash_v2 = '$info_hash_sql'" : "WHERE tor.info_hash = '$info_hash_sql' OR tor.info_hash_v2 LIKE '$info_hash_sql%'"; +preg_match_all('/info_hash=([^&]*)/i', $_SERVER["QUERY_STRING"], $info_hash_array); -$row = DB()->fetch_row(" - SELECT tor.complete_count, snap.seeders, snap.leechers - FROM " . BB_BT_TORRENTS . " tor - LEFT JOIN " . BB_BT_TRACKER_SNAP . " snap ON (snap.topic_id = tor.topic_id) - $info_hash_where - LIMIT 1 -"); + $torrents = []; + $info_hashes = []; -if (!$row) { - msg_die('Torrent not registered, info_hash = ' . $info_hash_hex); -} + foreach ($info_hash_array[1] as $hash) { + if ($scrape_cache = CACHE('tr_cache')->get(SCRAPE_LIST_PREFIX . bin2hex(urldecode($hash)))) { + $torrents['files'][$info_key = array_key_first($scrape_cache)] = $scrape_cache[$info_key]; + } + else{ + $info_hashes[] = '\''. DB()->escape((urldecode($hash))) . '\''; + } + } -$output['files'][$info_hash] = [ - 'complete' => (int)$row['seeders'], - 'downloaded' => (int)$row['complete_count'], - 'incomplete' => (int)$row['leechers'], -]; + $info_hash_count = count($info_hashes); -$peers_list_cached = CACHE('tr_cache')->set(SCRAPE_LIST_PREFIX . 
$info_hash_hex, $output, SCRAPE_LIST_EXPIRE); + if (!empty($info_hash_count)) { -echo \Arokettu\Bencode\Bencode::encode($output); + if ($info_hash_count > $bb_cfg['max_scrapes']) { + $info_hashes = array_slice($info_hashes, 0, $bb_cfg['max_scrapes']); + } -exit; + $info_hashes_sql = 'tor.info_hash' . ' IN ( ' . implode(', ', $info_hashes). ' )'; + $sql = " + SELECT tor.info_hash, tor.complete_count, snap.seeders, snap.leechers + FROM " . BB_BT_TORRENTS . " tor + LEFT JOIN " . BB_BT_TRACKER_SNAP . " snap ON (snap.topic_id = tor.topic_id) + WHERE $info_hashes_sql + LIMIT $info_hash_count + "; + + $rowset = DB()->fetch_rowset($sql); + + if (count($rowset) > 0) { + foreach ($rowset as $scrapes) { + $torrents['files'][$scrapes['info_hash']] = [ + 'complete' => (int)$scrapes['seeders'], + 'downloaded' => (int)$scrapes['complete_count'], + 'incomplete' => (int)$scrapes['leechers'] + ]; + CACHE('tr_cache')->set(SCRAPE_LIST_PREFIX . bin2hex($scrapes['info_hash']), array_slice($torrents['files'], -1, null, true), SCRAPE_LIST_EXPIRE); + } + } + } + + if (empty($torrents)) { + msg_die('Torrent not registered, info_hash = ' . $info_hash_hex); + } + + die(\Arokettu\Bencode\Bencode::encode($torrents)); diff --git a/library/config.php b/library/config.php index 39e806fa8..2c9a2758a 100644 --- a/library/config.php +++ b/library/config.php @@ -95,7 +95,8 @@ $bb_cfg['gzip_compress'] = false; // compress output // Tracker $bb_cfg['announce_interval'] = 1800; // Announce interval (default: 1800) -$bb_cfg['scrape_interval'] = 60; // Scrape interval (default: 60) +$bb_cfg['scrape_interval'] = 100; // Scrape interval (default: 100) +$bb_cfg['max_scrapes'] = 20; // Allowed number of info-hashes for simultaneous scraping (default: 20) $bb_cfg['passkey_key'] = 'uk'; // Passkey key name in GET request $bb_cfg['ignore_reported_ip'] = false; // Ignore IP reported by client $bb_cfg['verify_reported_ip'] = true; // Verify IP reported by client against $_SERVER['HTTP_X_FORWARDED_FOR']