diff --git a/CHANGELOG.md b/CHANGELOG.md index b93e97ed9..00a517728 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Removed `wbr()` [\#1387](https://github.com/torrentpier/torrentpier/pull/1387) ([belomaxorka](https://github.com/belomaxorka)) - Removed converting for legacy md5 passwords [\#1386](https://github.com/torrentpier/torrentpier/pull/1386) ([belomaxorka](https://github.com/belomaxorka)) - Fixed issue with poll_users cleaning at every cron job startup [\#1390](https://github.com/torrentpier/torrentpier/pull/1390) ([belomaxorka](https://github.com/belomaxorka)) +- Improved word censor 🤐 [\#1393](https://github.com/torrentpier/torrentpier/pull/1393) ([belomaxorka](https://github.com/belomaxorka)) - Used hashing for filenames generation [\#1385](https://github.com/torrentpier/torrentpier/pull/1385) ([belomaxorka](https://github.com/belomaxorka)) - Minor improvements [\#1382](https://github.com/torrentpier/torrentpier/pull/1382), [\#1383](https://github.com/torrentpier/torrentpier/pull/1383), [\#1391](https://github.com/torrentpier/torrentpier/pull/1391) ([belomaxorka](https://github.com/belomaxorka)) - Some bugfixes [\#1380](https://github.com/torrentpier/torrentpier/pull/1380) ([belomaxorka](https://github.com/belomaxorka)) diff --git a/index.php b/index.php index 92e3469a0..d5f6468b8 100644 --- a/index.php +++ b/index.php @@ -327,7 +327,7 @@ if ($bb_cfg['show_latest_news']) { foreach ($latest_news as $news) { $template->assign_block_vars('news', [ 'NEWS_TOPIC_ID' => $news['topic_id'], - 'NEWS_TITLE' => str_short($news['topic_title'], $bb_cfg['max_news_title']), + 'NEWS_TITLE' => str_short($wordCensor->censorString($news['topic_title']), $bb_cfg['max_news_title']), 'NEWS_TIME' => bb_date($news['topic_time'], 'd-M', false), 'NEWS_IS_NEW' => is_unread($news['topic_time'], $news['topic_id'], $news['forum_id']), ]); @@ -346,7 +346,7 @@ if ($bb_cfg['show_network_news']) { foreach ($network_news as $net) { $template->assign_block_vars('net', [ 'NEWS_TOPIC_ID' => $net['topic_id'], - 'NEWS_TITLE' => str_short($net['topic_title'], $bb_cfg['max_net_title']), + 'NEWS_TITLE' => str_short($wordCensor->censorString($net['topic_title']), $bb_cfg['max_net_title']), 'NEWS_TIME' => bb_date($net['topic_time'], 'd-M', false), 'NEWS_IS_NEW' => is_unread($net['topic_time'], $net['topic_id'], $net['forum_id']), ]); diff --git a/library/ajax/posts.php b/library/ajax/posts.php index 660918d85..0d4526a1b 100644 --- a/library/ajax/posts.php +++ b/library/ajax/posts.php @@ -47,13 +47,6 @@ if (isset($this->request['post_id'])) { $is_auth = auth(AUTH_ALL, $post['forum_id'], $userdata, $post); } -if (!defined('WORD_LIST_OBTAINED')) { - $orig_word = []; - $replace_word = []; - obtain_word_list($orig_word, $replace_word); - define('WORD_LIST_OBTAINED', true); -} - switch ($this->request['type']) { case 'delete': if ($post['post_id'] != $post['topic_first_post_id'] && $is_auth['auth_delete'] && ($is_auth['auth_mod'] || ($userdata['user_id'] == $post['poster_id'] && $post['topic_last_post_id'] == $post['post_id'] && $post['post_time'] + 3600 * 3 > TIMENOW))) { @@ -83,13 +76,10 @@ switch ($this->request['type']) { $message = "[quote=\"" . $quote_username . "\"][qpost=" . $post['post_id'] . "]" . $post['post_text'] . "[/quote]\r"; // hide user passkey - $message = preg_replace('#(?<=\?uk=)[a-zA-Z0-9]{10}(?=&)#', 'passkey', $message); + $message = preg_replace('#(?<=\?uk=)[a-zA-Z0-9](?=&)#', 'passkey', $message); // hide sid - $message = preg_replace('#(?<=[\?&;]sid=)[a-zA-Z0-9]{12}#', 'sid', $message); - - if (!empty($orig_word)) { - $message = (!empty($message)) ? preg_replace($orig_word, $replace_word, $message) : ''; - } + $message = preg_replace('#(?<=[\?&;]sid=)[a-zA-Z0-9]#', 'sid', $message); + $message = $wordCensor->censorString($message); if ($post['post_id'] == $post['topic_first_post_id']) { $message = "[quote]" . $post['topic_title'] . "[/quote]\r"; diff --git a/library/includes/bbcode.php b/library/includes/bbcode.php index a758dc707..06a8fd809 100644 --- a/library/includes/bbcode.php +++ b/library/includes/bbcode.php @@ -388,17 +388,12 @@ function add_search_words($post_id, $post_message, $topic_title = '', $only_retu function bbcode2html($text) { - global $bbcode; + global $bbcode, $wordCensor; if (!isset($bbcode)) { $bbcode = new TorrentPier\Legacy\BBCode(); } - $orig_word = []; - $replacement_word = []; - obtain_word_list($orig_word, $replacement_word); - if (count($orig_word)) { - $text = preg_replace($orig_word, $replacement_word, $text); - } + $text = $wordCensor->censorString($text); return $bbcode->bbcode2html($text); } diff --git a/library/includes/functions.php b/library/includes/functions.php index 01ff19822..927bbd0f0 100644 --- a/library/includes/functions.php +++ b/library/includes/functions.php @@ -1606,36 +1606,6 @@ function bb_preg_quote($str, $delimiter) return $text; } -// -// Obtain list of naughty words and build preg style replacement arrays for use by the -// calling script, note that the vars are passed as references this just makes it easier -// to return both sets of arrays -// -function obtain_word_list(&$orig_word, &$replacement_word) -{ - global $bb_cfg; - - if (!$bb_cfg['use_word_censor']) { - return false; - } - - if (!$sql = CACHE('bb_cache')->get('censored')) { - $sql = DB()->fetch_rowset("SELECT word, replacement FROM " . BB_WORDS); - if (!$sql) { - $sql = [['word' => 1, 'replacement' => 1]]; - } - CACHE('bb_cache')->set('censored', $sql, 7200); - } - - foreach ($sql as $row) { - //$orig_word[] = '#(?data; +/** + * Word censor + */ +$wordCensor = new \TorrentPier\Censor(); + /** * Cron */ diff --git a/library/includes/ucp/topic_watch.php b/library/includes/ucp/topic_watch.php index 1f5fadf8c..bfaba229a 100644 --- a/library/includes/ucp/topic_watch.php +++ b/library/includes/ucp/topic_watch.php @@ -81,7 +81,7 @@ if ($watch_count > 0) { 'ROW_CLASS' => (!($i % 2)) ? 'row1' : 'row2', 'POST_ID' => $watch[$i]['topic_first_post_id'], 'TOPIC_ID' => $watch[$i]['topic_id'], - 'TOPIC_TITLE' => str_short($watch[$i]['topic_title'], 70), + 'TOPIC_TITLE' => str_short($wordCensor->censorString($watch[$i]['topic_title']), 70), 'FULL_TOPIC_TITLE' => $watch[$i]['topic_title'], 'U_TOPIC' => TOPIC_URL . $watch[$i]['topic_id'], 'FORUM_TITLE' => $watch[$i]['forum_name'], diff --git a/posting.php b/posting.php index 240e5a73c..410095f34 100644 --- a/posting.php +++ b/posting.php @@ -18,20 +18,15 @@ $page_cfg['load_tpl_vars'] = [ ]; $submit = (bool)@$_REQUEST['post']; -$preview = (bool)@$_REQUEST['preview']; +$refresh = $preview = (bool)@$_REQUEST['preview']; $delete = (bool)@$_REQUEST['delete']; +$mode = (string)@$_REQUEST['mode']; +$confirm = isset($_POST['confirm']); $forum_id = (int)@$_REQUEST[POST_FORUM_URL]; $topic_id = (int)@$_REQUEST[POST_TOPIC_URL]; $post_id = (int)@$_REQUEST[POST_POST_URL]; -$mode = (string)@$_REQUEST['mode']; - -$confirm = isset($_POST['confirm']); - -$refresh = $preview; -$orig_word = $replacement_word = []; - // Set topic type $topic_type = (@$_POST['topictype']) ? (int)$_POST['topictype'] : POST_NORMAL; $topic_type = in_array($topic_type, [POST_NORMAL, POST_STICKY, POST_ANNOUNCE]) ? $topic_type : POST_NORMAL; @@ -461,13 +456,6 @@ if ($refresh || $error_msg || ($submit && $topic_has_new_posts)) { $message = $post_info['post_text']; if ($mode == 'quote') { - if (!defined('WORD_LIST_OBTAINED')) { - $orig_word = []; - $replace_word = []; - obtain_word_list($orig_word, $replace_word); - define('WORD_LIST_OBTAINED', true); - } - if ($post_info['post_attachment'] && !IS_AM) { $message = $post_info['topic_title']; } @@ -475,16 +463,14 @@ if ($refresh || $error_msg || ($submit && $topic_has_new_posts)) { $message = '[quote="' . $quote_username . '"][qpost=' . $post_info['post_id'] . ']' . $message . '[/quote]'; // hide user passkey - $message = preg_replace('#(?<=\?uk=)[a-zA-Z0-9]{10}(?=&)#', 'passkey', $message); + $message = preg_replace('#(?<=\?uk=)[a-zA-Z0-9](?=&)#', 'passkey', $message); // hide sid - $message = preg_replace('#(?<=[\?&;]sid=)[a-zA-Z0-9]{12}#', 'sid', $message); + $message = preg_replace('#(?<=[\?&;]sid=)[a-zA-Z0-9]#', 'sid', $message); - if (!empty($orig_word)) { - $subject = (!empty($subject)) ? preg_replace($orig_word, $replace_word, $subject) : ''; - $message = (!empty($message)) ? preg_replace($orig_word, $replace_word, $message) : ''; - } + $subject = $wordCensor->censorString($subject); + $message = $wordCensor->censorString($message); - if (!preg_match('/^Re:/', $subject) && strlen($subject) > 0) { + if (!preg_match('/^Re:/', $subject) && !empty($subject)) { $subject = 'Re: ' . $subject; } diff --git a/privmsg.php b/privmsg.php index 8837d7d78..4061abff1 100644 --- a/privmsg.php +++ b/privmsg.php @@ -375,18 +375,9 @@ if ($mode == 'read') { // Processing of post // $post_subject = htmlCHR($privmsg['privmsgs_subject']); - $private_message = $privmsg['privmsgs_text']; - - $orig_word = []; - $replacement_word = []; - obtain_word_list($orig_word, $replacement_word); - - if (count($orig_word)) { - $post_subject = preg_replace($orig_word, $replacement_word, $post_subject); - $private_message = preg_replace($orig_word, $replacement_word, $private_message); - } - + $post_subject = $wordCensor->censorString($post_subject); + $private_message = $wordCensor->censorString($private_message); $private_message = bbcode2html($private_message); // @@ -1052,18 +1043,9 @@ if ($mode == 'read') { } if ($preview && !$error) { - $orig_word = []; - $replacement_word = []; - obtain_word_list($orig_word, $replacement_word); - $preview_message = bbcode2html($privmsg_message); - - if (count($orig_word)) { - $preview_subject = preg_replace($orig_word, $replacement_word, $privmsg_subject); - $preview_message = preg_replace($orig_word, $replacement_word, $preview_message); - } else { - $preview_subject = $privmsg_subject; - } + $preview_subject = $wordCensor->censorString($privmsg_subject); + $preview_message = $wordCensor->censorString($preview_message); $s_hidden_fields = ''; $s_hidden_fields .= ''; @@ -1188,9 +1170,6 @@ if ($mode == 'read') { // $template->set_filenames(['body' => 'privmsgs.tpl']); - $orig_word = $replacement_word = []; - obtain_word_list($orig_word, $replacement_word); - // // New message // @@ -1402,12 +1381,7 @@ if ($mode == 'read') { $msg_userid = $row['user_id']; $msg_user = profile_url($row); - - $msg_subject = $row['privmsgs_subject']; - - if (count($orig_word)) { - $msg_subject = preg_replace($orig_word, $replacement_word, $msg_subject); - } + $msg_subject = $wordCensor->censorString($row['privmsgs_subject']); $u_subject = PM_URL . "?folder=$folder&mode=read&" . POST_POST_URL . "=$privmsg_id"; diff --git a/search.php b/search.php index e12f6bdf1..aae65fdf5 100644 --- a/search.php +++ b/search.php @@ -59,12 +59,6 @@ if (isset($_POST['del_my_post'])) { redirect("search.php?u={$user->id}"); } -// -// Define censored word matches -// -$orig_word = $replacement_word = []; -obtain_word_list($orig_word, $replacement_word); - $tracking_topics = get_tracks('topic'); $tracking_forums = get_tracks('forum'); @@ -572,35 +566,27 @@ if ($post_mode) { $topic_id = (int)$topic_id; $forum_id = (int)$first_post['forum_id']; $is_unread_t = is_unread($first_post['topic_last_post_time'], $topic_id, $forum_id); - $topic_title = $first_post['topic_title']; - - if (count($orig_word)) { - $topic_title = preg_replace($orig_word, $replacement_word, $topic_title); - } $template->assign_block_vars('t', array( 'FORUM_ID' => $forum_id, 'FORUM_NAME' => $forum_name_html[$forum_id], 'TOPIC_ID' => $topic_id, - 'TOPIC_TITLE' => $topic_title, + 'TOPIC_TITLE' => $wordCensor->censorString($first_post['topic_title']), 'TOPIC_ICON' => get_topic_icon($first_post, $is_unread_t), )); $quote_btn = $edit_btn = $ip_btn = ''; - $delpost_btn = (IS_AM); + $delpost_btn = IS_AM; // Topic posts block foreach ($topic_posts as $row_num => $post) { if ($post['poster_id'] != BOT_UID) { $quote_btn = true; - $edit_btn = $ip_btn = (IS_AM); + $edit_btn = $ip_btn = IS_AM; } $message = get_parsed_post($post); - - if (count($orig_word)) { - $message = preg_replace($orig_word, $replacement_word, $message); - } + $message = $wordCensor->censorString($message); $template->assign_block_vars('t.p', array( 'ROW_NUM' => $row_num, @@ -801,7 +787,7 @@ else { 'FORUM_NAME' => $forum_name_html[$forum_id], 'TOPIC_ID' => $topic_id, 'HREF_TOPIC_ID' => $moved ? $topic['topic_moved_id'] : $topic['topic_id'], - 'TOPIC_TITLE' => $topic['topic_title'], + 'TOPIC_TITLE' => $wordCensor->censorString($topic['topic_title']), 'IS_UNREAD' => $is_unread, 'TOPIC_ICON' => get_topic_icon($topic, $is_unread), 'PAGINATION' => $moved ? '' : build_topic_pagination(TOPIC_URL . $topic_id, $topic['topic_replies'], $bb_cfg['posts_per_page']), diff --git a/src/Censor.php b/src/Censor.php new file mode 100644 index 000000000..39137c8a2 --- /dev/null +++ b/src/Censor.php @@ -0,0 +1,64 @@ +get('censored')) { + $words = DB()->fetch_rowset("SELECT word, replacement FROM " . BB_WORDS); + CACHE('bb_cache')->set('censored', $words, 7200); + } + + foreach ($words as $word) { + $this->words[] = '#(?replacements[] = $word['replacement']; + } + } + + /** + * Word censor + * + * @param string $word + * @return string + */ + public function censorString(string $word): string + { + return preg_replace($this->words, $this->replacements, $word); + } +} diff --git a/src/Legacy/Atom.php b/src/Legacy/Atom.php index bd2564fe5..536509b80 100644 --- a/src/Legacy/Atom.php +++ b/src/Legacy/Atom.php @@ -169,7 +169,7 @@ class Atom */ private static function create_atom($file_path, $mode, $id, $title, $topics) { - global $bb_cfg, $lang; + global $bb_cfg, $lang, $wordCensor; $date = null; $time = null; $dir = \dirname($file_path); @@ -203,13 +203,7 @@ class Atom if (isset($topic['tor_status'])) { $tor_status = " ({$lang['TOR_STATUS_NAME'][$topic['tor_status']]})"; } - $topic_title = $topic['topic_title']; - $orig_word = []; - $replacement_word = []; - obtain_word_list($orig_word, $replacement_word); - if (\count($orig_word)) { - $topic_title = preg_replace($orig_word, $replacement_word, $topic_title); - } + $topic_title = $wordCensor->censorString($topic['topic_title']); $author_name = $topic['first_username'] ?: $lang['GUEST']; $last_time = $topic['topic_last_post_time']; if ($topic['topic_last_post_edit_time']) { diff --git a/src/Legacy/Post.php b/src/Legacy/Post.php index c9655ed73..f76900524 100644 --- a/src/Legacy/Post.php +++ b/src/Legacy/Post.php @@ -335,7 +335,7 @@ class Post */ public static function user_notification($mode, &$post_data, &$topic_title, &$forum_id, &$topic_id, &$notify_user) { - global $bb_cfg, $lang, $userdata; + global $bb_cfg, $lang, $userdata, $wordCensor; if (!$bb_cfg['topic_notify_enabled']) { return; @@ -358,12 +358,7 @@ class Post "); if ($watch_list) { - $orig_word = $replacement_word = []; - obtain_word_list($orig_word, $replacement_word); - - if (\count($orig_word)) { - $topic_title = preg_replace($orig_word, $replacement_word, $topic_title); - } + $topic_title = $wordCensor->censorString($topic_title); $u_topic = make_url(TOPIC_URL . $topic_id . '&view=newest#newest'); $unwatch_topic = make_url(TOPIC_URL . "$topic_id&unwatch=topic"); diff --git a/viewforum.php b/viewforum.php index 715ba9074..497634e96 100644 --- a/viewforum.php +++ b/viewforum.php @@ -354,10 +354,6 @@ if ($topics_csv = implode(',', $topic_ids)) { "); } -// Define censored word matches -$orig_word = $replacement_word = []; -obtain_word_list($orig_word, $replacement_word); - if ($forum_data['allow_reg_tracker']) { $post_new_topic_url = POSTING_URL . "?mode=new_rel&f=$forum_id"; $post_img = $images['release_new']; @@ -444,7 +440,7 @@ foreach ($topic_rowset as $topic) { 'FORUM_ID' => $forum_id, 'TOPIC_ID' => $topic_id, 'HREF_TOPIC_ID' => $moved ? $topic['topic_moved_id'] : $topic['topic_id'], - 'TOPIC_TITLE' => $topic['topic_title'], + 'TOPIC_TITLE' => $wordCensor->censorString($topic['topic_title']), 'TOPICS_SEPARATOR' => $separator, 'IS_UNREAD' => $is_unread, 'TOPIC_ICON' => get_topic_icon($topic, $is_unread), diff --git a/viewtopic.php b/viewtopic.php index 1f74ce78c..b02d87bd1 100644 --- a/viewtopic.php +++ b/viewtopic.php @@ -365,14 +365,8 @@ if (!$ranks = $datastore->get('ranks')) { $ranks = $datastore->get('ranks'); } -// Define censored word matches -$orig_word = $replacement_word = []; -obtain_word_list($orig_word, $replacement_word); - // Censor topic title -if (count($orig_word)) { - $topic_title = preg_replace($orig_word, $replacement_word, $topic_title); -} +$topic_title = $wordCensor->censorString($topic_title); // Post, reply and other URL generation for templating vars $new_topic_url = POSTING_URL . "?mode=newtopic&f=" . $forum_id; @@ -627,28 +621,26 @@ for ($i = 0; $i < $total_posts; $i++) { } // Replace naughty words - if (count($orig_word)) { - if ($user_sig) { - $user_sig = str_replace( - '\"', '"', - substr( - preg_replace_callback('#(\>(((?>([^><]+|(?R)))*)\<))#s', function ($matches) use ($orig_word, $replacement_word) { - return preg_replace($orig_word, $replacement_word, reset($matches)); - }, '>' . $user_sig . '<'), 1, -1 - ) - ); - } - - $message = str_replace( + if ($user_sig) { + $user_sig = str_replace( '\"', '"', substr( - preg_replace_callback('#(\>(((?>([^><]+|(?R)))*)\<))#s', function ($matches) use ($orig_word, $replacement_word) { - return preg_replace($orig_word, $replacement_word, reset($matches)); - }, '>' . $message . '<'), 1, -1 + preg_replace_callback('#(\>(((?>([^><]+|(?R)))*)\<))#s', function ($matches) use ($wordCensor) { + return $wordCensor->censorString(reset($matches)); + }, '>' . $user_sig . '<'), 1, -1 ) ); } + $message = str_replace( + '\"', '"', + substr( + preg_replace_callback('#(\>(((?>([^><]+|(?R)))*)\<))#s', function ($matches) use ($wordCensor) { + return $wordCensor->censorString(reset($matches)); + }, '>' . $message . '<'), 1, -1 + ) + ); + // Replace newlines (we use this rather than nl2br because till recently it wasn't XHTML compliant) if ($user_sig) { $user_sig = $bb_cfg['user_signature_start'] . $user_sig . $bb_cfg['user_signature_end'];