фиксы

This commit is contained in:
PheRum 2015-05-26 13:05:29 +03:00
commit 931858e211
2 changed files with 12 additions and 12 deletions

View file

@ -2851,12 +2851,12 @@ class Text_LangCorrect
"\xc2\xad", #"мягкие" переносы строк (­) "\xc2\xad", #"мягкие" переносы строк (­)
]; ];
#http://ru.wikipedia.org/wiki/Диакритические_знаки #http://ru.wikipedia.org/wiki/Диакритические_знаки
$s = Text_UTF8::diactrical_remove($s, $additional_chars, $is_can_restored = true, $restore_table); $s = Text_Utf8::diactrical_remove($s, $additional_chars, $is_can_restored = true, $restore_table);
$this->words = []; $this->words = [];
$s = $this->_parse1($s); $s = $this->_parse1($s);
$s = $this->_parse2($s); $s = $this->_parse2($s);
$s = Text_UTF8::diactrical_restore($s, $restore_table); $s = Text_Utf8::diactrical_restore($s, $restore_table);
$words = $this->words; $words = $this->words;
return $s; return $s;
} }
@ -3061,7 +3061,7 @@ class Text_LangCorrect
#если в $s спецсимволов больше чем букв, возвращаем $word #если в $s спецсимволов больше чем букв, возвращаем $word
$sc_count = 0; $sc_count = 0;
$s = preg_replace('/' . $this->sc . '/sSX', '', $s, -1, $sc_count); $s = preg_replace('/' . $this->sc . '/sSX', '', $s, -1, $sc_count);
if ($sc_count > 0 && $sc_count > Text_UTF8::strlen($s)) return $word; if ($sc_count > 0 && $sc_count > Text_Utf8::strlen($s)) return $word;
return reset($suggestions); return reset($suggestions);
} }
@ -3069,7 +3069,7 @@ class Text_LangCorrect
#анализ на основе N-грамм русского и английского языка #анализ на основе N-грамм русского и английского языка
private function _bigram_exists($word, $lang) private function _bigram_exists($word, $lang)
{ {
$word = ($lang === 'en') ? strtolower($word) : Text_UTF8::lowercase($word); $word = ($lang === 'en') ? strtolower($word) : Text_Utf8::lowercase($word);
#шаг 0. #шаг 0.
#проверяем слова в списке слов-исключений #проверяем слова в списке слов-исключений
@ -3088,10 +3088,10 @@ class Text_LangCorrect
&& ! array_key_exists($m[0], $this->vowels3_lc[$lang])) return true; && ! array_key_exists($m[0], $this->vowels3_lc[$lang])) return true;
#шаг 3. #шаг 3.
$length = Text_UTF8::strlen($word); $length = Text_Utf8::strlen($word);
for ($pos = 0, $limit = $length - 1; $pos < $limit; $pos++) for ($pos = 0, $limit = $length - 1; $pos < $limit; $pos++)
{ {
$ss = Text_UTF8::substr($word, $pos, 2); $ss = Text_Utf8::substr($word, $pos, 2);
if ($pos === 0) $ss = ' ' . $ss; #beginning of word if ($pos === 0) $ss = ' ' . $ss; #beginning of word
elseif ($pos === $limit - 1) $ss = $ss . ' '; #ending of word elseif ($pos === $limit - 1) $ss = $ss . ' '; #ending of word
if (array_key_exists($ss, $this->bigrams)) return true; if (array_key_exists($ss, $this->bigrams)) return true;

View file

@ -61,7 +61,7 @@
* @version 2.2.2 * @version 2.2.2
*/ */
class Text_UTF8 class Text_Utf8
{ {
#REPLACEMENT CHARACTER (for broken char) #REPLACEMENT CHARACTER (for broken char)
const REPLACEMENT_CHAR = "\xEF\xBF\xBD"; #U+FFFD const REPLACEMENT_CHAR = "\xEF\xBF\xBD"; #U+FFFD
@ -3199,11 +3199,11 @@ class Text_UTF8
if (self::is_ascii($s)) return '(?i:' . preg_quote($s, $delimiter) . ')'; #speed improve if (self::is_ascii($s)) return '(?i:' . preg_quote($s, $delimiter) . ')'; #speed improve
$s_re = ''; $s_re = '';
$s_lc = Text_UTF8::lowercase($s); if ($s_lc === false) return false; $s_lc = Text_Utf8::lowercase($s); if ($s_lc === false) return false;
$s_uc = Text_UTF8::uppercase($s); if ($s_uc === false) return false; $s_uc = Text_Utf8::uppercase($s); if ($s_uc === false) return false;
$chars_lc = Text_UTF8::str_split($s_lc); if ($chars_lc === false) return false; $chars_lc = Text_Utf8::str_split($s_lc); if ($chars_lc === false) return false;
$chars_uc = Text_UTF8::str_split($s_uc); if ($chars_uc === false) return false; $chars_uc = Text_Utf8::str_split($s_uc); if ($chars_uc === false) return false;
foreach ($chars_lc as $i => $char) foreach ($chars_lc as $i => $char)
{ {
@ -3971,7 +3971,7 @@ class Text_UTF8
$high_cp = self::ord($high); $high_cp = self::ord($high);
if ($low_cp === false || $high_cp === false) return false; if ($low_cp === false || $high_cp === false) return false;
$a = range($low_cp, $high_cp, $step); $a = range($low_cp, $high_cp, $step);
return array_map(['Text_UTF8', 'chr'], $a); return array_map(['Text_Utf8', 'chr'], $a);
} }
/** /**