diff options
author | Tim Starling <tstarling@wikimedia.org> | 2021-11-29 12:31:51 +1100 |
---|---|---|
committer | Tim Starling <tstarling@wikimedia.org> | 2021-12-01 15:40:12 +1100 |
commit | 0c645f86b90896ddc26711514befec6512bad16c (patch) | |
tree | 09d73e9e163d4fb5e770e38b9c00b4b3346987cf /includes/collation/IcuCollation.php | |
parent | e252029b27132d532281a05aa82069af14f2cdd4 (diff) | |
download | mediawikicore-0c645f86b90896ddc26711514befec6512bad16c.tar.gz mediawikicore-0c645f86b90896ddc26711514befec6512bad16c.zip |
IcuCollation cleanup
* Fix calls to deprecated function utf8ToCodepoint()
* Use a closure in ArrayUtils::findLowerBound() and optimise the code.
* Fix the fixme and remove unused method getPrecompiledData().
* Use __DIR__ not $IP when fetching data.
* Make getFirstLetterData() and getPrimarySortKey() private. Remove
getLetterByIndex(), getSortKeyByLetterIndex(), getFirstLetterCount().
Confirmed no callers.
Partly based on Ppchelko's I9204bb3f633c249519f8d20d6a033ffeb8cce758.
Change-Id: I66ba384c863928ca0c2742a1523d02bd78f241f9
Diffstat (limited to 'includes/collation/IcuCollation.php')
-rw-r--r-- | includes/collation/IcuCollation.php | 91 |
1 files changed, 22 insertions, 69 deletions
diff --git a/includes/collation/IcuCollation.php b/includes/collation/IcuCollation.php index 991aa9523b5d..f53511f6fe2a 100644 --- a/includes/collation/IcuCollation.php +++ b/includes/collation/IcuCollation.php @@ -77,8 +77,8 @@ class IcuCollation extends Collation { * letters (denoted by keys starting with '-'). * * These are additions to (or subtractions from) the data stored in the - * first-letters-root.php data file (which among others includes full basic Latin, - * Cyrillic and Greek alphabets). + * first-letters-root.php data file (which among others includes full basic + * Latin, Cyrillic and Greek alphabets). * * "Separate letter" is a letter that would have a separate heading/section * for it in a dictionary or a phone book in this language. This data isn't @@ -277,10 +277,6 @@ class IcuCollation extends Collation { return $this->mainCollator->getSortKey( $string ); } - public function getPrimarySortKey( $string ) { - return $this->primaryCollator->getSortKey( $string ); - } - public function getFirstLetter( $string ) { $string = strval( $string ); if ( $string === '' ) { @@ -290,16 +286,21 @@ class IcuCollation extends Collation { $firstChar = mb_substr( $string, 0, 1, 'UTF-8' ); // If the first character is a CJK character, just return that character. - if ( ord( $firstChar ) > 0x7f && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $firstChar ) ) ) { + if ( ord( $firstChar ) > 0x7f && self::isCjk( mb_ord( $firstChar ) ) ) { return $firstChar; } $sortKey = $this->getPrimarySortKey( $string ); + $data = $this->getFirstLetterData(); + $keys = $data['keys']; + $letters = $data['chars']; // Do a binary search to find the correct letter to sort under $min = ArrayUtils::findLowerBound( - [ $this, 'getSortKeyByLetterIndex' ], - $this->getFirstLetterCount(), + static function ( $index ) use ( $keys ) { + return $keys[$index]; + }, + count( $keys ), 'strcmp', $sortKey ); @@ -308,7 +309,7 @@ class IcuCollation extends Collation { return ''; } - $sortLetter = $this->getLetterByIndex( $min ); + $sortLetter = $letters[$min]; if ( $this->useNumericCollation ) { // If the sort letter is a number, return '0–9' (or localized equivalent). @@ -322,11 +323,15 @@ class IcuCollation extends Collation { return $sortLetter; } + private function getPrimarySortKey( $string ) { + return $this->primaryCollator->getSortKey( $string ); + } + /** * @since 1.16.3 * @return array */ - public function getFirstLetterData() { + private function getFirstLetterData() { if ( $this->firstLetterData === null ) { $cache = ObjectCache::getLocalServerInstance( CACHE_ANYTHING ); $cacheKey = $cache->makeKey( @@ -349,10 +354,9 @@ class IcuCollation extends Collation { * @throws MWException */ private function fetchFirstLetterData() { - global $IP; // Generate data from serialized data file if ( isset( self::TAILORING_FIRST_LETTERS[$this->locale] ) ) { - $letters = require "$IP/includes/collation/data/first-letters-root.php"; + $letters = require __DIR__ . "/data/first-letters-root.php"; // Append additional characters $letters = array_merge( $letters, self::TAILORING_FIRST_LETTERS[$this->locale] ); // Remove unnecessary ones, if any @@ -366,14 +370,10 @@ class IcuCollation extends Collation { $letters[] = $this->digitTransformLanguage->formatNumNoSeparators( $digit ); } } elseif ( $this->locale === 'root' ) { - $letters = require "$IP/includes/collation/data/first-letters-root.php"; + $letters = require __DIR__ . "/data/first-letters-root.php"; } else { - // FIXME: Is this still used? - $letters = $this->getPrecompiledData( "first-letters-{$this->locale}.ser" ); - if ( $letters === false ) { - throw new MWException( "MediaWiki does not support ICU locale " . - "\"{$this->locale}\"" ); - } + throw new MWException( "MediaWiki does not support ICU locale " . + "\"{$this->locale}\"" ); } /* Sort the letters. @@ -395,8 +395,7 @@ class IcuCollation extends Collation { wfDebug( "Primary collision '$letter' '{$letterMap[$key]}' (comparison: $comp)" ); // If that also has a collision, use codepoint as a tiebreaker. if ( $comp === 0 ) { - $comp = UtfNormal\Utils::utf8ToCodepoint( $letter ) <=> - UtfNormal\Utils::utf8ToCodepoint( $letterMap[$key] ); + $comp = mb_ord( $letter ) <=> mb_ord( $letterMap[$key] ); } if ( $comp < 0 ) { $letterMap[$key] = $letter; @@ -418,7 +417,7 @@ class IcuCollation extends Collation { * is an 'R' followed by an 's'. * * Additionally an expanded element should always sort directly - * after its first element due to they way sortkeys work. + * after its first element due to the way sortkeys work. * * UCA sortkey elements are of variable length but no collation * element should be a prefix of some other element, so I think @@ -486,52 +485,6 @@ class IcuCollation extends Collation { } /** - * Get an object from the precompiled serialized directory - * - * Replaced use of wfGetPrecompiledData - * - * @param string $name - * @return mixed The variable on success, false on failure - */ - private function getPrecompiledData( $name ) { - global $IP; - $file = "$IP/serialized/$name"; - if ( file_exists( $file ) ) { - $blob = file_get_contents( $file ); - if ( $blob ) { - return unserialize( $blob ); - } - } - return false; - } - - /** - * @param string $index - * @return string - * @since 1.16.3 - */ - public function getLetterByIndex( $index ) { - return $this->getFirstLetterData()['chars'][$index]; - } - - /** - * @param string $index - * @return string - * @since 1.16.3 - */ - public function getSortKeyByLetterIndex( $index ) { - return $this->getFirstLetterData()['keys'][$index]; - } - - /** - * @return int - * @since 1.16.3 - */ - public function getFirstLetterCount() { - return count( $this->getFirstLetterData()['chars'] ); - } - - /** * Test if a code point is a CJK (Chinese, Japanese, Korean) character * @param int $codepoint * @return bool |