diff options
author | Kevin Israel <pleasestand@live.com> | 2013-05-26 06:12:12 -0400 |
---|---|---|
committer | Kevin Israel <pleasestand@live.com> | 2014-01-06 18:22:24 -0500 |
commit | 74557dedd02f17ae5115eaf51089e79f7277a353 (patch) | |
tree | a106fbfb30e001dbbcb6a77f45ee225a9a4482a2 /serialized | |
parent | d0610c03702edf3d5bd937f3567bf9c51b1abbe7 (diff) | |
download | mediawikicore-74557dedd02f17ae5115eaf51089e79f7277a353.tar.gz mediawikicore-74557dedd02f17ae5115eaf51089e79f7277a353.zip |
Generate Utf8Case.ser directly from UnicodeData.txt

This allows getting rid of serialized/serialize.php. I also moved
includes/normal/Utf8CaseGenerate.php to maintenance/language/
generateUtf8Case.php and updated it to subclass Maintenance, as
it seems to be largely unrelated to normalization.

Using version 6.0.0 of UnicodeData.txt, the updated script generates
exactly the same serialized output as was previously checked in.

Also updated the Makefile to reflect the current set of .ser files
and added some .gitignore entries.

Change-Id: I05afece3dc4505a9f43993ac4d7726b37d9c6956
Diffstat (limited to 'serialized')
-rw-r--r-- | serialized/.gitignore | 4 | ||||
-rw-r--r-- | serialized/Makefile | 28 | ||||
-rw-r--r-- | serialized/serialize.php | 95 |
3 files changed, 28 insertions, 99 deletions
```diff
diff --git a/serialized/.gitignore b/serialized/.gitignore
new file mode 100644
index 000000000000..d9d58dd989e0
--- /dev/null
+++ b/serialized/.gitignore
@@ -0,0 +1,4 @@
+/UnicodeData.txt
+/allkeys.txt
+/ucd.all.grouped.xml
+/ucd.all.grouped.zip
diff --git a/serialized/Makefile b/serialized/Makefile
index 062155b6844a..c7e75063c652 100644
--- a/serialized/Makefile
+++ b/serialized/Makefile
@@ -1,7 +1,7 @@
-
-SPECIAL_TARGETS=Utf8Case.ser
+SPECIAL_TARGETS=Utf8Case.ser normalize-ar.ser normalize-ml.ser first-letters-root.ser
 ALL_TARGETS=$(SPECIAL_TARGETS)
 DIST_TARGETS=$(SPECIAL_TARGETS)
+UNICODE_VERSION=6.0.0
 
 .PHONY: all dist clean
 
@@ -13,6 +13,26 @@ dist: $(DIST_TARGETS)
 clean:
 	rm -f $(ALL_TARGETS)
 
-Utf8Case.ser : ../includes/normal/Utf8Case.php
-	php serialize.php -o $@ $<
+Utf8Case.ser: UnicodeData.txt
+	php ../maintenance/language/generateUtf8Case.php
+
+normalize-ar.ser: UnicodeData.txt
+	php ../maintenance/language/generateNormalizerDataAr.php
+
+normalize-ml.ser:
+	php ../maintenance/language/generateNormalizerDataMl.php
+
+first-letters-root.ser: allkeys.txt ucd.all.grouped.xml
+	php ../maintenance/language/generateCollationData.php
+
+UnicodeData.txt:
+	wget http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt
+
+allkeys.txt:
+	wget http://www.unicode.org/Public/UCA/$(UNICODE_VERSION)/allkeys.txt
+
+ucd.all.grouped.xml: ucd.all.grouped.zip
+	unzip ucd.all.grouped.zip ucd.all.grouped.xml
 
+ucd.all.grouped.zip:
+	wget http://www.unicode.org/Public/$(UNICODE_VERSION)/ucdxml/ucd.all.grouped.zip
diff --git a/serialized/serialize.php b/serialized/serialize.php
deleted file mode 100644
index 766c1a5116a0..000000000000
--- a/serialized/serialize.php
+++ /dev/null
@@ -1,95 +0,0 @@
-<?php
-/**
- * Serialize variables found in input file and store the result in the
- * specified file.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-if ( !defined( 'MEDIAWIKI' ) ) {
-	$wgNoDBParam = true;
-	$optionsWithArgs = array( 'o' );
-	require_once __DIR__ .'/../maintenance/commandLine.inc';
-
-	$stderr = fopen( 'php://stderr', 'w' );
-	if ( !isset( $args[0] ) ) {
-		fwrite( $stderr, "No input file specified\n" );
-		exit( 1 );
-	}
-	if ( wfIsWindows() ) {
-		$files = array();
-		foreach ( $args as $arg ) {
-			$files = array_merge( $files, glob( $arg ) );
-		}
-		if ( !$files ) {
-			fwrite( $stderr, "No files found\n" );
-		}
-	} else {
-		$files = $args;
-	}
-
-	if ( isset( $options['o'] ) ) {
-		$out = fopen( $options['o'], 'wb' );
-		if ( !$out ) {
-			fwrite( $stderr, "Unable to open file \"{$options['o']}\" for output\n" );
-			exit( 1 );
-		}
-	} else {
-		$out = fopen( 'php://stdout', 'wb' );
-	}
-
-	$vars = array();
-	foreach ( $files as $inputFile ) {
-		$vars = array_merge( $vars, getVars( $inputFile ) );
-	}
-	fwrite( $out, serialize( $vars ) );
-	fclose( $out );
-	exit( 0 );
-}
-
-//----------------------------------------------------------------------------
-
-function getVars( $_gv_filename ) {
-	require $_gv_filename;
-	$vars = get_defined_vars();
-	unset( $vars['_gv_filename'] );
-
-	# Clean up line endings
-	if ( wfIsWindows() ) {
-		$vars = unixLineEndings( $vars );
-	}
-	return $vars;
-}
-
-function unixLineEndings( $var ) {
-	static $recursionLevel = 0;
-	if ( $recursionLevel > 50 ) {
-		global $stderr;
-		fwrite( $stderr, "Error: Recursion limit exceeded. Possible circular reference in array variable.\n" );
-		exit( 2 );
-	}
-
-	if ( is_array( $var ) ) {
-		++$recursionLevel;
-		$var = array_map( 'unixLineEndings', $var );
-		--$recursionLevel;
-	} elseif ( is_string( $var ) ) {
-		$var = str_replace( "\r\n", "\n", $var );
-	}
-	return $var;
-}
```