aboutsummaryrefslogtreecommitdiffstats
path: root/serialized
diff options
context:
space:
mode:
author: Kevin Israel <pleasestand@live.com> 2013-05-26 06:12:12 -0400
committer: Kevin Israel <pleasestand@live.com> 2014-01-06 18:22:24 -0500
commit: 74557dedd02f17ae5115eaf51089e79f7277a353 (patch)
tree: a106fbfb30e001dbbcb6a77f45ee225a9a4482a2 /serialized
parent: d0610c03702edf3d5bd937f3567bf9c51b1abbe7 (diff)
download: mediawikicore-74557dedd02f17ae5115eaf51089e79f7277a353.tar.gz
mediawikicore-74557dedd02f17ae5115eaf51089e79f7277a353.zip
Generate Utf8Case.ser directly from UnicodeData.txt
This allows getting rid of serialized/serialize.php. I also moved includes/normal/Utf8CaseGenerate.php to maintenance/language/generateUtf8Case.php and updated it to subclass Maintenance, as it seems to be largely unrelated to normalization. Using version 6.0.0 of UnicodeData.txt, the updated script generates exactly the same serialized output as was previously checked in. Also updated the Makefile to reflect the current set of .ser files and added some .gitignore entries. Change-Id: I05afece3dc4505a9f43993ac4d7726b37d9c6956
Diffstat (limited to 'serialized')
-rw-r--r-- serialized/.gitignore 4
-rw-r--r-- serialized/Makefile 28
-rw-r--r-- serialized/serialize.php 95
3 files changed, 28 insertions, 99 deletions
diff --git a/serialized/.gitignore b/serialized/.gitignore
new file mode 100644
index 000000000000..d9d58dd989e0
--- /dev/null
+++ b/serialized/.gitignore
@@ -0,0 +1,4 @@
+/UnicodeData.txt
+/allkeys.txt
+/ucd.all.grouped.xml
+/ucd.all.grouped.zip
diff --git a/serialized/Makefile b/serialized/Makefile
index 062155b6844a..c7e75063c652 100644
--- a/serialized/Makefile
+++ b/serialized/Makefile
@@ -1,7 +1,7 @@
-
-SPECIAL_TARGETS=Utf8Case.ser
+SPECIAL_TARGETS=Utf8Case.ser normalize-ar.ser normalize-ml.ser first-letters-root.ser
ALL_TARGETS=$(SPECIAL_TARGETS)
DIST_TARGETS=$(SPECIAL_TARGETS)
+UNICODE_VERSION=6.0.0
.PHONY: all dist clean
@@ -13,6 +13,26 @@ dist: $(DIST_TARGETS)
clean:
rm -f $(ALL_TARGETS)
-Utf8Case.ser : ../includes/normal/Utf8Case.php
- php serialize.php -o $@ $<
+Utf8Case.ser: UnicodeData.txt
+ php ../maintenance/language/generateUtf8Case.php
+
+normalize-ar.ser: UnicodeData.txt
+ php ../maintenance/language/generateNormalizerDataAr.php
+
+normalize-ml.ser:
+ php ../maintenance/language/generateNormalizerDataMl.php
+
+first-letters-root.ser: allkeys.txt ucd.all.grouped.xml
+ php ../maintenance/language/generateCollationData.php
+
+UnicodeData.txt:
+ wget http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt
+
+allkeys.txt:
+ wget http://www.unicode.org/Public/UCA/$(UNICODE_VERSION)/allkeys.txt
+
+ucd.all.grouped.xml: ucd.all.grouped.zip
+ unzip ucd.all.grouped.zip ucd.all.grouped.xml
+ucd.all.grouped.zip:
+ wget http://www.unicode.org/Public/$(UNICODE_VERSION)/ucdxml/ucd.all.grouped.zip
diff --git a/serialized/serialize.php b/serialized/serialize.php
deleted file mode 100644
index 766c1a5116a0..000000000000
--- a/serialized/serialize.php
+++ /dev/null
@@ -1,95 +0,0 @@
-<?php
-/**
- * Serialize variables found in input file and store the result in the
- * specified file.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-if ( !defined( 'MEDIAWIKI' ) ) {
- $wgNoDBParam = true;
- $optionsWithArgs = array( 'o' );
- require_once __DIR__ .'/../maintenance/commandLine.inc';
-
- $stderr = fopen( 'php://stderr', 'w' );
- if ( !isset( $args[0] ) ) {
- fwrite( $stderr, "No input file specified\n" );
- exit( 1 );
- }
- if ( wfIsWindows() ) {
- $files = array();
- foreach ( $args as $arg ) {
- $files = array_merge( $files, glob( $arg ) );
- }
- if ( !$files ) {
- fwrite( $stderr, "No files found\n" );
- }
- } else {
- $files = $args;
- }
-
- if ( isset( $options['o'] ) ) {
- $out = fopen( $options['o'], 'wb' );
- if ( !$out ) {
- fwrite( $stderr, "Unable to open file \"{$options['o']}\" for output\n" );
- exit( 1 );
- }
- } else {
- $out = fopen( 'php://stdout', 'wb' );
- }
-
- $vars = array();
- foreach ( $files as $inputFile ) {
- $vars = array_merge( $vars, getVars( $inputFile ) );
- }
- fwrite( $out, serialize( $vars ) );
- fclose( $out );
- exit( 0 );
-}
-
-//----------------------------------------------------------------------------
-
-function getVars( $_gv_filename ) {
- require $_gv_filename;
- $vars = get_defined_vars();
- unset( $vars['_gv_filename'] );
-
- # Clean up line endings
- if ( wfIsWindows() ) {
- $vars = unixLineEndings( $vars );
- }
- return $vars;
-}
-
-function unixLineEndings( $var ) {
- static $recursionLevel = 0;
- if ( $recursionLevel > 50 ) {
- global $stderr;
- fwrite( $stderr, "Error: Recursion limit exceeded. Possible circular reference in array variable.\n" );
- exit( 2 );
- }
-
- if ( is_array( $var ) ) {
- ++$recursionLevel;
- $var = array_map( 'unixLineEndings', $var );
- --$recursionLevel;
- } elseif ( is_string( $var ) ) {
- $var = str_replace( "\r\n", "\n", $var );
- }
- return $var;
-}