aboutsummaryrefslogtreecommitdiffstats
path: root/languages/LanguageUtf8.php
blob: 805589dd2f53c7fc3d6d682afe1f9dbc2dac9551 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
<?php
if( defined( "MEDIAWIKI" ) ) {

$wgInputEncoding    = "utf-8";
$wgOutputEncoding	= "utf-8";

$wikiUpperChars = $wgMemc->get( $key1 = "$wgDBname:utf8:upper" );
$wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" );

if(empty( $wikiUpperChars) || empty($wikiLowerChars )) {
	require_once( "includes/Utf8Case.php" );
	$wgMemc->set( $key1, $wikiUpperChars );
	$wgMemc->set( $key2, $wikiLowerChars );
}

# Base stuff useful to all UTF-8 based language files
class LanguageUtf8 extends Language {

	function ucfirst( $string ) {
		# For most languages, this is a wrapper for ucfirst()
		# But that doesn't work right in a UTF-8 locale
		global $wikiUpperChars;
		return preg_replace (
        	"/^([\\x00-\\x7f]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
        	"strtr ( \"\$1\" , \$wikiUpperChars )",
        	$string );
	}
	
	function lcfirst( $string ) {
		global $wikiLowerChars;
		return preg_replace (
        	"/^([\\x00-\\x7f]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
        	"strtr ( \"\$1\" , \$wikiLowerChars )",
        	$string );
	}

	function stripForSearch( $string ) {
		# MySQL fulltext index doesn't grok utf-8, so we
		# need to fold cases and convert to hex
		global $wikiLowerChars;
		return preg_replace(
		  "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
		  "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
		  $string );
	}

	function fallback8bitEncoding() {
		# Windows codepage 1252 is a superset of iso 8859-1
		# override this to use difference source encoding to
		# translate incoming 8-bit URLs.
		return "windows-1252";
	}

	function checkTitleEncoding( $s ) {
		global $wgInputEncoding;

		# Check for non-UTF-8 URLs
		$ishigh = preg_match( '/[\x80-\xff]/', $s);
		if(!$ishigh) return $s;
		
		$isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
                '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
		if( $isutf8 ) return $s;

		return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
	}

	function firstChar( $s ) {
		preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})', $s, $matches);
		
		return isset( $matches[1] ) ? $matches[1] : "";
	}
}

} # ifdef MEDIAWIKI

?>