Fixed prefixed links (for Arabic Wikipedia)

author: Jens Frank <jeluf@users.mediawiki.org> 2004-04-21 23:52:48 +0000
committer: Jens Frank <jeluf@users.mediawiki.org> 2004-04-21 23:52:48 +0000
commit: bbc10ff7043f8044dcda833b19e13d1b9b52e132 (patch)
tree: 3ed94f498bb59cf96fbab007f05aa2cb2e9a60ca /includes/Tokenizer.php
parent: eb24ecc37b85d791f245689f55468f99da234ad0 (diff)
download: mediawikicore-bbc10ff7043f8044dcda833b19e13d1b9b52e132.tar.gz
mediawikicore-bbc10ff7043f8044dcda833b19e13d1b9b52e132.zip
1 file changed, 21 insertions, 6 deletions
diff --git a/includes/Tokenizer.php b/includes/Tokenizer.php
index 9ef18b0c5a69..05754226de7d 100644
--- a/includes/Tokenizer.php
+++ b/includes/Tokenizer.php
@@ -8,8 +8,11 @@ class Tokenizer {
 
 	/* private */ function Tokenizer()
 	{
+		global $wgLang;
+
 		$this->mPos=0;
 		$this->mTokenQueue=array();
+		$this->linkPrefixExtension = $wgLang->linkPrefixExtension();
 	}
 
 	# factory function
@@ -54,8 +57,7 @@ class Tokenizer {
 	// proceeds character by character through the text, looking for characters needing
 	// special attention. Those are currently: I, R, ', [, ], newline
 	//
-	// TODO: prefixed links for Arabic wikipedia not implemented yet
-	//       handling of French blanks not yet implemented
+	// TODO:  handling of French blanks not yet implemented
 	function nextToken()
 	{
 		$fname = "Tokenizer::nextToken";
@@ -64,8 +66,8 @@ class Tokenizer {
 		if ( count( $this->mQueuedToken ) != 0 ) {
 			// still one token from the last round around. Return that one first.
 			$token = array_shift( $this->mQueuedToken );
-		} else if ( $this->mPos > $this->mTextLength )
-		{	// If no text is left, return "false".
+		} else if ( $this->mPos > $this->mTextLength ) {
+		 	// If no text is left, return "false".
 			$token = false;
 		} else {
 
@@ -98,8 +100,21 @@ class Tokenizer {
 					     		$this->mPos += 3;
 							break 2; // switch + while
 						} else if ( $this->continues("[") ) {
-						     	$queueToken["type"] = "[[";
+							$queueToken["type"] = "[[";
 							$queueToken["text"] = "";
+							// Check for a "prefixed link", e.g. Al[[Khazar]]
+							// Mostly for arabic wikipedia
+							if ( $this->linkPrefixExtension ) {
+								while (    $this->linkPrefixExtension
+									&& ($len = strlen( $token["text"] ) ) > 0 
+									&& !ctype_space( $token["text"][$len-1] ) )
+								{
+									//prepend the character to the link's open tag
+									$queueToken["text"] = $token["text"][$len-1] . $queueToken["text"];
+									//remove character from the end of the text token
+									$token["text"] = substr( $token["text"], 0, -1);
+								}
+							}
 							$this->mQueuedToken[] = $queueToken;
 					     		$this->mPos += 2;
 							break 2; // switch + while 
@@ -158,7 +173,7 @@ class Tokenizer {
 
 	// function continues
 	// checks whether the mText continues with $cont from mPos+1
-	function continues( $cont )
+	/* private */ function continues( $cont )
 	{
 		// If string is not long enough to contain $cont, return false
 		if ( $this->mTextLength < $this->mPos + strlen( $cont ) )
author	Jens Frank <jeluf@users.mediawiki.org>	2004-04-21 23:52:48 +0000
committer	Jens Frank <jeluf@users.mediawiki.org>	2004-04-21 23:52:48 +0000
commit	bbc10ff7043f8044dcda833b19e13d1b9b52e132 (patch)
tree	3ed94f498bb59cf96fbab007f05aa2cb2e9a60ca /includes/Tokenizer.php
parent	eb24ecc37b85d791f245689f55468f99da234ad0 (diff)
download	mediawikicore-bbc10ff7043f8044dcda833b19e13d1b9b52e132.tar.gz mediawikicore-bbc10ff7043f8044dcda833b19e13d1b9b52e132.zip