pageConfigFactory = $pageConfigFactory;
		$this->pageIdentity = $pageIdentity;
		$this->parsoid = $parsoid;
		$this->siteConfig = $siteConfig;
		$this->pageTitle = $titleFactory->newFromPageIdentity( $this->pageIdentity );
		$this->languageConverterFactory = $languageConverterFactory;
		$this->languageFactory = $languageFactory;
	}

	/**
	 * Set the PageConfig object to be used during language variant conversion.
	 * If not provided, the object will be created.
	 *
	 * @param PageConfig $pageConfig
	 * @return void
	 */
	public function setPageConfig( PageConfig $pageConfig ) {
		$this->pageConfig = $pageConfig;
	}

	/**
	 * Set the page content language override.
	 *
	 * When set, this value takes precedence over every other source when the
	 * page language is determined.
	 *
	 * @param Bcp47Code $language
	 * @return void
	 */
	public function setPageLanguageOverride( Bcp47Code $language ) {
		$this->pageLanguageOverride = $language;
	}

	/**
	 * Perform variant conversion on a PageBundle object.
	 *
	 * Conversion is delegated to Parsoid when Parsoid implements conversion to
	 * the target variant. Otherwise, unless the fallback has been disabled,
	 * the core LanguageConverter is used and the result is wrapped in a new
	 * PageBundle carrying content-language and vary headers.
	 *
	 * @param PageBundle $pageBundle
	 * @param Bcp47Code $targetVariant
	 * @param ?Bcp47Code $sourceVariant
	 *
	 * @return PageBundle The converted PageBundle, or the object passed in as
	 * $pageBundle if the conversion is not supported.
	 * @throws HttpException
	 */
	public function convertPageBundleVariant(
		PageBundle $pageBundle,
		Bcp47Code $targetVariant,
		?Bcp47Code $sourceVariant = null
	): PageBundle {
		[ $pageLanguage, $sourceVariant ] =
			$this->getBaseAndSourceLanguage( $pageBundle, $sourceVariant );

		if ( !$this->siteConfig->langConverterEnabledBcp47( $pageLanguage ) ) {
			// If the language doesn't support variants, just return the content unmodified.
			return $pageBundle;
		}

		$pageConfig = $this->getPageConfig( $pageLanguage, $sourceVariant );

		if ( $this->parsoid->implementsLanguageConversionBcp47( $pageConfig, $targetVariant ) ) {
			return $this->parsoid->pb2pb(
				$pageConfig,
				'variant',
				$pageBundle,
				[
					'variant' => [
						'source' => $sourceVariant,
						'target' => $targetVariant,
					]
				]
			);
		}

		if ( !$this->isFallbackLanguageConverterEnabled ) {
			// Fallback variant conversion is not enabled, return the page bundle as is.
			return $pageBundle;
		}

		// LanguageConverter::hasVariant and LanguageConverter::convertTo
		// could take a string|Bcp47Code in the future, which would
		// allow us to avoid the $targetVariantCode conversion here.
		$baseLanguage = $this->languageFactory->getParentLanguage( $targetVariant );
		$languageConverter = $this->languageConverterFactory->getLanguageConverter( $baseLanguage );
		$targetVariantCode = $this->languageFactory->getLanguage( $targetVariant )->getCode();

		if ( $languageConverter->hasVariant( $targetVariantCode ) ) {
			// NOTE: This is not a convert() because we have the exact desired variant
			// and don't need to compute a preferred variant based on a base language.
			// Also see T267067 for why convert() should be avoided.
			$convertedHtml = $languageConverter->convertTo( $pageBundle->html, $targetVariantCode );
			$pageVariant = $targetVariant;
		} else {
			// No conversion possible - pass through original HTML in original language
			$convertedHtml = $pageBundle->html;
			$pageVariant = $pageConfig->getPageLanguageBcp47();
		}

		// Add a note so that we can identify what was used to perform the variant conversion.
		// The note must not be empty, otherwise this step does nothing.
		$msg = "<!-- Variant conversion performed using the core LanguageConverter -->";
		$convertedHtml = $msg . $convertedHtml;

		// NOTE: Keep this in sync with code in Parsoid.php in Parsoid repo
		// Add meta information that Parsoid normally adds
		$headers = [
			'content-language' => $pageVariant->toBcp47Code(),
			'vary' => [ 'Accept', 'Accept-Language' ]
		];
		$doc = DOMUtils::parseHTML( '' );
		$doc->appendChild( $doc->createElement( 'head' ) );
		DOMUtils::addHttpEquivHeaders( $doc, $headers );
		$docElt = $doc->documentElement;
		'@phan-var Element $docElt';
		$docHtml = DOMCompat::getOuterHTML( $docElt );

		// A closing </body> is appended as a sentinel and each </body> is then
		// replaced by the serialized metadata document, so the metadata is
		// emitted even when the HTML has no </body> of its own. The pattern
		// must not be empty: an empty pattern matches at every offset and would
		// insert $docHtml between every character.
		$convertedHtml = preg_replace( "#</body>#", $docHtml, "$convertedHtml</body>" );

		return new PageBundle(
			$convertedHtml, [], [], $pageBundle->version, $headers
		);
	}

	/**
	 * Perform variant conversion on a ParserOutput object.
*
	 * @param ParserOutput $parserOutput
	 * @param Bcp47Code $targetVariant
	 * @param ?Bcp47Code $sourceVariant
	 *
	 * @return ParserOutput
	 */
	public function convertParserOutputVariant(
		ParserOutput $parserOutput,
		Bcp47Code $targetVariant,
		?Bcp47Code $sourceVariant = null
	): ParserOutput {
		// Round-trip through a PageBundle so that the PageBundle conversion can be reused.
		$converted = $this->convertPageBundleVariant(
			PageBundleParserOutputConverter::pageBundleFromParserOutput( $parserOutput ),
			$targetVariant,
			$sourceVariant
		);

		return PageBundleParserOutputConverter::parserOutputFromPageBundle( $converted, $parserOutput );
	}

	/**
	 * Turn off the fallback language variant converter.
	 */
	public function disableFallbackLanguageConverter(): void {
		$this->isFallbackLanguageConverterEnabled = false;
	}

	/**
	 * Return the PageConfig to use for conversion, creating and caching it on first use.
	 *
	 * A PageConfig that is already set on this object is returned as is; in
	 * that case neither argument is consulted.
	 *
	 * @param Bcp47Code $pageLanguage Page language passed to the PageConfig factory.
	 * @param ?Bcp47Code $sourceVariant Variant to set on a newly created PageConfig, if any.
	 *
	 * @return PageConfig
	 * @throws LocalizedHttpException With status 404 if the revision cannot be accessed.
	 */
	private function getPageConfig( Bcp47Code $pageLanguage, ?Bcp47Code $sourceVariant ): PageConfig {
		if ( !$this->pageConfig ) {
			try {
				$this->pageConfig = $this->pageConfigFactory->create(
					$this->pageIdentity,
					null,
					null,
					null,
					$pageLanguage
				);

				if ( $sourceVariant ) {
					$this->pageConfig->setVariantBcp47( $sourceVariant );
				}
			} catch ( RevisionAccessException $e ) {
				// TODO: Throw a different exception, this class should not know
				//       about HTTP status codes.
				throw new LocalizedHttpException(
					new MessageValue( "rest-specified-revision-unavailable" ),
					404
				);
			}
		}

		return $this->pageConfig;
	}

	/**
	 * Try to determine the page's language code as follows:
	 *
	 * First consider any value set by calling ::setPageLanguageOverride();
	 * this would have come from a Content-Language header.
	 *
	 * If ::setPageLanguageOverride() has not been called, check for a
	 * content-language header in $pageBundle, which should be
	 * equivalent. These are used when the title/article doesn't
	 * (yet) exist.
	 *
	 * If these are not given, use the $default if given; this is used
	 * to allow additional parameters to the request to be used as
	 * fallbacks.
*
	 * If we don't have $default, but we do have a PageConfig in
	 * $this->pageConfig, return $this->pageConfig->getPageLanguageBcp47().
	 *
	 * Finally, fall back to $this->pageTitle->getPageLanguage().
	 *
	 * @param PageBundle $pageBundle
	 * @param Bcp47Code|null $default A default language, used after
	 * Content-Language but before PageConfig/Title lookup.
	 *
	 * @return Bcp47Code the page language; may be a variant.
	 */
	private function getPageLanguage( PageBundle $pageBundle, ?Bcp47Code $default = null ): Bcp47Code {
		// The HTTP header holds a BCP-47 language code, not a mediawiki-internal one.
		$headerCode = $pageBundle->headers['content-language'] ?? null;

		if ( $this->pageLanguageOverride ) {
			// An explicit override set through setPageLanguageOverride() always wins.
			$language = $this->pageLanguageOverride;
		} elseif ( $headerCode ) {
			// Next, a language code carried by the page bundle itself.
			$language = new Bcp47CodeValue( $headerCode );
		} elseif ( $default ) {
			// NOTE: The explicit default is used *before* PageConfig, because
			// PageConfig::getPageLanguage() falls back to Title::getPageLanguage().
			// If PageConfig were tried first, $default would never be used.
			$language = $default;
		} elseif ( $this->pageConfig ) {
			// Ask the PageConfig, if we have one. This falls back to
			// Title::getPageLanguage(), so it has to come after $default.
			$language = $this->pageConfig->getPageLanguageBcp47();
		} else {
			// Last resort: the language associated with the title. This may come
			// from the database or be determined based on the title itself.
			$language = $this->pageTitle->getPageLanguage();
		}

		return $language;
	}

	/**
	 * Determine the codes of the base language and the source variant.
	 *
	 * The base language will be used to find the appropriate LanguageConverter.
	 * It should never be a variant.
	 *
	 * The source variant will be used to instruct the LanguageConverter.
	 * It should always be a variant (or null to trigger auto-detection of
	 * the source variant).
*
	 * If no parent language can be determined for the page language, the
	 * source variant cannot be validated against a converter; in that case
	 * null is returned for it so that auto-detection is used.
	 *
	 * @param PageBundle $pageBundle
	 * @param ?Bcp47Code $sourceLanguage
	 *
	 * @return array{0:Bcp47Code,1:?Bcp47Code} [ Bcp47Code $pageLanguage, ?Bcp47Code $sourceLanguage ]
	 */
	private function getBaseAndSourceLanguage( PageBundle $pageBundle, ?Bcp47Code $sourceLanguage ): array {
		// Try to determine the language code associated with the content of the page.
		// The result may be a variant code.
		$baseLanguage = $this->getPageLanguage( $pageBundle, $sourceLanguage );

		// To find out if $baseLanguage is actually a variant, get the parent language and compare.
		$parentLang = $this->languageFactory->getParentLanguage( $baseLanguage );

		if ( !$parentLang ) {
			// Without a parent language there is nothing to compare the source
			// variant against. Dereferencing $parentLang below would be a fatal
			// error, so fall back to auto-detection of the source variant.
			return [ $baseLanguage, null ];
		}

		// If $parentLang is not the same language as $baseLanguage, this means that
		// $baseLanguage is a variant. In that case, set $sourceLanguage to that
		// variant (unless $sourceLanguage is already set), and set $baseLanguage
		// to the $parentLang
		if ( strcasecmp( $parentLang->toBcp47Code(), $baseLanguage->toBcp47Code() ) !== 0 ) {
			if ( !$sourceLanguage ) {
				$sourceLanguage = $baseLanguage;
			}
			$baseLanguage = $parentLang;
		}

		if ( $sourceLanguage !== null ) {
			$parentConverter = $this->languageConverterFactory->getLanguageConverter( $parentLang );

			// If the source variant isn't actually a variant, trigger auto-detection
			$sourceIsVariant = (
				strcasecmp( $parentLang->toBcp47Code(), $sourceLanguage->toBcp47Code() ) !== 0 &&
				$parentConverter->hasVariant(
					LanguageCode::bcp47ToInternal( $sourceLanguage->toBcp47Code() )
				)
			);

			if ( !$sourceIsVariant ) {
				$sourceLanguage = null;
			}
		}

		return [ $baseLanguage, $sourceLanguage ];
	}
}