aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- includes/GlobalFunctions.php 7
-rw-r--r-- includes/utils/UrlUtils.php 37
-rw-r--r-- tests/phpunit/unit/includes/utils/UrlUtilsTest.php 11
3 files changed, 24 insertions, 31 deletions
diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php
index 5ee172743b32..957a50370bba 100644
--- a/includes/GlobalFunctions.php
+++ b/includes/GlobalFunctions.php
@@ -543,7 +543,7 @@ function wfGetServerUrl( $proto ) {
* @return string URL assembled from its component parts
*/
function wfAssembleUrl( $urlParts ) {
- return wfGetUrlUtils()->assemble( (array)$urlParts );
+ return UrlUtils::assemble( (array)$urlParts );
}
/**
@@ -551,14 +551,13 @@ function wfAssembleUrl( $urlParts ) {
* '/a/./b/../c/' becomes '/a/c/'. For details on the algorithm, please see
* RFC3986 section 5.2.4.
*
- * @since 1.19
- *
* @deprecated since 1.39, use UrlUtils::removeDotSegments()
+ * @since 1.19
* @param string $urlPath URL path, potentially containing dot-segments
* @return string URL path with all dot-segments removed
*/
function wfRemoveDotSegments( $urlPath ) {
- return wfGetUrlUtils()->removeDotSegments( (string)$urlPath );
+ return UrlUtils::removeDotSegments( (string)$urlPath );
}
/**
diff --git a/includes/utils/UrlUtils.php b/includes/utils/UrlUtils.php
index 3e1cd7a534f0..8c3fcfff9942 100644
--- a/includes/utils/UrlUtils.php
+++ b/includes/utils/UrlUtils.php
@@ -180,14 +180,14 @@ class UrlUtils {
$bits = $this->parse( $url );
if ( $bits && isset( $bits['path'] ) ) {
- $bits['path'] = $this->removeDotSegments( $bits['path'] );
- return $this->assemble( $bits );
+ $bits['path'] = self::removeDotSegments( $bits['path'] );
+ return self::assemble( $bits );
} elseif ( $bits ) {
# No path to expand
return $url;
} elseif ( !str_starts_with( $url, '/' ) ) {
# URL is a relative path
- return $this->removeDotSegments( $url );
+ return self::removeDotSegments( $url );
}
# Expanded URL is not valid.
@@ -227,12 +227,11 @@ class UrlUtils {
* This is the basic structure used (brackets contain keys for $urlParts):
* [scheme][delimiter][user]:[pass]@[host]:[port][path]?[query]#[fragment]
*
- * @todo Need to integrate this into expand() (see T34168)
- *
+ * @since 1.41
* @param array $urlParts URL parts, as output from parse()
* @return string URL assembled from its component parts
*/
- public function assemble( array $urlParts ): string {
+ public static function assemble( array $urlParts ): string {
$result = '';
if ( isset( $urlParts['delimiter'] ) ) {
@@ -278,12 +277,11 @@ class UrlUtils {
* Remove all dot-segments in the provided URL path. For example, '/a/./b/../c/' becomes
* '/a/c/'. For details on the algorithm, please see RFC3986 section 5.2.4.
*
- * @todo Need to integrate this into expand() (see T34168)
- *
+ * @since 1.41
* @param string $urlPath URL path, potentially containing dot-segments
* @return string URL path with all dot-segments removed
*/
- public function removeDotSegments( string $urlPath ): string {
+ public static function removeDotSegments( string $urlPath ): string {
$output = '';
$inputOffset = 0;
$inputLength = strlen( $urlPath );
@@ -400,14 +398,17 @@ class UrlUtils {
}
/**
- * parse_url() work-alike, but non-broken. Differences:
+ * Advanced and configurable version of parse_url().
+ *
+ * 1) Add a "delimiter" element to the array, which makes it possible to blindly re-assemble
+ * any URL regardless of protocol, including those that don't use `://`,
+ * such as "mailto:" and "news:".
+ * 2) Reject URLs with protocols not in $wgUrlProtocols.
+ * 3) Reject relative or incomplete URLs that parse_url would return a partial array for.
*
- * 1) Handles protocols that don't use :// (e.g., mailto: and news:, as well as
- * protocol-relative URLs) correctly.
- * 2) Adds a "delimiter" element to the array (see (2)).
- * 3) Verifies that the protocol is on the UrlProtocols allowed list.
- * 4) Rejects some invalid URLs that parse_url doesn't, e.g. the empty string or URLs starting
- * with a line feed character.
+ * If all you need is to extract parts of an HTTP or HTTPS URL (i.e. not specific to
+ * site-configurable extra protocols, or user input) then `parse_url()` can be used
+ * directly instead.
*
* @param string $url A URL to parse
* @return ?string[] Bits of the URL in an associative array, or null on failure.
@@ -432,8 +433,8 @@ class UrlUtils {
$url = "http:$url";
}
$bits = parse_url( $url );
- // parse_url() returns an array without scheme for some invalid URLs, e.g.
- // parse_url("%0Ahttp://example.com") == [ 'host' => '%0Ahttp', 'path' => 'example.com' ]
+ // parse_url() returns an array without scheme for invalid URLs, e.g.
+ // parse_url("something bad://example") == [ 'path' => 'something bad://example' ]
if ( !$bits || !isset( $bits['scheme'] ) ) {
return null;
}
diff --git a/tests/phpunit/unit/includes/utils/UrlUtilsTest.php b/tests/phpunit/unit/includes/utils/UrlUtilsTest.php
index deb6ee017ea5..2d9f325daeaf 100644
--- a/tests/phpunit/unit/includes/utils/UrlUtilsTest.php
+++ b/tests/phpunit/unit/includes/utils/UrlUtilsTest.php
@@ -65,14 +65,7 @@ class UrlUtilsTest extends MediaWikiUnitTestCase {
* @param string $expected
*/
public function testAssemble( array $bits, string $expected ): void {
- $urlUtils = new UrlUtils( [ UrlUtils::VALID_PROTOCOLS => [
- '//',
- 'http://',
- 'https://',
- 'file://',
- 'mailto:',
- ] ] );
- $this->assertSame( $expected, $urlUtils->assemble( $bits ) );
+ $this->assertSame( $expected, UrlUtils::assemble( $bits ) );
}
/**
@@ -82,7 +75,7 @@ class UrlUtilsTest extends MediaWikiUnitTestCase {
* @param string $expected
*/
public function testRemoveDotSegments( string $input, string $expected ): void {
- $this->assertSame( $expected, ( new UrlUtils )->removeDotSegments( $input ) );
+ $this->assertSame( $expected, UrlUtils::removeDotSegments( $input ) );
}
/**