aboutsummaryrefslogtreecommitdiffstats
path: root/includes/utils
diff options
context:
space:
mode:
authorTimo Tijhof <krinkle@fastmail.com>2023-08-05 03:53:16 +0100
committerTimo Tijhof <krinkle@fastmail.com>2023-08-10 03:39:42 +0100
commitb862174dc0fd09aae147a0c2f328298bcf1a7f28 (patch)
tree34078969015c0742a7164230333f1bac88cf7189 /includes/utils
parent1dca5ba36c9358f1df25e5bb849e9c0e41bc789c (diff)
downloadmediawikicore-b862174dc0fd09aae147a0c2f328298bcf1a7f28.tar.gz
mediawikicore-b862174dc0fd09aae147a0c2f328298bcf1a7f28.zip
UrlUtils: Make assemble() and removeDotSegments() stateless
Follows-up 472a914c63baa (I706ef8a50aafb51), which moved various functions here en masse, but these two don't require any state. The warnings about parse_url() in UrlUtils.php have been obsolete since about PHP 5.4, when it started to support protocol-relative URLs, non-slash protocols like "mailto", and deal with spaces/newlines correctly (https://3v4l.org/YWUkl). Rather than complicate many components that would otherwise remain stateless and unit-testable, fix these to once again be the static functions they were. For impact, see up to PS17 of change I5117eab95f57297eb02bed. Bug: T227900 Change-Id: Ifb3f720fc429b107348644c98eb9cd8e1113a42a
Diffstat (limited to 'includes/utils')
-rw-r--r--includes/utils/UrlUtils.php37
1 files changed, 19 insertions, 18 deletions
diff --git a/includes/utils/UrlUtils.php b/includes/utils/UrlUtils.php
index 3e1cd7a534f0..8c3fcfff9942 100644
--- a/includes/utils/UrlUtils.php
+++ b/includes/utils/UrlUtils.php
@@ -180,14 +180,14 @@ class UrlUtils {
$bits = $this->parse( $url );
if ( $bits && isset( $bits['path'] ) ) {
- $bits['path'] = $this->removeDotSegments( $bits['path'] );
- return $this->assemble( $bits );
+ $bits['path'] = self::removeDotSegments( $bits['path'] );
+ return self::assemble( $bits );
} elseif ( $bits ) {
# No path to expand
return $url;
} elseif ( !str_starts_with( $url, '/' ) ) {
# URL is a relative path
- return $this->removeDotSegments( $url );
+ return self::removeDotSegments( $url );
}
# Expanded URL is not valid.
@@ -227,12 +227,11 @@ class UrlUtils {
* This is the basic structure used (brackets contain keys for $urlParts):
* [scheme][delimiter][user]:[pass]@[host]:[port][path]?[query]#[fragment]
*
- * @todo Need to integrate this into expand() (see T34168)
- *
+ * @since 1.41
* @param array $urlParts URL parts, as output from parse()
* @return string URL assembled from its component parts
*/
- public function assemble( array $urlParts ): string {
+ public static function assemble( array $urlParts ): string {
$result = '';
if ( isset( $urlParts['delimiter'] ) ) {
@@ -278,12 +277,11 @@ class UrlUtils {
* Remove all dot-segments in the provided URL path. For example, '/a/./b/../c/' becomes
* '/a/c/'. For details on the algorithm, please see RFC3986 section 5.2.4.
*
- * @todo Need to integrate this into expand() (see T34168)
- *
+ * @since 1.41
* @param string $urlPath URL path, potentially containing dot-segments
* @return string URL path with all dot-segments removed
*/
- public function removeDotSegments( string $urlPath ): string {
+ public static function removeDotSegments( string $urlPath ): string {
$output = '';
$inputOffset = 0;
$inputLength = strlen( $urlPath );
@@ -400,14 +398,17 @@ class UrlUtils {
}
/**
- * parse_url() work-alike, but non-broken. Differences:
+ * Advanced and configurable version of parse_url().
+ *
+ * 1) Add a "delimiter" element to the array, which makes it possible to blindly re-assemble
+ * any URL regardless of protocol, including those that don't use `://`,
+ * such as "mailto:" and "news:".
+ * 2) Reject URLs with protocols not in $wgUrlProtocols.
+ * 3) Reject relative or incomplete URLs that parse_url would return a partial array for.
*
- * 1) Handles protocols that don't use :// (e.g., mailto: and news:, as well as
- * protocol-relative URLs) correctly.
- * 2) Adds a "delimiter" element to the array (see (2)).
- * 3) Verifies that the protocol is on the UrlProtocols allowed list.
- * 4) Rejects some invalid URLs that parse_url doesn't, e.g. the empty string or URLs starting
- * with a line feed character.
+ * If all you need is to extract parts of an HTTP or HTTPS URL (i.e. not specific to
+ * site-configurable extra protocols, or user input) then `parse_url()` can be used
+ * directly instead.
*
* @param string $url A URL to parse
* @return ?string[] Bits of the URL in an associative array, or null on failure.
@@ -432,8 +433,8 @@ class UrlUtils {
$url = "http:$url";
}
$bits = parse_url( $url );
- // parse_url() returns an array without scheme for some invalid URLs, e.g.
- // parse_url("%0Ahttp://example.com") == [ 'host' => '%0Ahttp', 'path' => 'example.com' ]
+ // parse_url() returns an array without scheme for invalid URLs, e.g.
+ // parse_url("something bad://example") == [ 'path' => 'something bad://example' ]
if ( !$bits || !isset( $bits['scheme'] ) ) {
return null;
}