Skip to content
This repository has been archived by the owner on Feb 4, 2022. It is now read-only.

Commit

Permalink
Merge pull request #57 from timble/feature/56-httpurl
Browse files Browse the repository at this point in the history
Fix and harden url encoding
  • Loading branch information
johanjanssens committed May 2, 2016
2 parents 18afa9d + 2980301 commit 732da7f
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 62 deletions.
9 changes: 9 additions & 0 deletions code/http/url/interface.php
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ public function setPort($port);
/**
* Returns the path portion as a string or array
*
* This method will encode the path to conform to RFC 3986 if returned as a string.
* @link https://tools.ietf.org/html/rfc3986
*
* @param boolean $toArray If TRUE return an array. Default FALSE
* @return string|array The path string; e.g., `path/to/site`.
*/
Expand All @@ -147,6 +150,9 @@ public function setPath($path);
/**
* Returns the query portion as a string or array
*
* This method will encode the query to conform to RFC 3986 if returned as a string.
* @link https://tools.ietf.org/html/rfc3986
*
* @param boolean $toArray If TRUE return an array. Default FALSE
* @param boolean|null $escape If TRUE escapes '&' to '&amp;' for xml compliance. If NULL use the default.
* @return string|array The query string; e.g., `foo=bar&baz=dib`.
Expand Down Expand Up @@ -229,6 +235,9 @@ public static function fromString($url);
/**
* Get the full url, of the format `scheme://user:pass@host/path?query#fragment`.
*
* This method will encode the resulting url to conform to RFC 3986.
* @link https://tools.ietf.org/html/rfc3986
*
* @param integer $parts A bitmask of binary or'ed HTTP_URL constants; FULL is the default
* @param boolean|null $escape If TRUE escapes '&' to '&amp;' for xml compliance. If NULL use the default.
* @return string
Expand Down
111 changes: 49 additions & 62 deletions code/http/url/url.php
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@
* ?>
* </code>
*
* @link https://tools.ietf.org/html/rfc3986
*
* @author Johan Janssens <https://github.com/johanjanssens>
* @package Kodekit\Library\Http\Url
*/
Expand Down Expand Up @@ -139,22 +141,7 @@ class HttpUrl extends Object implements HttpUrlInterface
protected $_path = '';

/**
* Url-encode only these characters in path elements.
*
* Characters are ' ' (space), '/', '?', '&', and '#'.
*
* @var array
*/
protected $_encode_path = array(
' ' => '+',
'/' => '%2F',
'?' => '%3F',
'&' => '%26',
'#' => '%23',
);

/**
* Escape '&' to '&amp;'
* Escapes '&' to '&amp;'
*
* @var boolean
*
Expand Down Expand Up @@ -290,7 +277,7 @@ public function getUser()
*/
public function setUser($user)
{
    // Keep the user percent-decoded internally. toString() applies rawurlencode() when the url
    // is rendered, so storing an already-encoded value here would end up encoded twice.
    $this->user = rawurldecode($user);

    // Return the object itself so setter calls can be chained.
    return $this;
}

Expand All @@ -312,7 +299,7 @@ public function getPass()
*/
public function setPass($pass)
{
    // Keep the password percent-decoded internally. toString() applies rawurlencode() when the
    // url is rendered, so storing an already-encoded value here would end up encoded twice.
    $this->pass = rawurldecode($pass);

    // Return the object itself so setter calls can be chained.
    return $this;
}

Expand All @@ -334,7 +321,7 @@ public function getHost()
*/
public function setHost($host)
{
    // Keep the host percent-decoded internally. toString() applies rawurlencode() when the url
    // is rendered, so storing an already-encoded value here would end up encoded twice.
    $this->host = rawurldecode($host);

    // Return the object itself so setter calls can be chained.
    return $this;
}

Expand All @@ -356,20 +343,28 @@ public function getPort()
*/
public function setPort($port)
{
    // Decode any percent-encoding first, then force the result to an integer so the stored port
    // is always numeric regardless of how the caller supplied it.
    $this->port = (int) rawurldecode($port);

    // Return the object itself so setter calls can be chained.
    return $this;
}

/**
 * Returns the path portion as a string or array
 *
 * When returned as a string, every path segment is percent-encoded with rawurlencode() so the
 * result conforms to RFC 3986, and the path is prefixed with a leading '/'.
 * @link https://tools.ietf.org/html/rfc3986
 *
 * @param boolean $toArray If TRUE return an array. Default FALSE
 * @return string|array The path string; e.g., `/path/to/site`. An empty path yields an empty string.
 */
public function getPath($toArray = false)
{
    // Array form: hand back the stored (decoded) segments untouched.
    if ($toArray) {
        return (array) $this->_path;
    }

    // No segments stored: return an empty string rather than a lone '/'.
    if (empty($this->_path)) {
        return '';
    }

    // Encode segment by segment so the '/' separators themselves are never escaped.
    return '/'.implode('/', array_map('rawurlencode', (array) $this->_path));
}

/**
Expand All @@ -387,14 +382,14 @@ public function setPath($path)
if (is_string($path))
{
if (!empty($path)) {
$path = explode('/', $path);
$path = explode('/', ltrim($path, '/'));
} else {
$path = array();
}
}

foreach ($path as $key => $val) {
$path[$key] = urldecode($val);
$path[$key] = rawurldecode($val);
}

$this->_path = $path;
Expand All @@ -404,6 +399,9 @@ public function setPath($path)
/**
* Returns the query portion as a string or array
*
* This method will encode the query to conform to RFC 3986 if returned as a string.
* @link https://tools.ietf.org/html/rfc3986
*
* @param boolean $toArray If TRUE return an array. Default FALSE
* @param boolean|null $escape If TRUE escapes '&' to '&amp;' for xml compliance. If NULL use the default.
* @return string|array The query string; e.g., `foo=bar&baz=dib`.
Expand All @@ -415,7 +413,7 @@ public function getQuery($toArray = false, $escape = null)

if(!$toArray)
{
$result = http_build_query($this->_query, '', $escape ? '&amp;' : '&');
$result = http_build_query($this->_query, '', $escape ? '&amp;' : '&');

// We replace the + used for spaces by http_build_query with the more standard %20.
$result = str_replace('+', '%20', $result);
Expand All @@ -436,7 +434,8 @@ public function getQuery($toArray = false, $escape = null)
*/
public function setQuery($query, $merge = false)
{
$result = $query;
//Parse
$array = $query;
if (!is_array($query))
{
if (strpos($query, '&amp;') !== false)
Expand All @@ -446,9 +445,20 @@ public function setQuery($query, $merge = false)
}

//Set the query vars
parse_str($query, $result);
parse_str($query, $array);
}

//Decode
$result = array();
foreach($array as $key => $value)
{
$key = rawurldecode($key);
$value = rawurldecode($value);

$result[$key] = $value;
}

//Merge
if ($merge) {
$this->_query = array_merge($this->_query, $result);
} else {
Expand Down Expand Up @@ -476,7 +486,7 @@ public function getFragment()
*/
public function setFragment($fragment)
{
    // Keep the fragment percent-decoded internally. toString() applies rawurlencode() when the
    // url is rendered, so storing an already-encoded value here would end up encoded twice.
    $this->fragment = rawurldecode($fragment);

    // Return the object itself so setter calls can be chained.
    return $this;
}

Expand Down Expand Up @@ -539,7 +549,6 @@ public function toArray($parts = self::FULL, $escape = null)
$result['path'] = $this->_path;
}


if (($parts & self::QUERY) && !empty($this->_query)) {
$result['query'] = $this->getQuery(false, $escape);
}
Expand Down Expand Up @@ -590,6 +599,9 @@ public static function fromString($url)
/**
* Get the full url, of the format `scheme://user:pass@host/path?query#fragment`.
*
* This method will encode the resulting url to conform to RFC 3986.
* @link https://tools.ietf.org/html/rfc3986
*
* @param integer $parts A bitmask of binary or'ed HTTP_URL constants; FULL is the default
* @param boolean|null $escape If TRUE escapes '&' to '&amp;' for xml compliance. If NULL use the default.
* @return string
Expand All @@ -601,7 +613,7 @@ public function toString($parts = self::FULL, $escape = null)

//Add the scheme
if (($parts & self::SCHEME) && !empty($this->scheme)) {
$url .= urlencode($this->scheme) . ':';
$url .= rawurlencode($this->scheme) . ':';
}

// Add the host and port, if any.
Expand All @@ -612,18 +624,18 @@ public function toString($parts = self::FULL, $escape = null)
//Add the username and password
if (($parts & self::USER) && !empty($this->user))
{
$url .= urlencode($this->user);
$url .= rawurlencode($this->user);
if (($parts & self::PASS) && !empty($this->pass)) {
$url .= ':' . urlencode($this->pass);
$url .= ':' . rawurlencode($this->pass);
}

$url .= '@';
}

$url .= urlencode($this->host);
$url .= rawurlencode($this->host);

if (($parts & self::PORT) && !empty($this->port)) {
$url .= ':' . (int)$this->port;
$url .= ':' . (int)rawurlencode($this->port);
}
}

Expand All @@ -642,7 +654,7 @@ public function toString($parts = self::FULL, $escape = null)
}

if (($parts & self::FRAGMENT) && trim($this->fragment) !== '') {
$url .= '#' . urlencode($this->fragment);
$url .= '#' . rawurlencode($this->fragment);
}

return $url;
Expand All @@ -668,31 +680,6 @@ public function equals(HttpUrlInterface $url)
return true;
}

/**
 * Builds a url path string from a list of path elements.
 *
 * A string argument is first split on '/'. Each element then has only the characters listed in
 * HttpUrl::$_encode_path replaced by their mapped values (urlencode() is deliberately not used),
 * and the elements are joined back together with '/'.
 *
 * @param array|string $spec The path elements, or a '/' separated path string.
 * @return string A url path string.
 */
protected function _pathEncode($spec)
{
    $segments = is_string($spec) ? explode('/', $spec) : (array) $spec;

    // Split the replacement map into parallel search/replace lists for str_replace().
    $search  = array_keys($this->_encode_path);
    $replace = array_values($this->_encode_path);

    $encoded = array_map(function ($segment) use ($search, $replace) {
        return str_replace($search, $replace, $segment);
    }, $segments);

    return implode('/', $encoded);
}

/**
* Set the virtual properties.
*
Expand Down

0 comments on commit 732da7f

Please sign in to comment.