UriResolver.php 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\DomCrawler;
  11. /**
  12. * The UriResolver class takes a URI (relative, absolute, fragment, etc.)
  13. * and turns it into an absolute URI against another given base URI.
  14. *
  15. * @author Fabien Potencier <fabien@symfony.com>
  16. * @author Grégoire Pineau <lyrixx@lyrixx.info>
  17. */
  class UriResolver
  {
      /**
       * Resolves a URI according to a base URI.
       *
       * For example if $uri=/foo/bar and $baseUri=https://symfony.com it will
       * return https://symfony.com/foo/bar
       *
       * If the $uri is not absolute you must pass an absolute $baseUri
       *
       * Resolution order: a URI with a scheme is returned as-is; an empty URI
       * yields the base URI; "#fragment" and "?query" references replace the
       * corresponding part of the base; "//host/..." inherits the base scheme;
       * "/path" replaces the base path; anything else is merged with the
       * directory of the base path and canonicalized.
       *
       * @param string      $uri     The URI to resolve (surrounding whitespace is trimmed)
       * @param string|null $baseUri The URI to resolve against; may only be null when $uri has a scheme
       *
       * @return string The resolved URI
       *
       * @throws \InvalidArgumentException When $uri has no scheme and $baseUri is null
       */
      public static function resolve(string $uri, ?string $baseUri): string
      {
          $uri = trim($uri);

          // absolute URL?
          // parse_url() yields null when no scheme is present; any other result
          // (including false for a URI it cannot parse) returns $uri untouched.
          if (null !== parse_url($uri, \PHP_URL_SCHEME)) {
              return $uri;
          }

          if (null === $baseUri) {
              throw new \InvalidArgumentException('The URI is relative, so you must define its base URI passing an absolute URL.');
          }

          // empty URI
          // Strict comparison on purpose: the string "0" is falsy in PHP but is
          // a perfectly valid relative reference and must not be treated as empty.
          if ('' === $uri) {
              return $baseUri;
          }

          // an anchor
          if ('#' === $uri[0]) {
              return self::cleanupAnchor($baseUri).$uri;
          }

          $baseUriCleaned = self::cleanupUri($baseUri);

          // a query string: replaces the query (and fragment) of the base
          if ('?' === $uri[0]) {
              return $baseUriCleaned.$uri;
          }

          // absolute URL with relative schema
          if (0 === strpos($uri, '//')) {
              // keep only what precedes the first "//" of the base (i.e. "scheme:")
              return preg_replace('#^([^/]*)//.*$#', '$1', $baseUriCleaned).$uri;
          }

          // reduce the base to "scheme://authority", dropping its path
          $baseUriCleaned = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $baseUriCleaned);

          // absolute path
          if ('/' === $uri[0]) {
              return $baseUriCleaned.$uri;
          }

          // relative path
          // parse_url() returns null when the remainder of the base has no path
          // (e.g. only a query), so normalize to a string before using it.
          $basePath = (string) parse_url(substr($baseUri, \strlen($baseUriCleaned)), \PHP_URL_PATH);
          $lastSlash = strrpos($basePath, '/');
          $baseDirectory = false === $lastSlash ? '' : substr($basePath, 0, $lastSlash);
          $path = self::canonicalizePath($baseDirectory.'/'.$uri);

          // canonicalization may strip the leading slash (e.g. "/../foo"), restore it
          return $baseUriCleaned.('' === $path || '/' !== $path[0] ? '/' : '').$path;
      }

      /**
       * Returns the canonicalized URI path (see RFC 3986, section 5.2.4).
       *
       * "." segments are dropped and ".." segments remove the preceding segment.
       * When the last segment is "." or "..", the result ends with a slash.
       */
      private static function canonicalizePath(string $path): string
      {
          if ('' === $path || '/' === $path) {
              return $path;
          }

          $segments = explode('/', $path);

          // Only a final segment that is exactly "." or ".." denotes a directory;
          // a file name that merely ends with a dot (e.g. "file.") must be kept as is.
          $lastSegment = $segments[\count($segments) - 1];
          if ('.' === $lastSegment || '..' === $lastSegment) {
              $segments[] = '';
          }

          $output = [];

          foreach ($segments as $segment) {
              if ('..' === $segment) {
                  array_pop($output);
              } elseif ('.' !== $segment) {
                  $output[] = $segment;
              }
          }

          return implode('/', $output);
      }

      /**
       * Removes the query string and the anchor from the given uri.
       */
      private static function cleanupUri(string $uri): string
      {
          // the anchor is stripped first so a "?" inside the fragment is not
          // mistaken for the start of the query string
          return self::cleanupQuery(self::cleanupAnchor($uri));
      }

      /**
       * Removes the query string from the uri.
       *
       * Everything from the first "?" onwards is dropped.
       */
      private static function cleanupQuery(string $uri): string
      {
          if (false !== $pos = strpos($uri, '?')) {
              return substr($uri, 0, $pos);
          }

          return $uri;
      }

      /**
       * Removes the anchor from the uri.
       *
       * Everything from the first "#" onwards is dropped.
       */
      private static function cleanupAnchor(string $uri): string
      {
          if (false !== $pos = strpos($uri, '#')) {
              return substr($uri, 0, $pos);
          }

          return $uri;
      }
  }