AbstractBrowser.php 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\BrowserKit;
  11. use Symfony\Component\BrowserKit\Exception\BadMethodCallException;
  12. use Symfony\Component\DomCrawler\Crawler;
  13. use Symfony\Component\DomCrawler\Form;
  14. use Symfony\Component\DomCrawler\Link;
  15. use Symfony\Component\Process\PhpProcess;
  16. /**
  17. * Simulates a browser.
  18. *
  19. * To make the actual request, you need to implement the doRequest() method.
  20. *
  21. * If you want to be able to run requests in their own process (insulated flag),
  22. * you need to also implement the getScript() method.
  23. *
  24. * @author Fabien Potencier <fabien@symfony.com>
  25. */
  26. abstract class AbstractBrowser
  27. {
  28. protected $history;
  29. protected $cookieJar;
  30. protected $server = [];
  31. protected $internalRequest;
  32. protected $request;
  33. protected $internalResponse;
  34. protected $response;
  35. protected $crawler;
  36. protected $insulated = false;
  37. protected $redirect;
  38. protected $followRedirects = true;
  39. protected $followMetaRefresh = false;
  40. private $maxRedirects = -1;
  41. private $redirectCount = 0;
  42. private $redirects = [];
  43. private $isMainRequest = true;
  /**
   * Builds a browser from its server parameters, history and cookie jar.
   *
   * @param array          $server    The server parameters (equivalent of $_SERVER)
   * @param History|null   $history   The history to record requests in; a fresh one is created when null
   * @param CookieJar|null $cookieJar The cookie jar to use; a fresh one is created when null
   */
  public function __construct(array $server = [], History $history = null, CookieJar $cookieJar = null)
  {
      $this->setServerParameters($server);

      // Both collaborators are optional: fall back to empty instances.
      $this->history = $history ?? new History();
      $this->cookieJar = $cookieJar ?? new CookieJar();
  }
  /**
   * Enables or disables automatic following of HTTP redirects.
   *
   * @param bool $followRedirects True to follow redirects inside request(), false to stop at the redirect response
   */
  public function followRedirects(bool $followRedirects = true)
  {
      $this->followRedirects = $followRedirects;
  }
  /**
   * Enables or disables automatic following of meta refresh redirects.
   *
   * @param bool $followMetaRefresh True to follow a meta refresh found by request()
   */
  public function followMetaRefresh(bool $followMetaRefresh = true)
  {
      $this->followMetaRefresh = $followMetaRefresh;
  }
  /**
   * Tells whether the browser currently follows redirects automatically.
   *
   * @return bool The value of the follow-redirects flag
   */
  public function isFollowingRedirects()
  {
      return $this->followRedirects;
  }
  /**
   * Sets the maximum number of redirects that the browser may follow.
   *
   * Any negative value is normalised to -1, which followRedirect() treats as
   * "no limit". Note that this call also rewrites the follow-redirects flag:
   * it becomes false when the stored limit is -1 and true otherwise.
   *
   * @param int $maxRedirects The limit; negative values are stored as -1
   */
  public function setMaxRedirects(int $maxRedirects)
  {
      $this->maxRedirects = max(-1, $maxRedirects);
      $this->followRedirects = -1 !== $this->maxRedirects;
  }
  /**
   * Gets the configured maximum number of redirects.
   *
   * @return int The limit, or -1 when no limit is set
   */
  public function getMaxRedirects()
  {
      return $this->maxRedirects;
  }
  /**
   * Sets the insulated flag.
   *
   * When the flag is set, request() performs each request through
   * doRequestInProcess() instead of doRequest().
   *
   * @param bool $insulated Whether to insulate the requests or not
   *
   * @throws \LogicException When insulation is requested but the Symfony Process Component is not installed
   */
  public function insulate(bool $insulated = true)
  {
      // Insulation relies on the Process component; fail early instead of at request time.
      if ($insulated && !class_exists(\Symfony\Component\Process\Process::class)) {
          throw new \LogicException('Unable to isolate requests as the Symfony Process Component is not installed.');
      }

      $this->insulated = $insulated;
  }
  /**
   * Replaces all server parameters.
   *
   * A default HTTP_USER_AGENT is provided; the given array overrides it
   * when it contains that key.
   *
   * @param array $server An array of server parameters
   */
  public function setServerParameters(array $server)
  {
      $defaults = [
          'HTTP_USER_AGENT' => 'Symfony BrowserKit',
      ];

      $this->server = array_merge($defaults, $server);
  }
  /**
   * Sets (or overwrites) a single server parameter.
   *
   * @param string $key   The parameter name
   * @param string $value The parameter value
   */
  public function setServerParameter(string $key, string $value)
  {
      $this->server[$key] = $value;
  }
  /**
   * Reads one server parameter.
   *
   * @param string $key     The parameter name
   * @param mixed  $default The value returned when the key is missing or holds null
   *
   * @return mixed The stored value, or $default
   */
  public function getServerParameter(string $key, $default = '')
  {
      return isset($this->server[$key]) ? $this->server[$key] : $default;
  }
  /**
   * Calls a URI as an XMLHttpRequest.
   *
   * The HTTP_X_REQUESTED_WITH server parameter is set to "XMLHttpRequest"
   * for the duration of the request only; whatever value (or absence of
   * value) was configured before the call is put back afterwards, even when
   * the request throws.
   *
   * @param string      $method        The request method
   * @param string      $uri           The URI to fetch
   * @param array       $parameters    The Request parameters
   * @param array       $files         The files
   * @param array       $server        The server parameters (HTTP headers are referenced with an HTTP_ prefix as PHP does)
   * @param string|null $content       The raw body data
   * @param bool        $changeHistory Whether to update the history or not
   */
  public function xmlHttpRequest(string $method, string $uri, array $parameters = [], array $files = [], array $server = [], string $content = null, bool $changeHistory = true): Crawler
  {
      // Remember the caller's own setting so it is not lost by the cleanup below.
      $hadHeader = \array_key_exists('HTTP_X_REQUESTED_WITH', $this->server);
      $previousValue = $hadHeader ? $this->server['HTTP_X_REQUESTED_WITH'] : null;

      $this->setServerParameter('HTTP_X_REQUESTED_WITH', 'XMLHttpRequest');

      try {
          return $this->request($method, $uri, $parameters, $files, $server, $content, $changeHistory);
      } finally {
          if ($hadHeader) {
              $this->server['HTTP_X_REQUESTED_WITH'] = $previousValue;
          } else {
              unset($this->server['HTTP_X_REQUESTED_WITH']);
          }
      }
  }
  /**
   * Gives access to the browser history.
   *
   * @return History The History instance used by this browser
   */
  public function getHistory()
  {
      return $this->history;
  }
  /**
   * Gives access to the cookie jar.
   *
   * @return CookieJar The CookieJar instance used by this browser
   */
  public function getCookieJar()
  {
      return $this->cookieJar;
  }
  /**
   * Gets the crawler built from the latest response.
   *
   * @return Crawler A Crawler instance
   *
   * @throws BadMethodCallException When no crawler is available yet
   */
  public function getCrawler()
  {
      if (null !== $this->crawler) {
          return $this->crawler;
      }

      throw new BadMethodCallException(sprintf('The "request()" method must be called before "%s()".', __METHOD__));
  }
  /**
   * Gets the BrowserKit response of the latest request.
   *
   * @return Response A BrowserKit Response instance
   *
   * @throws BadMethodCallException When no response is available yet
   */
  public function getInternalResponse()
  {
      if (null !== $this->internalResponse) {
          return $this->internalResponse;
      }

      throw new BadMethodCallException(sprintf('The "request()" method must be called before "%s()".', __METHOD__));
  }
  /**
   * Gets the origin response of the latest request.
   *
   * The origin response is the object handed back by the code that
   * handles requests, before it is filtered into a BrowserKit response.
   *
   * @return object A response instance
   *
   * @throws BadMethodCallException When no response is available yet
   *
   * @see doRequest()
   */
  public function getResponse()
  {
      if (null !== $this->response) {
          return $this->response;
      }

      throw new BadMethodCallException(sprintf('The "request()" method must be called before "%s()".', __METHOD__));
  }
  /**
   * Gets the BrowserKit request that was sent last.
   *
   * @return Request A BrowserKit Request instance
   *
   * @throws BadMethodCallException When no request was made yet
   */
  public function getInternalRequest()
  {
      if (null !== $this->internalRequest) {
          return $this->internalRequest;
      }

      throw new BadMethodCallException(sprintf('The "request()" method must be called before "%s()".', __METHOD__));
  }
  /**
   * Gets the origin request that was sent last.
   *
   * The origin request is the object passed to the code that handles
   * requests, i.e. the BrowserKit request after filterRequest().
   *
   * @return object A Request instance
   *
   * @throws BadMethodCallException When no request was made yet
   *
   * @see doRequest()
   */
  public function getRequest()
  {
      if (null !== $this->request) {
          return $this->request;
      }

      throw new BadMethodCallException(sprintf('The "request()" method must be called before "%s()".', __METHOD__));
  }
  /**
   * Clicks on a given link.
   *
   * A Form instance passed here is submitted instead of being requested
   * as a plain link.
   *
   * @return Crawler
   */
  public function click(Link $link)
  {
      return $link instanceof Form
          ? $this->submit($link)
          : $this->request($link->getMethod(), $link->getUri());
  }
  /**
   * Clicks the first link (or clickable image) that contains the given text.
   *
   * @param string $linkText The text of the link or the alt attribute of the clickable image
   *
   * @throws BadMethodCallException When no page has been requested yet
   */
  public function clickLink(string $linkText): Crawler
  {
      if (null === $this->crawler) {
          throw new BadMethodCallException(sprintf('The "request()" method must be called before "%s()".', __METHOD__));
      }

      $target = $this->crawler->selectLink($linkText)->link();

      return $this->click($target);
  }
  /**
   * Submits a form.
   *
   * The given values are applied to the form first; the request is then
   * built from the form's method, URI, PHP values and PHP files.
   *
   * @param array $values           An array of form field values
   * @param array $serverParameters An array of server parameters
   *
   * @return Crawler
   */
  public function submit(Form $form, array $values = [], array $serverParameters = [])
  {
      $form->setValues($values);

      $method = $form->getMethod();
      $uri = $form->getUri();
      $fields = $form->getPhpValues();
      $uploads = $form->getPhpFiles();

      return $this->request($method, $uri, $fields, $uploads, $serverParameters);
  }
  /**
   * Finds the first form that contains a button with the given content and
   * uses it to submit the given form field values.
   *
   * @param string $button           The text content, id, value or name of the form <button> or <input type="submit">
   * @param array  $fieldValues      Use this syntax: ['my_form[name]' => '...', 'my_form[email]' => '...']
   * @param string $method           The HTTP method used to submit the form
   * @param array  $serverParameters These values override the ones stored in $_SERVER (HTTP headers must include an HTTP_ prefix as PHP does)
   *
   * @throws BadMethodCallException When no page has been requested yet
   */
  public function submitForm(string $button, array $fieldValues = [], string $method = 'POST', array $serverParameters = []): Crawler
  {
      if (null === $this->crawler) {
          throw new BadMethodCallException(sprintf('The "request()" method must be called before "%s()".', __METHOD__));
      }

      // The field values are already applied while building the form, so none are passed to submit().
      $form = $this->crawler->selectButton($button)->form($fieldValues, $method);

      return $this->submit($form, [], $serverParameters);
  }
  /**
   * Calls a URI.
   *
   * The URI is made absolute, the server parameters are merged with the
   * browser defaults, the request is sent (in-process or insulated), cookies
   * are updated from the response and, when enabled, HTTP or meta refresh
   * redirects are followed.
   *
   * @param string $method        The request method
   * @param string $uri           The URI to fetch
   * @param array  $parameters    The Request parameters
   * @param array  $files         The files
   * @param array  $server        The server parameters (HTTP headers are referenced with an HTTP_ prefix as PHP does)
   * @param string $content       The raw body data
   * @param bool   $changeHistory Whether to update the history or not (only used internally for back(), forward(), and reload())
   *
   * @return Crawler
   */
  public function request(string $method, string $uri, array $parameters = [], array $files = [], array $server = [], string $content = null, bool $changeHistory = true)
  {
      // A call issued by followRedirect() is one more hop; any other call starts a new chain.
      $this->redirectCount = $this->isMainRequest ? 0 : $this->redirectCount + 1;

      $requestedUri = $uri;
      $uri = $this->getAbsoluteUri($requestedUri);

      $server = array_merge($this->server, $server);

      // The caller gave no host: a configured HTTP_HOST replaces the resolved one.
      if (!empty($server['HTTP_HOST']) && null === parse_url($requestedUri, \PHP_URL_HOST)) {
          $hostPattern = '{^(https?\://)'.preg_quote($this->extractHost($uri)).'}';
          $uri = preg_replace($hostPattern, '${1}'.$server['HTTP_HOST'], $uri);
      }

      // The caller gave no scheme: a configured HTTPS flag decides it.
      if (isset($server['HTTPS']) && null === parse_url($requestedUri, \PHP_URL_SCHEME)) {
          $schemePattern = '{^'.parse_url($uri, \PHP_URL_SCHEME).'}';
          $uri = preg_replace($schemePattern, $server['HTTPS'] ? 'https' : 'http', $uri);
      }

      // Use the current history entry as referer unless one was given.
      if (!isset($server['HTTP_REFERER']) && !$this->history->isEmpty()) {
          $server['HTTP_REFERER'] = $this->history->current()->getUri();
      }

      if (empty($server['HTTP_HOST'])) {
          $server['HTTP_HOST'] = $this->extractHost($uri);
      }

      $server['HTTPS'] = 'https' === parse_url($uri, \PHP_URL_SCHEME);

      $cookies = $this->cookieJar->allValues($uri);
      $this->internalRequest = new Request($uri, $method, $parameters, $files, $cookies, $server, $content);
      $this->request = $this->filterRequest($this->internalRequest);

      if ($changeHistory) {
          $this->history->add($this->internalRequest);
      }

      $this->response = $this->insulated
          ? $this->doRequestInProcess($this->request)
          : $this->doRequest($this->request);

      $this->internalResponse = $this->filterResponse($this->response);
      $this->cookieJar->updateFromResponse($this->internalResponse, $uri);

      // Only 3xx responses carry a redirect target.
      $status = $this->internalResponse->getStatusCode();
      $isRedirection = $status >= 300 && $status < 400;
      $this->redirect = $isRedirection ? $this->internalResponse->getHeader('Location') : null;

      if ($this->followRedirects && $this->redirect) {
          // Mark this history entry as a redirect so back()/forward() skip it.
          $this->redirects[serialize($this->history->current())] = true;

          return $this->crawler = $this->followRedirect();
      }

      $this->crawler = $this->createCrawlerFromContent(
          $this->internalRequest->getUri(),
          $this->internalResponse->getContent(),
          $this->internalResponse->getHeader('Content-Type') ?? ''
      );

      // Check for meta refresh redirect
      if ($this->followMetaRefresh) {
          $metaRefreshUrl = $this->getMetaRefreshUrl();

          if (null !== $metaRefreshUrl) {
              $this->redirect = $metaRefreshUrl;
              $this->redirects[serialize($this->history->current())] = true;
              $this->crawler = $this->followRedirect();
          }
      }

      return $this->crawler;
  }
  /**
   * Makes a request in another process.
   *
   * The script returned by getScript() is run through PhpProcess and its
   * output is unserialized into the origin response. Deprecations that the
   * child wrote to the file named by SYMFONY_DEPRECATIONS_SERIALIZE are
   * re-triggered in this process.
   *
   * @param object $request An origin request instance
   *
   * @return object An origin response instance
   *
   * @throws \RuntimeException When the process fails or its output does not start with a serialized object
   */
  protected function doRequestInProcess($request)
  {
      $deprecationsFile = tempnam(sys_get_temp_dir(), 'deprec');
      putenv('SYMFONY_DEPRECATIONS_SERIALIZE='.$deprecationsFile);
      $_ENV['SYMFONY_DEPRECATIONS_SERIALIZE'] = $deprecationsFile;

      $process = new PhpProcess($this->getScript($request), null, null);
      $process->run();

      if (file_exists($deprecationsFile)) {
          $serialized = file_get_contents($deprecationsFile);
          unlink($deprecationsFile);

          $entries = $serialized ? unserialize($serialized) : [];

          // Each entry holds an "unsilenced" flag at index 0 and the message at index 1.
          foreach ($entries as $entry) {
              if (!$entry[0]) {
                  @trigger_error($entry[1], \E_USER_DEPRECATED);
              } else {
                  // unsilenced on purpose
                  trigger_error($entry[1], \E_USER_DEPRECATED);
              }
          }
      }

      // The output must be a serialized object ("O:<length>:...") to be accepted.
      if ($process->isSuccessful() && preg_match('/^O\:\d+\:/', $process->getOutput())) {
          return unserialize($process->getOutput());
      }

      throw new \RuntimeException(sprintf('OUTPUT: %s ERROR OUTPUT: %s.', $process->getOutput(), $process->getErrorOutput()));
  }
  /**
   * Performs the actual request.
   *
   * Concrete browsers implement this to hand the origin request to the
   * code that handles it and return what that code produced.
   *
   * @param object $request An origin request instance
   *
   * @return object An origin response instance
   */
  abstract protected function doRequest($request);
  /**
   * Returns the script to execute when the request must be insulated.
   *
   * This default implementation always throws; browsers that support
   * insulated requests must override it.
   *
   * @param object $request An origin request instance
   *
   * @throws \LogicException When the method has not been overridden
   */
  protected function getScript($request)
  {
      throw new \LogicException('To insulate requests, you need to override the getScript() method.');
  }
  /**
   * Converts the BrowserKit request into the origin one.
   *
   * This default implementation returns the request unchanged.
   *
   * @return object An origin request instance
   */
  protected function filterRequest(Request $request)
  {
      return $request;
  }
  /**
   * Converts the origin response into the BrowserKit one.
   *
   * This default implementation returns the response unchanged.
   *
   * @param object $response The origin response to filter
   *
   * @return Response A BrowserKit Response instance
   */
  protected function filterResponse($response)
  {
      return $response;
  }
  /**
   * Creates a crawler for the given content.
   *
   * Null is returned when the DomCrawler component is not available.
   *
   * @param string $uri     The URI the crawler is created with
   * @param string $content The content added to the crawler
   * @param string $type    The content type passed along with the content
   *
   * @return Crawler|null
   */
  protected function createCrawlerFromContent(string $uri, string $content, string $type)
  {
      if (class_exists(Crawler::class)) {
          $crawler = new Crawler(null, $uri);
          $crawler->addContent($content, $type);

          return $crawler;
      }

      return null;
  }
  /**
   * Goes back in the browser history.
   *
   * History entries recorded as redirects are skipped. The history itself
   * is not modified by the replayed request.
   *
   * @return Crawler
   */
  public function back()
  {
      $previous = $this->history->back();

      // Entries flagged in $this->redirects are intermediate hops: keep stepping.
      while (isset($this->redirects[serialize($previous)])) {
          $previous = $this->history->back();
      }

      return $this->requestFromRequest($previous, false);
  }
  /**
   * Goes forward in the browser history.
   *
   * History entries recorded as redirects are skipped. The history itself
   * is not modified by the replayed request.
   *
   * @return Crawler
   */
  public function forward()
  {
      $next = $this->history->forward();

      // Entries flagged in $this->redirects are intermediate hops: keep stepping.
      while (isset($this->redirects[serialize($next)])) {
          $next = $this->history->forward();
      }

      return $this->requestFromRequest($next, false);
  }
  /**
   * Reloads the current page.
   *
   * The current history entry is requested again without adding a new
   * history entry.
   *
   * @return Crawler
   */
  public function reload()
  {
      $current = $this->history->current();

      return $this->requestFromRequest($current, false);
  }
  /**
   * Follows the redirect recorded by the latest request.
   *
   * For 301, 302 and 303 responses the redirect is requested with GET and
   * without files or body; for any other status the original method, files
   * and body are reused. Parameters are dropped for GET requests.
   *
   * @return Crawler
   *
   * @throws \LogicException If request was not a redirect, or if the maximum number of redirects was exceeded
   */
  public function followRedirect()
  {
      if (empty($this->redirect)) {
          throw new \LogicException('The request was not redirected.');
      }

      // -1 means "no limit".
      if (-1 !== $this->maxRedirects && $this->redirectCount > $this->maxRedirects) {
          $this->redirectCount = 0;

          throw new \LogicException(sprintf('The maximum number (%d) of redirections was reached.', $this->maxRedirects));
      }

      $request = $this->internalRequest;

      if (\in_array($this->internalResponse->getStatusCode(), [301, 302, 303])) {
          $method = 'GET';
          $files = [];
          $content = null;
      } else {
          $method = $request->getMethod();
          $files = $request->getFiles();
          $content = $request->getContent();
      }

      if ('GET' === strtoupper($method)) {
          // Don't forward parameters for GET request as it should reach the redirection URI
          $parameters = [];
      } else {
          $parameters = $request->getParameters();
      }

      $server = $request->getServer();
      $server = $this->updateServerFromUri($server, $this->redirect);

      // Tell request() that this call is a redirect hop, so it increments the
      // counter instead of resetting it.
      $this->isMainRequest = false;

      try {
          return $this->request($method, $this->redirect, $parameters, $files, $server, $content);
      } finally {
          // Always restore the flag: if the nested request throws (for example
          // because the redirect limit was hit), later top-level requests must
          // still reset the redirect counter.
          $this->isMainRequest = true;
      }
  }
  /**
   * Extracts the target of an immediate (zero-delay) meta refresh from the current page.
   *
   * Only `head meta[http-equiv="refresh"]` elements whose content starts
   * with a delay of 0 are considered. The URL may be single-quoted,
   * double-quoted or unquoted. Trailing whitespace is trimmed and tab,
   * carriage return and line feed characters are removed from the URL.
   *
   * @return string|null The refresh URL, or null when the page has none
   *
   * @see https://dev.w3.org/html5/spec-preview/the-meta-element.html#attr-meta-http-equiv-refresh
   */
  private function getMetaRefreshUrl(): ?string
  {
      $metaRefresh = $this->getCrawler()->filter('head meta[http-equiv="refresh"]');

      foreach ($metaRefresh->extract(['content']) as $content) {
          if (preg_match('/^\s*0\s*;\s*URL\s*=\s*(?|\'([^\']++)|"([^"]++)|([^\'"].*))/i', $content, $m)) {
              // Strip each control character individually; a single "\t\r\n"
              // search string would only match that exact three-character sequence.
              return str_replace(["\t", "\r", "\n"], '', rtrim($m[1]));
          }
      }

      return null;
  }
  /**
   * Restarts the client.
   *
   * Clears the cookie jar and the history.
   */
  public function restart()
  {
      $this->cookieJar->clear();
      $this->history->clear();
  }
  /**
   * Takes a URI and converts it to absolute if it is not already absolute.
   *
   * Relative URIs are resolved against the current history entry or, when
   * the history is empty, against a base built from the HTTPS and HTTP_HOST
   * server parameters ("localhost" when no host is configured).
   *
   * @param string $uri A URI
   *
   * @return string An absolute URI
   */
  protected function getAbsoluteUri(string $uri)
  {
      // already absolute?
      if (0 === strpos($uri, 'http://') || 0 === strpos($uri, 'https://')) {
          return $uri;
      }

      if (!$this->history->isEmpty()) {
          $currentUri = $this->history->current()->getUri();
      } else {
          $currentUri = sprintf('http%s://%s/',
              isset($this->server['HTTPS']) ? 's' : '',
              $this->server['HTTP_HOST'] ?? 'localhost'
          );
      }

      // protocol relative URL
      if (0 === strpos($uri, '//')) {
          return parse_url($currentUri, \PHP_URL_SCHEME).':'.$uri;
      }

      // anchor or query string parameters?
      // Compare against '' explicitly: a truthiness test would also treat the
      // valid relative URI "0" as empty.
      if ('' === $uri || '#' === $uri[0] || '?' === $uri[0]) {
          return preg_replace('/[#?].*?$/', '', $currentUri).$uri;
      }

      if ('/' !== $uri[0]) {
          $path = parse_url($currentUri, \PHP_URL_PATH);

          if (!\is_string($path) || '' === $path) {
              // The current URI has no path (e.g. "http://localhost"): resolve
              // against the root so host and relative URI are not glued together.
              $path = '/';
          } elseif ('/' !== substr($path, -1)) {
              // Drop the last path segment, keeping the trailing slash.
              $path = substr($path, 0, strrpos($path, '/') + 1);
          }

          $uri = $path.$uri;
      }

      // Keep only "scheme://authority" of the current URI and append the resolved path.
      return preg_replace('#^(.*?//[^/]+)\/.*$#', '$1', $currentUri).$uri;
  }
  /**
   * Replays a BrowserKit Request object.
   *
   * Method, URI, parameters, files, server parameters and content are all
   * taken from the given request.
   *
   * @param bool $changeHistory Whether to update the history or not (only used internally for back(), forward(), and reload())
   *
   * @return Crawler
   */
  protected function requestFromRequest(Request $request, $changeHistory = true)
  {
      $method = $request->getMethod();
      $uri = $request->getUri();
      $parameters = $request->getParameters();
      $files = $request->getFiles();
      $server = $request->getServer();
      $content = $request->getContent();

      return $this->request($method, $uri, $parameters, $files, $server, $content, $changeHistory);
  }
  /**
   * Adapts server parameters to the URI of a redirect target.
   *
   * HTTP_HOST is replaced by the host (and port) of the URI. HTTPS follows
   * the URI scheme; when the URI has no scheme the existing HTTPS value is
   * kept, defaulting to false if none was set. The conditional headers
   * HTTP_IF_NONE_MATCH and HTTP_IF_MODIFIED_SINCE are removed.
   *
   * @param array  $server The server parameters of the redirected request
   * @param string $uri    The redirect target
   *
   * @return array The updated server parameters
   */
  private function updateServerFromUri(array $server, string $uri): array
  {
      $server['HTTP_HOST'] = $this->extractHost($uri);

      $scheme = parse_url($uri, \PHP_URL_SCHEME);
      // Guard the fallback read: the key is not guaranteed to exist in $server.
      $server['HTTPS'] = null === $scheme ? ($server['HTTPS'] ?? false) : 'https' === $scheme;

      unset($server['HTTP_IF_NONE_MATCH'], $server['HTTP_IF_MODIFIED_SINCE']);

      return $server;
  }
  /**
   * Extracts the host of a URI, followed by ":port" when the URI has a port.
   *
   * @param string $uri The URI to inspect
   *
   * @return string|null The host with optional port, or null when the URI has no host
   */
  private function extractHost(string $uri): ?string
  {
      $host = parse_url($uri, \PHP_URL_HOST);
      $port = parse_url($uri, \PHP_URL_PORT);

      return $port ? $host.':'.$port : $host;
  }
  605. }