Parser.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\ExpressionLanguage;
  11. /**
  12. * Parses a token stream.
  13. *
  14. * This parser implements a "Precedence climbing" algorithm.
  15. *
  16. * @see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm
  17. * @see http://en.wikipedia.org/wiki/Operator-precedence_parser
  18. *
  19. * @author Fabien Potencier <fabien@symfony.com>
  20. */
  21. class Parser
  22. {
  23. public const OPERATOR_LEFT = 1;
  24. public const OPERATOR_RIGHT = 2;
  25. private $stream;
  26. private $unaryOperators;
  27. private $binaryOperators;
  28. private $functions;
  29. private $names;
  30. private $lint;
  31. public function __construct(array $functions)
  32. {
  33. $this->functions = $functions;
  34. $this->unaryOperators = [
  35. 'not' => ['precedence' => 50],
  36. '!' => ['precedence' => 50],
  37. '-' => ['precedence' => 500],
  38. '+' => ['precedence' => 500],
  39. ];
  40. $this->binaryOperators = [
  41. 'or' => ['precedence' => 10, 'associativity' => self::OPERATOR_LEFT],
  42. '||' => ['precedence' => 10, 'associativity' => self::OPERATOR_LEFT],
  43. 'and' => ['precedence' => 15, 'associativity' => self::OPERATOR_LEFT],
  44. '&&' => ['precedence' => 15, 'associativity' => self::OPERATOR_LEFT],
  45. '|' => ['precedence' => 16, 'associativity' => self::OPERATOR_LEFT],
  46. '^' => ['precedence' => 17, 'associativity' => self::OPERATOR_LEFT],
  47. '&' => ['precedence' => 18, 'associativity' => self::OPERATOR_LEFT],
  48. '==' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
  49. '===' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
  50. '!=' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
  51. '!==' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
  52. '<' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
  53. '>' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
  54. '>=' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
  55. '<=' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
  56. 'not in' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
  57. 'in' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
  58. 'matches' => ['precedence' => 20, 'associativity' => self::OPERATOR_LEFT],
  59. '..' => ['precedence' => 25, 'associativity' => self::OPERATOR_LEFT],
  60. '+' => ['precedence' => 30, 'associativity' => self::OPERATOR_LEFT],
  61. '-' => ['precedence' => 30, 'associativity' => self::OPERATOR_LEFT],
  62. '~' => ['precedence' => 40, 'associativity' => self::OPERATOR_LEFT],
  63. '*' => ['precedence' => 60, 'associativity' => self::OPERATOR_LEFT],
  64. '/' => ['precedence' => 60, 'associativity' => self::OPERATOR_LEFT],
  65. '%' => ['precedence' => 60, 'associativity' => self::OPERATOR_LEFT],
  66. '**' => ['precedence' => 200, 'associativity' => self::OPERATOR_RIGHT],
  67. ];
  68. }
  69. /**
  70. * Converts a token stream to a node tree.
  71. *
  72. * The valid names is an array where the values
  73. * are the names that the user can use in an expression.
  74. *
  75. * If the variable name in the compiled PHP code must be
  76. * different, define it as the key.
  77. *
  78. * For instance, ['this' => 'container'] means that the
  79. * variable 'container' can be used in the expression
  80. * but the compiled code will use 'this'.
  81. *
  82. * @return Node\Node A node tree
  83. *
  84. * @throws SyntaxError
  85. */
  86. public function parse(TokenStream $stream, array $names = [])
  87. {
  88. $this->lint = false;
  89. return $this->doParse($stream, $names);
  90. }
  91. /**
  92. * Validates the syntax of an expression.
  93. *
  94. * The syntax of the passed expression will be checked, but not parsed.
  95. * If you want to skip checking dynamic variable names, pass `null` instead of the array.
  96. *
  97. * @throws SyntaxError When the passed expression is invalid
  98. */
  99. public function lint(TokenStream $stream, ?array $names = []): void
  100. {
  101. $this->lint = true;
  102. $this->doParse($stream, $names);
  103. }
  104. /**
  105. * @throws SyntaxError
  106. */
  107. private function doParse(TokenStream $stream, ?array $names = []): Node\Node
  108. {
  109. $this->stream = $stream;
  110. $this->names = $names;
  111. $node = $this->parseExpression();
  112. if (!$stream->isEOF()) {
  113. throw new SyntaxError(sprintf('Unexpected token "%s" of value "%s".', $stream->current->type, $stream->current->value), $stream->current->cursor, $stream->getExpression());
  114. }
  115. $this->stream = null;
  116. $this->names = null;
  117. return $node;
  118. }
  119. public function parseExpression(int $precedence = 0)
  120. {
  121. $expr = $this->getPrimary();
  122. $token = $this->stream->current;
  123. while ($token->test(Token::OPERATOR_TYPE) && isset($this->binaryOperators[$token->value]) && $this->binaryOperators[$token->value]['precedence'] >= $precedence) {
  124. $op = $this->binaryOperators[$token->value];
  125. $this->stream->next();
  126. $expr1 = $this->parseExpression(self::OPERATOR_LEFT === $op['associativity'] ? $op['precedence'] + 1 : $op['precedence']);
  127. $expr = new Node\BinaryNode($token->value, $expr, $expr1);
  128. $token = $this->stream->current;
  129. }
  130. if (0 === $precedence) {
  131. return $this->parseConditionalExpression($expr);
  132. }
  133. return $expr;
  134. }
  135. protected function getPrimary()
  136. {
  137. $token = $this->stream->current;
  138. if ($token->test(Token::OPERATOR_TYPE) && isset($this->unaryOperators[$token->value])) {
  139. $operator = $this->unaryOperators[$token->value];
  140. $this->stream->next();
  141. $expr = $this->parseExpression($operator['precedence']);
  142. return $this->parsePostfixExpression(new Node\UnaryNode($token->value, $expr));
  143. }
  144. if ($token->test(Token::PUNCTUATION_TYPE, '(')) {
  145. $this->stream->next();
  146. $expr = $this->parseExpression();
  147. $this->stream->expect(Token::PUNCTUATION_TYPE, ')', 'An opened parenthesis is not properly closed');
  148. return $this->parsePostfixExpression($expr);
  149. }
  150. return $this->parsePrimaryExpression();
  151. }
  152. protected function parseConditionalExpression(Node\Node $expr)
  153. {
  154. while ($this->stream->current->test(Token::PUNCTUATION_TYPE, '?')) {
  155. $this->stream->next();
  156. if (!$this->stream->current->test(Token::PUNCTUATION_TYPE, ':')) {
  157. $expr2 = $this->parseExpression();
  158. if ($this->stream->current->test(Token::PUNCTUATION_TYPE, ':')) {
  159. $this->stream->next();
  160. $expr3 = $this->parseExpression();
  161. } else {
  162. $expr3 = new Node\ConstantNode(null);
  163. }
  164. } else {
  165. $this->stream->next();
  166. $expr2 = $expr;
  167. $expr3 = $this->parseExpression();
  168. }
  169. $expr = new Node\ConditionalNode($expr, $expr2, $expr3);
  170. }
  171. return $expr;
  172. }
  173. public function parsePrimaryExpression()
  174. {
  175. $token = $this->stream->current;
  176. switch ($token->type) {
  177. case Token::NAME_TYPE:
  178. $this->stream->next();
  179. switch ($token->value) {
  180. case 'true':
  181. case 'TRUE':
  182. return new Node\ConstantNode(true);
  183. case 'false':
  184. case 'FALSE':
  185. return new Node\ConstantNode(false);
  186. case 'null':
  187. case 'NULL':
  188. return new Node\ConstantNode(null);
  189. default:
  190. if ('(' === $this->stream->current->value) {
  191. if (false === isset($this->functions[$token->value])) {
  192. throw new SyntaxError(sprintf('The function "%s" does not exist.', $token->value), $token->cursor, $this->stream->getExpression(), $token->value, array_keys($this->functions));
  193. }
  194. $node = new Node\FunctionNode($token->value, $this->parseArguments());
  195. } else {
  196. if (!$this->lint || \is_array($this->names)) {
  197. if (!\in_array($token->value, $this->names, true)) {
  198. throw new SyntaxError(sprintf('Variable "%s" is not valid.', $token->value), $token->cursor, $this->stream->getExpression(), $token->value, $this->names);
  199. }
  200. // is the name used in the compiled code different
  201. // from the name used in the expression?
  202. if (\is_int($name = array_search($token->value, $this->names))) {
  203. $name = $token->value;
  204. }
  205. } else {
  206. $name = $token->value;
  207. }
  208. $node = new Node\NameNode($name);
  209. }
  210. }
  211. break;
  212. case Token::NUMBER_TYPE:
  213. case Token::STRING_TYPE:
  214. $this->stream->next();
  215. return new Node\ConstantNode($token->value);
  216. default:
  217. if ($token->test(Token::PUNCTUATION_TYPE, '[')) {
  218. $node = $this->parseArrayExpression();
  219. } elseif ($token->test(Token::PUNCTUATION_TYPE, '{')) {
  220. $node = $this->parseHashExpression();
  221. } else {
  222. throw new SyntaxError(sprintf('Unexpected token "%s" of value "%s".', $token->type, $token->value), $token->cursor, $this->stream->getExpression());
  223. }
  224. }
  225. return $this->parsePostfixExpression($node);
  226. }
  227. public function parseArrayExpression()
  228. {
  229. $this->stream->expect(Token::PUNCTUATION_TYPE, '[', 'An array element was expected');
  230. $node = new Node\ArrayNode();
  231. $first = true;
  232. while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, ']')) {
  233. if (!$first) {
  234. $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'An array element must be followed by a comma');
  235. // trailing ,?
  236. if ($this->stream->current->test(Token::PUNCTUATION_TYPE, ']')) {
  237. break;
  238. }
  239. }
  240. $first = false;
  241. $node->addElement($this->parseExpression());
  242. }
  243. $this->stream->expect(Token::PUNCTUATION_TYPE, ']', 'An opened array is not properly closed');
  244. return $node;
  245. }
  246. public function parseHashExpression()
  247. {
  248. $this->stream->expect(Token::PUNCTUATION_TYPE, '{', 'A hash element was expected');
  249. $node = new Node\ArrayNode();
  250. $first = true;
  251. while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, '}')) {
  252. if (!$first) {
  253. $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'A hash value must be followed by a comma');
  254. // trailing ,?
  255. if ($this->stream->current->test(Token::PUNCTUATION_TYPE, '}')) {
  256. break;
  257. }
  258. }
  259. $first = false;
  260. // a hash key can be:
  261. //
  262. // * a number -- 12
  263. // * a string -- 'a'
  264. // * a name, which is equivalent to a string -- a
  265. // * an expression, which must be enclosed in parentheses -- (1 + 2)
  266. if ($this->stream->current->test(Token::STRING_TYPE) || $this->stream->current->test(Token::NAME_TYPE) || $this->stream->current->test(Token::NUMBER_TYPE)) {
  267. $key = new Node\ConstantNode($this->stream->current->value);
  268. $this->stream->next();
  269. } elseif ($this->stream->current->test(Token::PUNCTUATION_TYPE, '(')) {
  270. $key = $this->parseExpression();
  271. } else {
  272. $current = $this->stream->current;
  273. throw new SyntaxError(sprintf('A hash key must be a quoted string, a number, a name, or an expression enclosed in parentheses (unexpected token "%s" of value "%s".', $current->type, $current->value), $current->cursor, $this->stream->getExpression());
  274. }
  275. $this->stream->expect(Token::PUNCTUATION_TYPE, ':', 'A hash key must be followed by a colon (:)');
  276. $value = $this->parseExpression();
  277. $node->addElement($value, $key);
  278. }
  279. $this->stream->expect(Token::PUNCTUATION_TYPE, '}', 'An opened hash is not properly closed');
  280. return $node;
  281. }
  282. public function parsePostfixExpression(Node\Node $node)
  283. {
  284. $token = $this->stream->current;
  285. while (Token::PUNCTUATION_TYPE == $token->type) {
  286. if ('.' === $token->value) {
  287. $this->stream->next();
  288. $token = $this->stream->current;
  289. $this->stream->next();
  290. if (
  291. Token::NAME_TYPE !== $token->type
  292. &&
  293. // Operators like "not" and "matches" are valid method or property names,
  294. //
  295. // In other words, besides NAME_TYPE, OPERATOR_TYPE could also be parsed as a property or method.
  296. // This is because operators are processed by the lexer prior to names. So "not" in "foo.not()" or "matches" in "foo.matches" will be recognized as an operator first.
  297. // But in fact, "not" and "matches" in such expressions shall be parsed as method or property names.
  298. //
  299. // And this ONLY works if the operator consists of valid characters for a property or method name.
  300. //
  301. // Other types, such as STRING_TYPE and NUMBER_TYPE, can't be parsed as property nor method names.
  302. //
  303. // As a result, if $token is NOT an operator OR $token->value is NOT a valid property or method name, an exception shall be thrown.
  304. (Token::OPERATOR_TYPE !== $token->type || !preg_match('/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A', $token->value))
  305. ) {
  306. throw new SyntaxError('Expected name.', $token->cursor, $this->stream->getExpression());
  307. }
  308. $arg = new Node\ConstantNode($token->value, true);
  309. $arguments = new Node\ArgumentsNode();
  310. if ($this->stream->current->test(Token::PUNCTUATION_TYPE, '(')) {
  311. $type = Node\GetAttrNode::METHOD_CALL;
  312. foreach ($this->parseArguments()->nodes as $n) {
  313. $arguments->addElement($n);
  314. }
  315. } else {
  316. $type = Node\GetAttrNode::PROPERTY_CALL;
  317. }
  318. $node = new Node\GetAttrNode($node, $arg, $arguments, $type);
  319. } elseif ('[' === $token->value) {
  320. $this->stream->next();
  321. $arg = $this->parseExpression();
  322. $this->stream->expect(Token::PUNCTUATION_TYPE, ']');
  323. $node = new Node\GetAttrNode($node, $arg, new Node\ArgumentsNode(), Node\GetAttrNode::ARRAY_CALL);
  324. } else {
  325. break;
  326. }
  327. $token = $this->stream->current;
  328. }
  329. return $node;
  330. }
  331. /**
  332. * Parses arguments.
  333. */
  334. public function parseArguments()
  335. {
  336. $args = [];
  337. $this->stream->expect(Token::PUNCTUATION_TYPE, '(', 'A list of arguments must begin with an opening parenthesis');
  338. while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, ')')) {
  339. if (!empty($args)) {
  340. $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'Arguments must be separated by a comma');
  341. }
  342. $args[] = $this->parseExpression();
  343. }
  344. $this->stream->expect(Token::PUNCTUATION_TYPE, ')', 'A list of arguments must be closed by a parenthesis');
  345. return new Node\Node($args);
  346. }
  347. }