TokenParser.php 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. <?php
  2. namespace Doctrine\Common\Annotations;
  3. use function array_merge;
  4. use function count;
  5. use function explode;
  6. use function strtolower;
  7. use function token_get_all;
  8. use const PHP_VERSION_ID;
  9. use const T_AS;
  10. use const T_COMMENT;
  11. use const T_DOC_COMMENT;
  12. use const T_NAME_FULLY_QUALIFIED;
  13. use const T_NAME_QUALIFIED;
  14. use const T_NAMESPACE;
  15. use const T_NS_SEPARATOR;
  16. use const T_STRING;
  17. use const T_USE;
  18. use const T_WHITESPACE;
  /**
   * Parses a file for namespaces/use/class declarations.
   *
   * The source text is tokenized once in the constructor. All parse methods then walk that token list
   * through one shared, forward-only pointer, so each call continues where the previous call stopped.
   */
  class TokenParser
  {
      /**
       * The token list as returned by token_get_all(): array tokens for identifiable tokens and
       * plain strings for single-character punctuation such as ";" or ",".
       *
       * @phpstan-var list<mixed[]|string>
       */
      private $tokens;

      /**
       * The number of tokens.
       *
       * @var int
       */
      private $numTokens;

      /**
       * The current array pointer (index of the next token that next() will look at).
       *
       * @var int
       */
      private $pointer = 0;

      /**
       * Tokenizes the given PHP source so it can be walked with next() and the parse*() methods.
       *
       * @param string $contents
       */
      public function __construct($contents)
      {
          $this->tokens = token_get_all($contents);

          // The PHP parser sets internal compiler globals for certain things. Annoyingly, the last docblock comment it
          // saw gets stored in doc_comment. When it comes to compile the next thing to be include()d this stored
          // doc_comment becomes owned by the first thing the compiler sees in the file that it considers might have a
          // docblock. If the first thing in the file is a class without a doc block this would cause calls to
          // getDocBlock() on said class to return our long lost doc_comment. Argh.
          // As a workaround, make the parser parse an empty docblock. Sure, getDocBlock() will return this, but at
          // least it's harmless to us.
          token_get_all("<?php\n/**\n *\n */");

          $this->numTokens = count($this->tokens);
      }

      /**
       * Gets the next non whitespace and non comment token.
       *
       * The internal pointer is advanced past every token that is inspected, including the skipped ones
       * and the one that is returned.
       *
       * @param bool $docCommentIsComment If TRUE then a doc comment is considered a comment and skipped.
       *                                  If FALSE then only whitespace and normal comments are skipped.
       *
       * @return mixed[]|string|null The token if exists, null otherwise.
       */
      public function next($docCommentIsComment = true)
      {
          while ($this->pointer < $this->numTokens) {
              $token = $this->tokens[$this->pointer++];

              // For string tokens this is the first character, which never equals an integer token id.
              $type = $token[0];

              if ($type === T_WHITESPACE || $type === T_COMMENT) {
                  continue;
              }

              if ($docCommentIsComment && $type === T_DOC_COMMENT) {
                  continue;
              }

              return $token;
          }

          return null;
      }

      /**
       * Parses a single use statement.
       *
       * Must be called right after the T_USE token has been consumed. Handles comma separated imports,
       * explicit aliases ("as") and group use declarations ("use Foo\{Bar, Baz as Qux};"). Parsing stops
       * after the terminating ";". Any token that cannot be part of a class import (for example the
       * "function"/"const" keywords or the "(" of a closure's use clause) aborts parsing, and whatever was
       * collected up to that point is returned.
       *
       * @return array<string, string> A list with all found class names for a use statement,
       *                               keyed by the lower-cased alias.
       */
      public function parseUseStatement()
      {
          $groupRoot     = '';
          $class         = '';
          $alias         = '';
          $statements    = [];
          $explicitAlias = false;

          while (($token = $this->next())) {
              if (! $explicitAlias && $token[0] === T_STRING) {
                  // Name segment: extends the class name; the last segment seen is the implicit alias.
                  $class .= $token[1];
                  $alias  = $token[1];
              } elseif ($explicitAlias && $token[0] === T_STRING) {
                  // Identifier following "as".
                  $alias = $token[1];
              } elseif (
                  PHP_VERSION_ID >= 80000 &&
                  ($token[0] === T_NAME_QUALIFIED || $token[0] === T_NAME_FULLY_QUALIFIED)
              ) {
                  // PHP 8 emits a whole (fully) qualified name as one token.
                  $class .= $token[1];

                  $classSplit = explode('\\', $token[1]);
                  $alias      = $classSplit[count($classSplit) - 1];
              } elseif ($token[0] === T_NS_SEPARATOR) {
                  $class .= '\\';
                  $alias  = '';
              } elseif ($token[0] === T_AS) {
                  $explicitAlias = true;
                  $alias         = '';
              } elseif ($token === ',') {
                  // Skip empty entries so a stray comma never registers a bogus import.
                  if ($class !== '') {
                      $statements[strtolower($alias)] = $groupRoot . $class;
                  }

                  $class         = '';
                  $alias         = '';
                  $explicitAlias = false;
              } elseif ($token === ';') {
                  // After a trailing comma in a group use ("use Foo\{Bar, Baz,};") nothing is pending here;
                  // recording it anyway would add an '' => 'Foo\' entry.
                  if ($class !== '') {
                      $statements[strtolower($alias)] = $groupRoot . $class;
                  }

                  break;
              } elseif ($token === '{') {
                  // Start of a group use: everything collected so far is the common prefix.
                  $groupRoot = $class;
                  $class     = '';
              } elseif ($token === '}') {
                  // End of the group; the pending entry is flushed by the following ";".
                  continue;
              } else {
                  break;
              }
          }

          return $statements;
      }

      /**
       * Gets all use statements.
       *
       * Walks the remaining tokens to the end. Each time a namespace declaration matching $namespaceName
       * is encountered, the statements collected so far are discarded, so the result contains the use
       * statements found after the last matching declaration (or all of them if none matched).
       *
       * @param string $namespaceName The namespace name of the reflected class.
       *
       * @return array<string, string> A list with all found use statements.
       */
      public function parseUseStatements($namespaceName)
      {
          $statements = [];

          while (($token = $this->next())) {
              if ($token[0] === T_USE) {
                  $statements = array_merge($statements, $this->parseUseStatement());
                  continue;
              }

              if ($token[0] !== T_NAMESPACE || $this->parseNamespace() !== $namespaceName) {
                  continue;
              }

              // Get fresh array for new namespace. This is to prevent the parser to collect the use statements
              // for a previous namespace with the same name. This is the case if a namespace is defined twice
              // or if a namespace with the same name is commented out.
              $statements = [];
          }

          return $statements;
      }

      /**
       * Gets the namespace.
       *
       * Concatenates consecutive name tokens starting at the current position. The first token that is
       * not part of a name ends the scan and is consumed as well.
       *
       * @return string The found namespace.
       */
      public function parseNamespace()
      {
          $name = '';

          while (($token = $this->next())) {
              $isNamePart = $token[0] === T_STRING
                  || $token[0] === T_NS_SEPARATOR
                  || (
                      PHP_VERSION_ID >= 80000 &&
                      ($token[0] === T_NAME_QUALIFIED || $token[0] === T_NAME_FULLY_QUALIFIED)
                  );

              if (! $isNamePart) {
                  break;
              }

              $name .= $token[1];
          }

          return $name;
      }

      /**
       * Gets the class name.
       *
       * @return string The found class name.
       */
      public function parseClass()
      {
          // Namespaces and class names are tokenized the same: T_STRINGs
          // separated by T_NS_SEPARATOR so we can use one function to provide
          // both.
          return $this->parseNamespace();
      }
  }