LexerTest.php 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. <?php declare(strict_types=1);
  2. namespace PhpParser;
  3. use PhpParser\Parser\Tokens;
  4. use PHPUnit\Framework\TestCase;
  5. class LexerTest extends TestCase
  6. {
  7. /* To allow overwriting in parent class */
  8. protected function getLexer(array $options = []) {
  9. return new Lexer($options);
  10. }
  11. /**
  12. * @dataProvider provideTestError
  13. */
  14. public function testError($code, $messages) {
  15. if (defined('HHVM_VERSION')) {
  16. $this->markTestSkipped('HHVM does not throw warnings from token_get_all()');
  17. }
  18. $errorHandler = new ErrorHandler\Collecting();
  19. $lexer = $this->getLexer(['usedAttributes' => [
  20. 'comments', 'startLine', 'endLine', 'startFilePos', 'endFilePos'
  21. ]]);
  22. $lexer->startLexing($code, $errorHandler);
  23. $errors = $errorHandler->getErrors();
  24. $this->assertCount(count($messages), $errors);
  25. for ($i = 0; $i < count($messages); $i++) {
  26. $this->assertSame($messages[$i], $errors[$i]->getMessageWithColumnInfo($code));
  27. }
  28. }
  29. public function provideTestError() {
  30. return [
  31. ["<?php /*", ["Unterminated comment from 1:7 to 1:9"]],
  32. ["<?php \1", ["Unexpected character \"\1\" (ASCII 1) from 1:7 to 1:7"]],
  33. ["<?php \0", ["Unexpected null byte from 1:7 to 1:7"]],
  34. // Error with potentially emulated token
  35. ["<?php ?? \0", ["Unexpected null byte from 1:10 to 1:10"]],
  36. ["<?php\n\0\1 foo /* bar", [
  37. "Unexpected null byte from 2:1 to 2:1",
  38. "Unexpected character \"\1\" (ASCII 1) from 2:2 to 2:2",
  39. "Unterminated comment from 2:8 to 2:14"
  40. ]],
  41. ];
  42. }
  43. /**
  44. * @dataProvider provideTestLex
  45. */
  46. public function testLex($code, $options, $tokens) {
  47. $lexer = $this->getLexer($options);
  48. $lexer->startLexing($code);
  49. while ($id = $lexer->getNextToken($value, $startAttributes, $endAttributes)) {
  50. $token = array_shift($tokens);
  51. $this->assertSame($token[0], $id);
  52. $this->assertSame($token[1], $value);
  53. $this->assertEquals($token[2], $startAttributes);
  54. $this->assertEquals($token[3], $endAttributes);
  55. }
  56. }
  57. public function provideTestLex() {
  58. return [
  59. // tests conversion of closing PHP tag and drop of whitespace and opening tags
  60. [
  61. '<?php tokens ?>plaintext',
  62. [],
  63. [
  64. [
  65. Tokens::T_STRING, 'tokens',
  66. ['startLine' => 1], ['endLine' => 1]
  67. ],
  68. [
  69. ord(';'), '?>',
  70. ['startLine' => 1], ['endLine' => 1]
  71. ],
  72. [
  73. Tokens::T_INLINE_HTML, 'plaintext',
  74. ['startLine' => 1, 'hasLeadingNewline' => false],
  75. ['endLine' => 1]
  76. ],
  77. ]
  78. ],
  79. // tests line numbers
  80. [
  81. '<?php' . "\n" . '$ token /** doc' . "\n" . 'comment */ $',
  82. [],
  83. [
  84. [
  85. ord('$'), '$',
  86. ['startLine' => 2], ['endLine' => 2]
  87. ],
  88. [
  89. Tokens::T_STRING, 'token',
  90. ['startLine' => 2], ['endLine' => 2]
  91. ],
  92. [
  93. ord('$'), '$',
  94. [
  95. 'startLine' => 3,
  96. 'comments' => [
  97. new Comment\Doc('/** doc' . "\n" . 'comment */', 2, 14, 5),
  98. ]
  99. ],
  100. ['endLine' => 3]
  101. ],
  102. ]
  103. ],
  104. // tests comment extraction
  105. [
  106. '<?php /* comment */ // comment' . "\n" . '/** docComment 1 *//** docComment 2 */ token',
  107. [],
  108. [
  109. [
  110. Tokens::T_STRING, 'token',
  111. [
  112. 'startLine' => 2,
  113. 'comments' => [
  114. new Comment('/* comment */', 1, 6, 1),
  115. new Comment('// comment' . "\n", 1, 20, 3),
  116. new Comment\Doc('/** docComment 1 */', 2, 31, 4),
  117. new Comment\Doc('/** docComment 2 */', 2, 50, 5),
  118. ],
  119. ],
  120. ['endLine' => 2]
  121. ],
  122. ]
  123. ],
  124. // tests differing start and end line
  125. [
  126. '<?php "foo' . "\n" . 'bar"',
  127. [],
  128. [
  129. [
  130. Tokens::T_CONSTANT_ENCAPSED_STRING, '"foo' . "\n" . 'bar"',
  131. ['startLine' => 1], ['endLine' => 2]
  132. ],
  133. ]
  134. ],
  135. // tests exact file offsets
  136. [
  137. '<?php "a";' . "\n" . '// foo' . "\n" . '"b";',
  138. ['usedAttributes' => ['startFilePos', 'endFilePos']],
  139. [
  140. [
  141. Tokens::T_CONSTANT_ENCAPSED_STRING, '"a"',
  142. ['startFilePos' => 6], ['endFilePos' => 8]
  143. ],
  144. [
  145. ord(';'), ';',
  146. ['startFilePos' => 9], ['endFilePos' => 9]
  147. ],
  148. [
  149. Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"',
  150. ['startFilePos' => 18], ['endFilePos' => 20]
  151. ],
  152. [
  153. ord(';'), ';',
  154. ['startFilePos' => 21], ['endFilePos' => 21]
  155. ],
  156. ]
  157. ],
  158. // tests token offsets
  159. [
  160. '<?php "a";' . "\n" . '// foo' . "\n" . '"b";',
  161. ['usedAttributes' => ['startTokenPos', 'endTokenPos']],
  162. [
  163. [
  164. Tokens::T_CONSTANT_ENCAPSED_STRING, '"a"',
  165. ['startTokenPos' => 1], ['endTokenPos' => 1]
  166. ],
  167. [
  168. ord(';'), ';',
  169. ['startTokenPos' => 2], ['endTokenPos' => 2]
  170. ],
  171. [
  172. Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"',
  173. ['startTokenPos' => 5], ['endTokenPos' => 5]
  174. ],
  175. [
  176. ord(';'), ';',
  177. ['startTokenPos' => 6], ['endTokenPos' => 6]
  178. ],
  179. ]
  180. ],
  181. // tests all attributes being disabled
  182. [
  183. '<?php /* foo */ $bar;',
  184. ['usedAttributes' => []],
  185. [
  186. [
  187. Tokens::T_VARIABLE, '$bar',
  188. [], []
  189. ],
  190. [
  191. ord(';'), ';',
  192. [], []
  193. ]
  194. ]
  195. ],
  196. // tests no tokens
  197. [
  198. '',
  199. [],
  200. []
  201. ],
  202. ];
  203. }
  204. /**
  205. * @dataProvider provideTestHaltCompiler
  206. */
  207. public function testHandleHaltCompiler($code, $remaining) {
  208. $lexer = $this->getLexer();
  209. $lexer->startLexing($code);
  210. while (Tokens::T_HALT_COMPILER !== $lexer->getNextToken());
  211. $this->assertSame($remaining, $lexer->handleHaltCompiler());
  212. $this->assertSame(0, $lexer->getNextToken());
  213. }
  214. public function provideTestHaltCompiler() {
  215. return [
  216. ['<?php ... __halt_compiler();Remaining Text', 'Remaining Text'],
  217. ['<?php ... __halt_compiler ( ) ;Remaining Text', 'Remaining Text'],
  218. ['<?php ... __halt_compiler() ?>Remaining Text', 'Remaining Text'],
  219. //array('<?php ... __halt_compiler();' . "\0", "\0"),
  220. //array('<?php ... __halt_compiler /* */ ( ) ;Remaining Text', 'Remaining Text'),
  221. ];
  222. }
  223. public function testHandleHaltCompilerError() {
  224. $this->expectException(Error::class);
  225. $this->expectExceptionMessage('__HALT_COMPILER must be followed by "();"');
  226. $lexer = $this->getLexer();
  227. $lexer->startLexing('<?php ... __halt_compiler invalid ();');
  228. while (Tokens::T_HALT_COMPILER !== $lexer->getNextToken());
  229. $lexer->handleHaltCompiler();
  230. }
  231. public function testGetTokens() {
  232. $code = '<?php "a";' . "\n" . '// foo' . "\n" . '"b";';
  233. $expectedTokens = [
  234. [T_OPEN_TAG, '<?php ', 1],
  235. [T_CONSTANT_ENCAPSED_STRING, '"a"', 1],
  236. ';',
  237. [T_WHITESPACE, "\n", 1],
  238. [T_COMMENT, '// foo' . "\n", 2],
  239. [T_CONSTANT_ENCAPSED_STRING, '"b"', 3],
  240. ';',
  241. ];
  242. $lexer = $this->getLexer();
  243. $lexer->startLexing($code);
  244. $this->assertSame($expectedTokens, $lexer->getTokens());
  245. }
  246. }