RegexHelperTest.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. <?php
  2. declare(strict_types=1);
  3. /*
  4. * This file is part of the league/commonmark package.
  5. *
  6. * (c) Colin O'Dell <colinodell@gmail.com>
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11. namespace League\CommonMark\Tests\Unit\Util;
  12. use League\CommonMark\Exception\InvalidArgumentException;
  13. use League\CommonMark\Extension\CommonMark\Node\Block\HtmlBlock;
  14. use League\CommonMark\Util\RegexHelper;
  15. use PHPUnit\Framework\TestCase;
  /**
   * Unit tests for the regular-expression fragments and static helpers exposed by RegexHelper.
   *
   * Every PARTIAL_* fragment is wrapped in "/^" ... "$/" before use, so each sample
   * string has to match the fragment in full rather than merely contain a match.
   */
  final class RegexHelperTest extends TestCase
  {
      /**
       * Single characters that PARTIAL_ESCAPABLE must accept.
       */
      public function testEscapable(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_ESCAPABLE . '$/';

          $this->assertRegexMatches($regex, '&');
          $this->assertRegexMatches($regex, '/');
          $this->assertRegexMatches($regex, '\\');
          $this->assertRegexMatches($regex, '(');
          $this->assertRegexMatches($regex, ')');
      }

      /**
       * A backslash followed by one escapable character.
       */
      public function testEscapedChar(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_ESCAPED_CHAR . '$/';

          $this->assertRegexMatches($regex, '\\&');
          $this->assertRegexMatches($regex, '\\/');
          $this->assertRegexMatches($regex, '\\\\');
          $this->assertRegexMatches($regex, '\)');
          $this->assertRegexMatches($regex, '\(');
      }

      /**
       * Escaped characters wrapped in double quotes.
       */
      public function testInDoubleQuotes(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_IN_DOUBLE_QUOTES . '$/';

          $this->assertRegexMatches($regex, '"\\&"');
          $this->assertRegexMatches($regex, '"\\/"');
          $this->assertRegexMatches($regex, '"\\\\"');
      }

      /**
       * Escaped characters wrapped in single quotes.
       */
      public function testInSingleQuotes(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_IN_SINGLE_QUOTES . '$/';

          $this->assertRegexMatches($regex, '\'\\&\'');
          $this->assertRegexMatches($regex, '\'\\/\'');
          $this->assertRegexMatches($regex, '\'\\\\\'');
      }

      /**
       * Escaped characters wrapped in parentheses.
       */
      public function testInParens(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_IN_PARENS . '$/';

          $this->assertRegexMatches($regex, '(\\&)');
          $this->assertRegexMatches($regex, '(\\/)');
          $this->assertRegexMatches($regex, '(\\\\)');
      }

      /**
       * Regular characters are accepted; a space is rejected.
       */
      public function testRegChar(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_REG_CHAR . '$/';

          $this->assertRegexMatches($regex, 'a');
          $this->assertRegexMatches($regex, 'A');
          $this->assertRegexMatches($regex, '!');
          $this->assertRegexDoesNotMatch($regex, ' ');
      }

      /**
       * Parenthesised content is accepted only when it contains no space.
       */
      public function testInParensNoSp(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_IN_PARENS_NOSP . '$/';

          $this->assertRegexMatches($regex, '(a)');
          $this->assertRegexMatches($regex, '(A)');
          $this->assertRegexMatches($regex, '(!)');
          $this->assertRegexDoesNotMatch($regex, '(a )');
      }

      /**
       * Tag names may contain digits, but an all-digit name is rejected.
       */
      public function testTagname(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_TAGNAME . '$/';

          $this->assertRegexMatches($regex, 'a');
          $this->assertRegexMatches($regex, 'img');
          $this->assertRegexMatches($regex, 'h1');
          $this->assertRegexDoesNotMatch($regex, '11');
      }

      /**
       * Block-level tag names are accepted; "a" and "h7" are rejected.
       */
      public function testBlockTagName(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_BLOCKTAGNAME . '$/';

          $this->assertRegexMatches($regex, 'p');
          $this->assertRegexMatches($regex, 'div');
          $this->assertRegexMatches($regex, 'h1');
          $this->assertRegexDoesNotMatch($regex, 'a');
          $this->assertRegexDoesNotMatch($regex, 'h7');
      }

      /**
       * Attribute names may contain a hyphen but must not start with one.
       */
      public function testAttributeName(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_ATTRIBUTENAME . '$/';

          $this->assertRegexMatches($regex, 'href');
          $this->assertRegexMatches($regex, 'class');
          $this->assertRegexMatches($regex, 'data-src');
          $this->assertRegexDoesNotMatch($regex, '-key');
      }

      /**
       * Bare words are unquoted values; a double-quoted string is not.
       */
      public function testUnquotedValue(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_UNQUOTEDVALUE . '$/';

          $this->assertRegexMatches($regex, 'foo');
          $this->assertRegexMatches($regex, 'bar');
          $this->assertRegexDoesNotMatch($regex, '"baz"');
      }

      /**
       * Single-quoted values are accepted; double-quoted ones are rejected.
       */
      public function testSingleQuotedValue(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_SINGLEQUOTEDVALUE . '$/';

          $this->assertRegexMatches($regex, '\'foo\'');
          $this->assertRegexMatches($regex, '\'bar\'');
          $this->assertRegexDoesNotMatch($regex, '"baz"');
      }

      /**
       * Double-quoted values are accepted; single-quoted ones are rejected.
       */
      public function testDoubleQuotedValue(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_DOUBLEQUOTEDVALUE . '$/';

          $this->assertRegexMatches($regex, '"foo"');
          $this->assertRegexMatches($regex, '"bar"');
          $this->assertRegexDoesNotMatch($regex, '\'baz\'');
      }

      /**
       * An attribute value may be unquoted, single-quoted or double-quoted.
       */
      public function testAttributeValue(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_ATTRIBUTEVALUE . '$/';

          $this->assertRegexMatches($regex, 'foo');
          $this->assertRegexMatches($regex, '\'bar\'');
          $this->assertRegexMatches($regex, '"baz"');
      }

      /**
       * "=value" with every combination of optional whitespace around the equals sign,
       * repeated for unquoted, single-quoted and double-quoted values.
       */
      public function testAttributeValueSpec(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_ATTRIBUTEVALUESPEC . '$/';

          $this->assertRegexMatches($regex, '=foo');
          $this->assertRegexMatches($regex, '= foo');
          $this->assertRegexMatches($regex, ' =foo');
          $this->assertRegexMatches($regex, ' = foo');

          $this->assertRegexMatches($regex, '=\'bar\'');
          $this->assertRegexMatches($regex, '= \'bar\'');
          $this->assertRegexMatches($regex, ' =\'bar\'');
          $this->assertRegexMatches($regex, ' = \'bar\'');

          $this->assertRegexMatches($regex, '="baz"');
          $this->assertRegexMatches($regex, '= "baz"');
          $this->assertRegexMatches($regex, ' ="baz"');
          $this->assertRegexMatches($regex, ' = "baz"');
      }

      /**
       * A whole attribute, with or without a value, must be preceded by whitespace.
       */
      public function testAttribute(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_ATTRIBUTE . '$/';

          $this->assertRegexMatches($regex, ' disabled');
          $this->assertRegexMatches($regex, ' disabled="disabled"');
          $this->assertRegexMatches($regex, ' href="http://www.google.com"');
          $this->assertRegexDoesNotMatch($regex, 'disabled', 'There must be at least one space at the start');
      }

      /**
       * Opening tags (including the self-closing form) match; a closing tag does not.
       */
      public function testOpenTag(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_OPENTAG . '$/';

          $this->assertRegexMatches($regex, '<hr>');
          $this->assertRegexMatches($regex, '<a href="http://www.google.com">');
          $this->assertRegexMatches($regex, '<img src="http://www.google.com/logo.png" />');
          $this->assertRegexDoesNotMatch($regex, '</p>');
      }

      /**
       * Closing tags match; opening tags do not.
       */
      public function testCloseTag(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_CLOSETAG . '$/';

          $this->assertRegexMatches($regex, '</p>');
          $this->assertRegexMatches($regex, '</a>');
          $this->assertRegexDoesNotMatch($regex, '<hr>');
          $this->assertRegexDoesNotMatch($regex, '<img src="http://www.google.com/logo.png" />');
      }

      /**
       * Opening tags of block elements match; inline elements and closing tags do not.
       */
      public function testOpenBlockTag(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_OPENBLOCKTAG . '$/';

          $this->assertRegexMatches($regex, '<body>');
          $this->assertRegexMatches($regex, '<hr>');
          $this->assertRegexMatches($regex, '<hr />');
          $this->assertRegexMatches($regex, '<p id="foo" class="bar">');
          $this->assertRegexDoesNotMatch($regex, '<a href="http://www.google.com">', 'This is not a block element');
          $this->assertRegexDoesNotMatch($regex, '</p>', 'This is not an opening tag');
      }

      /**
       * Closing tags of block elements match; inline elements and opening tags do not.
       */
      public function testCloseBlockTag(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_CLOSEBLOCKTAG . '$/';

          $this->assertRegexMatches($regex, '</body>');
          $this->assertRegexMatches($regex, '</p>');
          $this->assertRegexDoesNotMatch($regex, '</a>', 'This is not a block element');
          $this->assertRegexDoesNotMatch($regex, '<br>', 'This is not a closing tag');
      }

      /**
       * Well-formed comments (empty, blank, with text) match; malformed delimiters do not.
       */
      public function testHtmlComment(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_HTMLCOMMENT . '$/';

          $this->assertRegexMatches($regex, '<!---->');
          $this->assertRegexMatches($regex, '<!-- -->');
          $this->assertRegexMatches($regex, '<!-- HELLO WORLD -->');
          $this->assertRegexDoesNotMatch($regex, '<!->');
          $this->assertRegexDoesNotMatch($regex, '<!-->');
          $this->assertRegexDoesNotMatch($regex, '<!--->');
          $this->assertRegexDoesNotMatch($regex, '<!- ->');
      }

      /**
       * Processing instructions delimited by "<?" and "?>".
       */
      public function testProcessingInstruction(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_PROCESSINGINSTRUCTION . '$/';

          $this->assertRegexMatches($regex, '<?PITarget PIContent?>');
          $this->assertRegexMatches($regex, '<?xml-stylesheet type="text/xsl" href="style.xsl"?>');
      }

      /**
       * DOCTYPE declarations, from the short HTML5 form to full public identifiers.
       */
      public function testDeclaration(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_DECLARATION . '$/';

          $this->assertRegexMatches($regex, '<!DOCTYPE html>');
          $this->assertRegexMatches($regex, '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">');
          $this->assertRegexMatches($regex, '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">');
      }

      /**
       * CDATA sections, including two back-to-back sections in one sample.
       */
      public function testCDATA(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_CDATA . '$/';

          $this->assertRegexMatches($regex, '<![CDATA[<sender>John Smith</sender>]]>');
          $this->assertRegexMatches($regex, '<![CDATA[]]]]><![CDATA[>]]>');
      }

      /**
       * PARTIAL_HTMLTAG accepts every tag-like construct tested individually above;
       * a lower-case "cdata" marker is rejected.
       */
      public function testHtmlTag(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_HTMLTAG . '$/';

          $this->assertRegexMatches($regex, '<body id="main">');
          $this->assertRegexMatches($regex, '</p>');
          $this->assertRegexMatches($regex, '<!-- HELLO WORLD -->');
          $this->assertRegexMatches($regex, '<?xml-stylesheet type="text/xsl" href="style.xsl"?>');
          $this->assertRegexMatches($regex, '<!DOCTYPE html>');
          $this->assertRegexMatches($regex, '<![CDATA[<sender>John Smith</sender>]]>');
          $this->assertRegexDoesNotMatch($regex, '<![cdata[<sender>John Smith</sender>]]>');
      }

      /**
       * Both an opening and a closing block tag can open an HTML block.
       */
      public function testHtmlBlockOpen(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_HTMLBLOCKOPEN . '$/';

          $this->assertRegexMatches($regex, '<h1>');
          $this->assertRegexMatches($regex, '</p>');
      }

      /**
       * Link titles delimited by double quotes, single quotes or parentheses.
       *
       * NOTE(review): relies on RegexHelper::PARTIAL_LINK_TITLE, which is declared in the
       * library class rather than in this file - confirm the constant name when applying.
       */
      public function testLinkTitle(): void
      {
          $regex = '/^' . RegexHelper::PARTIAL_LINK_TITLE . '$/';

          $this->assertRegexMatches($regex, '"foo"');
          $this->assertRegexMatches($regex, '\'bar\'');
          $this->assertRegexMatches($regex, '(baz)');
          $this->assertRegexDoesNotMatch($regex, 'foo', 'A title must be wrapped in quotes or parentheses');
          $this->assertRegexDoesNotMatch($regex, '"foo', 'The closing delimiter is required');
      }

      /**
       * unescape() strips the backslashes from escaped parentheses.
       */
      public function testUnescape(): void
      {
          $this->assertSame('foo(and(bar))', RegexHelper::unescape('foo(and\\(bar\\))'));
      }

      /**
       * isEscapable() is false for an empty string and a letter, true for a backslash.
       */
      public function testIsEscapable(): void
      {
          $this->assertFalse(RegexHelper::isEscapable(''));
          $this->assertFalse(RegexHelper::isEscapable('A'));
          $this->assertTrue(RegexHelper::isEscapable('\\'));
      }

      /**
       * Checks the position reported by matchAt(), with and without an explicit offset.
       *
       * A null $offset means "call matchAt() with its default offset".
       *
       * @dataProvider dataForTestMatchAt
       */
      public function testMatchAt(string $regex, string $string, ?int $offset, int $expectedResult): void
      {
          $actual = $offset === null
              ? RegexHelper::matchAt($regex, $string)
              : RegexHelper::matchAt($regex, $string, $offset);

          $this->assertSame($expectedResult, $actual);
      }

      /**
       * Rows are [regex, subject, offset or null, expected position].
       *
       * The Cyrillic rows expect position 3 although the space sits at byte 6 in UTF-8,
       * so positions are counted in characters rather than bytes.
       *
       * @return iterable<array<mixed>>
       */
      public static function dataForTestMatchAt(): iterable
      {
          return [
              ['/ /', 'foo bar', null, 3],
              ['/ /', 'foo bar', 0, 3],
              ['/ /', 'foo bar', 1, 3],
              ['/ /', 'это тест', null, 3],
              ['/ /', 'это тест', 0, 3],
              ['/ /', 'это тест', 1, 3],
          ];
      }

      /**
       * matchFirst() returns null when nothing matches, otherwise the full match
       * followed by its capture groups; the optional third argument is a start offset.
       */
      public function testMatchFirst(): void
      {
          $this->assertNull(RegexHelper::matchFirst('/^bar/', 'foobar'));
          $this->assertSame(['bar'], RegexHelper::matchFirst('/^bar/', 'foobar', 3));
          $this->assertSame(['bar', 'bar'], RegexHelper::matchFirst('/^(bar)/', 'foobar', 3));
          $this->assertSame(['bar', 'ar'], RegexHelper::matchFirst('/^b(.+)/', 'foobar', 3));
          $this->assertSame(['brown fox', 'brown', 'fox'], RegexHelper::matchFirst('/(quick|brown|lazy) (fox|dog)/', 'The quick brown fox jumps over the lazy dog'));
      }

      /**
       * Every HTML block type has a non-empty opener regex.
       *
       * @dataProvider blockTypesWithValidOpenerRegexes
       */
      public function testValidHtmlBlockOpenRegex(int $type): void
      {
          $this->assertNotEmpty(RegexHelper::getHtmlBlockOpenRegex($type));
      }

      /**
       * One single-element argument list per HTML block type that has an opener regex.
       *
       * @return iterable<array<int>>
       */
      public static function blockTypesWithValidOpenerRegexes(): iterable
      {
          yield [HtmlBlock::TYPE_1_CODE_CONTAINER];
          yield [HtmlBlock::TYPE_2_COMMENT];
          yield [HtmlBlock::TYPE_3];
          yield [HtmlBlock::TYPE_4];
          yield [HtmlBlock::TYPE_5_CDATA];
          yield [HtmlBlock::TYPE_6_BLOCK_ELEMENT];
          yield [HtmlBlock::TYPE_7_MISC_ELEMENT];
      }

      /**
       * Requesting an opener regex for a value outside the types listed above throws.
       */
      public function testInvalidHtmlBlockOpenRegex(): void
      {
          $this->expectException(InvalidArgumentException::class);

          RegexHelper::getHtmlBlockOpenRegex(8);
      }

      /**
       * Block types 1-5 have a non-empty closer regex.
       *
       * @dataProvider blockTypesWithValidCloserRegexes
       */
      public function testValidHtmlBlockCloseRegex(int $type): void
      {
          // Must query the *close* regex; the opener is covered by testValidHtmlBlockOpenRegex().
          $this->assertNotEmpty(RegexHelper::getHtmlBlockCloseRegex($type));
      }

      /**
       * One single-element argument list per HTML block type that has a closer regex.
       *
       * @return iterable<array<int>>
       */
      public static function blockTypesWithValidCloserRegexes(): iterable
      {
          yield [HtmlBlock::TYPE_1_CODE_CONTAINER];
          yield [HtmlBlock::TYPE_2_COMMENT];
          yield [HtmlBlock::TYPE_3];
          yield [HtmlBlock::TYPE_4];
          yield [HtmlBlock::TYPE_5_CDATA];
      }

      /**
       * Block types without a closer regex, and unknown types, throw.
       *
       * @dataProvider blockTypesWithInvalidCloserRegexes
       */
      public function testInvalidHtmlBlockCloseRegex(int $type): void
      {
          $this->expectException(InvalidArgumentException::class);

          RegexHelper::getHtmlBlockCloseRegex($type);
      }

      /**
       * Types 6 and 7 plus the out-of-range value 8, each as a single-element argument list.
       *
       * @return iterable<array<int>>
       */
      public static function blockTypesWithInvalidCloserRegexes(): iterable
      {
          yield [HtmlBlock::TYPE_6_BLOCK_ELEMENT];
          yield [HtmlBlock::TYPE_7_MISC_ELEMENT];
          yield [8];
      }

      /**
       * Asserts that $string matches $pattern.
       *
       * Uses assertMatchesRegularExpression() when the running PHPUnit provides it
       * and falls back to assertRegExp() otherwise.
       */
      private function assertRegexMatches(string $pattern, string $string, string $message = ''): void
      {
          if (\method_exists($this, 'assertMatchesRegularExpression')) {
              $this->assertMatchesRegularExpression($pattern, $string, $message);

              return;
          }

          $this->assertRegExp($pattern, $string, $message);
      }

      /**
       * Asserts that $string does not match $pattern.
       *
       * Uses assertDoesNotMatchRegularExpression() when the running PHPUnit provides it
       * and falls back to assertNotRegExp() otherwise.
       */
      private function assertRegexDoesNotMatch(string $pattern, string $string, string $message = ''): void
      {
          if (\method_exists($this, 'assertDoesNotMatchRegularExpression')) {
              $this->assertDoesNotMatchRegularExpression($pattern, $string, $message);

              return;
          }

          $this->assertNotRegExp($pattern, $string, $message);
      }
  }