// regex.js
  /**
   * Builds a RegExp that matches `value` literally: every regex
   * metacharacter in the input is backslash-escaped first.
   *
   * @param {string} value - text that should be matched verbatim
   * @returns {RegExp} regex for the literal text, created with the `m` flag
   */
  function escape(value) {
    const literalPattern = value.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&');
    return new RegExp(literalPattern, 'm');
  }
  /**
   * Returns the pattern text of a regex-like value.
   *
   * Strings are returned unchanged and RegExp objects yield their `.source`.
   * Any falsy input (null, undefined, and also the empty string) yields
   * `null` rather than a string.
   *
   * @param {RegExp | string } re
   * @returns {string | null}
   */
  function source(re) {
    if (!re) {
      return null;
    }
    return typeof re === "string" ? re : re.source;
  }
  /**
   * Wraps the given pattern in a positive lookahead: `(?=` ... `)`.
   *
   * @param {RegExp | string } re - pattern to place inside the lookahead
   * @returns {string} pattern source of the lookahead assertion
   */
  function lookahead(re) {
    const pattern = concat('(?=', re, ')');
    return pattern;
  }
  /**
   * Wraps the given pattern in a non-capturing group repeated zero or more
   * times: `(?:` ... `)*`.
   *
   * @param {RegExp | string } re - pattern to repeat
   * @returns {string} pattern source of the repeated group
   */
  function anyNumberOfTimes(re) {
    const pattern = concat('(?:', re, ')*');
    return pattern;
  }
  /**
   * Wraps the given pattern in an optional non-capturing group:
   * `(?:` ... `)?`.
   *
   * @param {RegExp | string } re - pattern that may or may not be present
   * @returns {string} pattern source of the optional group
   */
  function optional(re) {
    const pattern = concat('(?:', re, ')?');
    return pattern;
  }
  /**
   * Concatenates the pattern sources of all arguments, in order, into a
   * single pattern string.
   *
   * @param {...(RegExp | string) } args - pieces to place back to back
   * @returns {string} the combined pattern source
   */
  function concat(...args) {
    const pieces = [];
    for (const piece of args) {
      pieces.push(source(piece));
    }
    // join() renders a null piece (what source() yields for falsy input)
    // as an empty string.
    return pieces.join("");
  }
  /**
   * Removes a trailing options object from `args`, if there is one.
   *
   * The last element counts as options only when it is a plain object
   * (its `constructor` is `Object`). RegExp instances, strings and `null`
   * are left where they are. NOTE: `args` is mutated in place when an
   * options object is found.
   *
   * @param { Array<string | RegExp | Object> } args
   * @returns {object} the removed options object, or a new empty object
   */
  function stripOptionsFromArgs(args) {
    const opts = args[args.length - 1];
    // `typeof null` is also 'object', so null must be ruled out before
    // reading `.constructor`; otherwise a trailing null throws a TypeError.
    const isPlainObject = typeof opts === 'object'
      && opts !== null
      && opts.constructor === Object;
    if (!isPlainObject) {
      return {};
    }
    args.pop();
    return opts;
  }
  59. /** @typedef { {capture?: boolean} } RegexEitherOptions */
  /**
   * Any of the passed expressions may match.
   *
   * Builds one alternation group of the form `(?:a|b|c)`. Pass
   * `{ capture: true }` as the final argument to get a capturing group
   * `(a|b|c)` instead.
   *
   * @param {(RegExp | string)[] | [...(RegExp | string)[], RegexEitherOptions]} args
   * @returns {string} pattern source of the alternation group
   */
  function either(...args) {
    /** @type { object & {capture?: boolean} } */
    const opts = stripOptionsFromArgs(args);
    const groupPrefix = opts.capture ? "" : "?:";
    const alternatives = args.map((x) => source(x)).join("|");
    return `(${groupPrefix}${alternatives})`;
  }
  /**
   * Counts the capture groups contained in a regular expression.
   *
   * @param {RegExp | string} re
   * @returns {number} number of capture groups in `re`
   */
  function countMatchGroups(re) {
    // Appending an empty alternative guarantees a match against '', and the
    // resulting match array has one slot for the whole match plus one per
    // capture group.
    const alwaysMatching = new RegExp(re.toString() + '|');
    const emptyMatch = alwaysMatching.exec('');
    return emptyMatch.length - 1;
  }
  /**
   * Checks whether the first match of `re` in `lexeme` begins at index 0,
   * i.e. whether the lexeme starts with a match of the expression.
   *
   * @param {RegExp} re
   * @param {string} lexeme
   * @returns {boolean | null | undefined} `true` / `false` when `re` matches
   *   somewhere in `lexeme`; `null` when it does not match at all; the falsy
   *   `re` itself when no expression was supplied
   */
  function startsWith(re, lexeme) {
    if (!re) {
      return re;
    }
    const found = re.exec(lexeme);
    if (!found) {
      return found;
    }
    return found.index === 0;
  }
  // BACKREF_RE matches an open parenthesis or backreference. To avoid
  // an incorrect parse, it additionally matches the following:
  // - [...] elements, where the meaning of parentheses and escapes change
  // - other escape sequences, so we do not misparse escape sequences as
  //   interesting elements
  // - parentheses followed by a '?': non-capturing groups, lookarounds and
  //   named groups. Whether such a group captures is decided by the caller
  //   from the text that follows the '(?'.
  const BACKREF_RE = /\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./;

  // **INTERNAL** Not intended for outside usage
  // join logically computes regexps.join(separator), but fixes the
  // backreferences so they continue to match.
  // it also places each individual regular expression into its own
  // match group, keeping track of the sequencing of those match groups
  // is currently an exercise for the caller. :-)
  /**
   * Joins several expressions into one pattern, wrapping each in its own
   * capture group and renumbering numeric backreferences (`\1`, `\2`, ...)
   * so they keep pointing at the same group inside the combined pattern.
   *
   * @param {(string | RegExp)[]} regexps - expressions to combine, in order
   * @param {{joinWith: string}} opts - `joinWith` is placed between the
   *   wrapped expressions (e.g. `"|"`)
   * @returns {string} source of the combined pattern
   */
  function _rewriteBackreferences(regexps, { joinWith }) {
    // After a '(?', the text '<' followed by anything except '=' or '!'
    // opens a named capture group, which consumes a group number just like
    // a plain '('. '(?<=' and '(?<!' are lookbehinds and do not capture.
    const namedGroupTail = /^<[^=!]/;
    let numCaptures = 0;

    return regexps.map((regex) => {
      // The wrapper group added around this expression counts as a capture.
      numCaptures += 1;
      const offset = numCaptures;
      let re = source(regex);
      let out = '';

      while (re.length > 0) {
        const match = BACKREF_RE.exec(re);
        if (!match) {
          out += re;
          break;
        }
        out += re.substring(0, match.index);
        re = re.substring(match.index + match[0].length);
        if (match[0][0] === '\\' && match[1]) {
          // Adjust the backreference.
          out += '\\' + String(Number(match[1]) + offset);
        } else {
          out += match[0];
          const opensCapture = match[0] === '('
            || (match[0] === '(?' && namedGroupTail.test(re));
          if (opensCapture) {
            numCaptures++;
          }
        }
      }
      return out;
    }).map(re => `(${re})`).join(joinWith);
  }
  138. export { _rewriteBackreferences, anyNumberOfTimes, concat, countMatchGroups, either, escape, lookahead, optional, source, startsWith };