aboutsummaryrefslogtreecommitdiff
path: root/node_modules/ret/lib
diff options
context:
space:
mode:
Diffstat (limited to 'node_modules/ret/lib')
-rw-r--r--node_modules/ret/lib/index.js282
-rw-r--r--node_modules/ret/lib/positions.js17
-rw-r--r--node_modules/ret/lib/sets.js82
-rw-r--r--node_modules/ret/lib/types.js10
-rw-r--r--node_modules/ret/lib/util.js111
5 files changed, 502 insertions, 0 deletions
diff --git a/node_modules/ret/lib/index.js b/node_modules/ret/lib/index.js
new file mode 100644
index 0000000..0e151c3
--- /dev/null
+++ b/node_modules/ret/lib/index.js
@@ -0,0 +1,282 @@
+var util = require('./util');
+var types = require('./types');
+var sets = require('./sets');
+var positions = require('./positions');
+
+
+/**
+ * Tokenizes a regular expression source string into a tree of tokens.
+ *
+ * @param {String} regexpStr Regular expression source to parse.
+ * @return {Object} ROOT token; holds `stack`, or `options` once `|` is seen.
+ * @throws {SyntaxError} Raised through util.error on malformed input.
+ */
+module.exports = function(regexpStr) {
+  var i = 0, l, c,
+    start = { type: types.ROOT, stack: []},
+
+    // Keep track of last clause/group and stack.
+    lastGroup = start,
+    last = start.stack,
+    groupStack = [];
+
+  var repeatErr = function(i) {
+    util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1));
+  };
+
+  // Decode a few escaped characters.
+  var str = util.strToChars(regexpStr);
+  l = str.length;
+
+  // Iterate through each character in string.
+  while (i < l) {
+    c = str[i++];
+
+    switch (c) {
+      // Handle escaped characters, includes a few sets.
+      case '\\':
+        // A trailing backslash escapes nothing: fail with a SyntaxError.
+        if (i === l) {
+          util.error(regexpStr, '\\ at end of pattern');
+        }
+        c = str[i++];
+        switch (c) {
+          case 'b':
+            last.push(positions.wordBoundary());
+            break;
+
+          case 'B':
+            last.push(positions.nonWordBoundary());
+            break;
+
+          case 'w':
+            last.push(sets.words());
+            break;
+
+          case 'W':
+            last.push(sets.notWords());
+            break;
+
+          case 'd':
+            last.push(sets.ints());
+            break;
+
+          case 'D':
+            last.push(sets.notInts());
+            break;
+
+          case 's':
+            last.push(sets.whitespace());
+            break;
+
+          case 'S':
+            last.push(sets.notWhitespace());
+            break;
+
+          default:
+            // Check if c is integer.
+            // In which case it's a reference.
+            if (/\d/.test(c)) {
+              last.push({ type: types.REFERENCE, value: parseInt(c, 10) });
+
+            // Escaped character.
+            } else {
+              last.push({ type: types.CHAR, value: c.charCodeAt(0) });
+            }
+        }
+
+        break;
+
+      // Positionals.
+      case '^':
+        last.push(positions.begin());
+        break;
+
+      case '$':
+        last.push(positions.end());
+        break;
+
+      // Handle custom sets.
+      case '[':
+        // Check if this class is 'anti' i.e. [^abc].
+        var not;
+        if (str[i] === '^') {
+          not = true;
+          i++;
+        } else {
+          not = false;
+        }
+
+        // Get all the characters in class.
+        var classTokens = util.tokenizeClass(str.slice(i), regexpStr);
+
+        // Increase index by length of class.
+        i += classTokens[1];
+        last.push({
+          type: types.SET,
+          set: classTokens[0],
+          not: not,
+        });
+
+        break;
+
+      // Class of any character except \n.
+      case '.':
+        last.push(sets.anyChar());
+        break;
+
+      // Push group onto stack.
+      case '(':
+        // Create group.
+        var group = {
+          type: types.GROUP,
+          stack: [],
+          remember: true,
+        };
+
+        c = str[i];
+
+        // If this is a special kind of group.
+        if (c === '?') {
+          c = str[i + 1];
+          i += 2;
+
+          // Match if followed by.
+          if (c === '=') {
+            group.followedBy = true;
+
+          // Match if not followed by.
+          } else if (c === '!') {
+            group.notFollowedBy = true;
+
+          } else if (c !== ':') {
+            util.error(regexpStr,
+              'Invalid group, character \'' + c +
+              '\' after \'?\' at column ' + (i - 1));
+          }
+
+          group.remember = false;
+        }
+
+        // Insert subgroup into current group stack.
+        last.push(group);
+
+        // Remember the current group for when the group closes.
+        groupStack.push(lastGroup);
+
+        // Make this new group the current group.
+        lastGroup = group;
+        last = group.stack;
+        break;
+
+      // Pop group out of stack.
+      case ')':
+        if (groupStack.length === 0) {
+          util.error(regexpStr, 'Unmatched ) at column ' + (i - 1));
+        }
+        lastGroup = groupStack.pop();
+
+        // Check if this group has a PIPE.
+        // To get back the correct last stack.
+        last = lastGroup.options ?
+          lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack;
+        break;
+
+      // Use pipe character to give more choices.
+      case '|':
+        // Create array where options are if this is the first PIPE
+        // in this clause.
+        if (!lastGroup.options) {
+          lastGroup.options = [lastGroup.stack];
+          delete lastGroup.stack;
+        }
+
+        // Create a new stack and add to options for rest of clause.
+        var stack = [];
+        lastGroup.options.push(stack);
+        last = stack;
+        break;
+
+      // Repetition.
+      // For every repetition, remove last element from last stack
+      // then insert back a RANGE object.
+      // This design is chosen because there could be more than
+      // one repetition symbols in a regex i.e. `a?+{2,3}`.
+      case '{':
+        var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
+        if (rs !== null) {
+          if (last.length === 0) {
+            repeatErr(i);
+          }
+          min = parseInt(rs[1], 10);
+          max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
+          i += rs[0].length;
+
+          last.push({
+            type: types.REPETITION,
+            min: min,
+            max: max,
+            value: last.pop(),
+          });
+        } else {
+          last.push({
+            type: types.CHAR,
+            value: 123,
+          });
+        }
+        break;
+
+      case '?':
+        if (last.length === 0) {
+          repeatErr(i);
+        }
+        last.push({
+          type: types.REPETITION,
+          min: 0,
+          max: 1,
+          value: last.pop(),
+        });
+        break;
+
+      case '+':
+        if (last.length === 0) {
+          repeatErr(i);
+        }
+        last.push({
+          type: types.REPETITION,
+          min: 1,
+          max: Infinity,
+          value: last.pop(),
+        });
+        break;
+
+      case '*':
+        if (last.length === 0) {
+          repeatErr(i);
+        }
+        last.push({
+          type: types.REPETITION,
+          min: 0,
+          max: Infinity,
+          value: last.pop(),
+        });
+        break;
+
+      // Default is a character that is not `\[](){}?+*^$`.
+      default:
+        last.push({
+          type: types.CHAR,
+          value: c.charCodeAt(0),
+        });
+    }
+  }
+
+  // Check if any groups have not been closed.
+  if (groupStack.length !== 0) {
+    util.error(regexpStr, 'Unterminated group');
+  }
+
+  return start;
+};
+
+module.exports.types = types;
diff --git a/node_modules/ret/lib/positions.js b/node_modules/ret/lib/positions.js
new file mode 100644
index 0000000..80677ee
--- /dev/null
+++ b/node_modules/ret/lib/positions.js
@@ -0,0 +1,17 @@
+var types = require('./types');
+
+// Builds a POSITION token for the given anchor symbol.
+var position = function(symbol) {
+  return { type: types.POSITION, value: symbol };
+};
+// `\b`: word boundary.
+exports.wordBoundary = function() { return position('b'); };
+
+// `\B`: non-word boundary.
+exports.nonWordBoundary = function() { return position('B'); };
+
+// `^`: start anchor.
+exports.begin = function() { return position('^'); };
+
+// `$`: end anchor.
+exports.end = function() { return position('$'); };
diff --git a/node_modules/ret/lib/sets.js b/node_modules/ret/lib/sets.js
new file mode 100644
index 0000000..5fb6be5
--- /dev/null
+++ b/node_modules/ret/lib/sets.js
@@ -0,0 +1,82 @@
+var types = require('./types');
+
+var INTS = function() {
+  var digits = { type: types.RANGE, from: 48, to: 57 }; // '0' through '9'
+  return [digits];
+};
+// Members of `\w`: '_', 'a'-'z', 'A'-'Z', then the digit range from INTS().
+var WORDS = function() {
+  var underscore = { type: types.CHAR, value: 95 };
+  var lower = { type: types.RANGE, from: 97, to: 122 };
+  var upper = { type: types.RANGE, from: 65, to: 90 };
+  return [underscore, lower, upper].concat(INTS());
+};
+
+// Character codes that make up `\s`; each one becomes a CHAR token.
+var WHITESPACE = function() {
+  return [
+    9, // tab
+    10, // line feed
+    11, // vertical tab
+    12, // form feed
+    13, // carriage return
+    32, // space
+    160, // U+00A0
+    5760, // U+1680
+    6158, // U+180E
+    8192, // U+2000
+    8193,
+    8194,
+    8195,
+    8196,
+    8197,
+    8198,
+    8199,
+    8200,
+    8201,
+    8202, // U+200A
+    8232, 8233, // U+2028, U+2029
+    8239, // U+202F
+    8287, // U+205F
+    12288, // U+3000
+    65279 // U+FEFF
+  ].map(function(code) { return { type: types.CHAR, value: code }; });
+};
+
+// Character codes LF, CR, U+2028 and U+2029 as CHAR tokens. anyChar()
+// negates this list to describe what `.` accepts.
+var NOTANYCHAR = function() {
+  var terminators = [10, 13, 8232, 8233];
+  return terminators.map(function(code) {
+    return { type: types.CHAR, value: code };
+  });
+};
+
+// Predefined class objects.
+//
+// Every factory returns a new SET token on each call; `not: true` marks the
+// negated form of the class.
+var makeSet = function(tokens, negated) {
+  return { type: types.SET, set: tokens, not: negated };
+};
+
+// `\w`
+exports.words = function() { return makeSet(WORDS(), false); };
+
+// `\W`
+exports.notWords = function() { return makeSet(WORDS(), true); };
+
+// `\d`
+exports.ints = function() { return makeSet(INTS(), false); };
+
+// `\D`
+exports.notInts = function() { return makeSet(INTS(), true); };
+
+// `\s`
+exports.whitespace = function() { return makeSet(WHITESPACE(), false); };
+
+// `\S`
+exports.notWhitespace = function() { return makeSet(WHITESPACE(), true); };
+
+// `.`
+exports.anyChar = function() { return makeSet(NOTANYCHAR(), true); };
diff --git a/node_modules/ret/lib/types.js b/node_modules/ret/lib/types.js
new file mode 100644
index 0000000..9484145
--- /dev/null
+++ b/node_modules/ret/lib/types.js
@@ -0,0 +1,10 @@
+module.exports = { // Numeric tags stored in the `type` field of each token.
+ ROOT : 0, // top-level token returned by the parser
+ GROUP : 1, // parenthesized group; carries `stack`/`options` and `remember`
+ POSITION : 2, // anchor: `^`, `$`, `\b` or `\B`
+ SET : 3, // character class with `set` members and a `not` flag
+ RANGE : 4, // `from`..`to` span of character codes inside a set
+ REPETITION : 5, // quantified token with `min`, `max` and `value`
+ REFERENCE : 6, // backslash followed by a digit, e.g. `\1`
+ CHAR : 7, // single character, stored as its character code
+};
diff --git a/node_modules/ret/lib/util.js b/node_modules/ret/lib/util.js
new file mode 100644
index 0000000..97d8cf5
--- /dev/null
+++ b/node_modules/ret/lib/util.js
@@ -0,0 +1,111 @@
+var types = require('./types');
+var sets = require('./sets');
+
+
+// All of these are private and only used by randexp.
+// It's assumed that they will always be called with the correct input.
+
+var CTRL = '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^ ?'; // index of X is the char code used for `\cX`
+var SLSH = { '0': 0, 't': 9, 'n': 10, 'v': 11, 'f': 12, 'r': 13 }; // escape letter -> char code
+
+/**
+ * Finds character representations in str (hex, octal, control and simple
+ * escapes; hex digits in either case) and converts them to their characters.
+ *
+ * @param {String} str
+ * @return {String}
+ */
+exports.strToChars = function(str) {
+  /* jshint maxlen: false */
+  var chars_regex = /(\[\\b\])|(\\)?\\(?:u([A-Fa-f0-9]{4})|x([A-Fa-f0-9]{2})|(0?[0-7]{2})|c([@A-Z\[\\\]\^?])|([0tnvfr]))/g;
+  str = str.replace(chars_regex, function(s, b, lbs, a16, b16, c8, dctrl, eslsh) {
+    // A preceding backslash means this backslash is itself escaped: keep it.
+    if (lbs) {
+      return s;
+    }
+
+    var code = b ? 8 :
+      a16 ? parseInt(a16, 16) :
+      b16 ? parseInt(b16, 16) :
+      c8 ? parseInt(c8, 8) :
+      dctrl ? CTRL.indexOf(dctrl) :
+      SLSH[eslsh];
+
+    var c = String.fromCharCode(code);
+    // Escape special regex characters, backslash included, so the decoded
+    // character is read back as a literal and not as syntax.
+    if (/[\[\]{}\^$.|?*+()\\]/.test(c)) {
+      c = '\\' + c;
+    }
+
+    return c;
+  });
+  return str;
+};
+
+
+/**
+ * Turns class into tokens; reads str until a ] not preceded by a \ is found.
+ *
+ * @param {String} str
+ * @param {String} regexpStr
+ * @return {Array.<Array.<Object>, Number>} tokens and characters consumed
+ * @throws {SyntaxError} when no closing ] is found
+ */
+exports.tokenizeClass = function(str, regexpStr) {
+  /* jshint maxlen: false */
+  var tokens = [];
+  // [\s\S] instead of `.` so line terminators inside a class are not skipped.
+  var regexp = /\\(?:(w)|(d)|(s)|(W)|(D)|(S))|((?:(?:\\)([\s\S])|([^\]\\]))-(?:\\)?([^\]]))|(\])|(?:\\)?([\s\S])/g;
+  var rs, c;
+
+  while ((rs = regexp.exec(str)) != null) {
+    if (rs[1]) {
+      tokens.push(sets.words());
+
+    } else if (rs[2]) {
+      tokens.push(sets.ints());
+
+    } else if (rs[3]) {
+      tokens.push(sets.whitespace());
+
+    } else if (rs[4]) {
+      tokens.push(sets.notWords());
+
+    } else if (rs[5]) {
+      tokens.push(sets.notInts());
+
+    } else if (rs[6]) {
+      tokens.push(sets.notWhitespace());
+
+    } else if (rs[7]) {
+      tokens.push({
+        type: types.RANGE,
+        from: (rs[8] || rs[9]).charCodeAt(0),
+        to: rs[10].charCodeAt(0),
+      });
+
+    } else if (c = rs[12]) {
+      tokens.push({
+        type: types.CHAR,
+        value: c.charCodeAt(0),
+      });
+
+    } else {
+      return [tokens, regexp.lastIndex];
+    }
+  }
+
+  exports.error(regexpStr, 'Unterminated character class');
+};
+
+
+/**
+ * Always throws a SyntaxError naming the offending expression and the reason.
+ * @param {String} regexp
+ * @param {String} msg
+ */
+exports.error = function(regexp, msg) {
+  var text = 'Invalid regular expression: /' + regexp + '/: ' + msg;
+  throw new SyntaxError(text);
+};