diff options
Diffstat (limited to 'node_modules/ret/lib')
-rw-r--r-- | node_modules/ret/lib/index.js | 282 | ||||
-rw-r--r-- | node_modules/ret/lib/positions.js | 17 | ||||
-rw-r--r-- | node_modules/ret/lib/sets.js | 82 | ||||
-rw-r--r-- | node_modules/ret/lib/types.js | 10 | ||||
-rw-r--r-- | node_modules/ret/lib/util.js | 111 |
5 files changed, 502 insertions, 0 deletions
diff --git a/node_modules/ret/lib/index.js b/node_modules/ret/lib/index.js new file mode 100644 index 0000000..0e151c3 --- /dev/null +++ b/node_modules/ret/lib/index.js @@ -0,0 +1,282 @@ +var util = require('./util'); +var types = require('./types'); +var sets = require('./sets'); +var positions = require('./positions'); + + +module.exports = function(regexpStr) { + var i = 0, l, c, + start = { type: types.ROOT, stack: []}, + + // Keep track of last clause/group and stack. + lastGroup = start, + last = start.stack, + groupStack = []; + + + var repeatErr = function(i) { + util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1)); + }; + + // Decode a few escaped characters. + var str = util.strToChars(regexpStr); + l = str.length; + + // Iterate through each character in string. + while (i < l) { + c = str[i++]; + + switch (c) { + // Handle escaped characters, inclues a few sets. + case '\\': + c = str[i++]; + + switch (c) { + case 'b': + last.push(positions.wordBoundary()); + break; + + case 'B': + last.push(positions.nonWordBoundary()); + break; + + case 'w': + last.push(sets.words()); + break; + + case 'W': + last.push(sets.notWords()); + break; + + case 'd': + last.push(sets.ints()); + break; + + case 'D': + last.push(sets.notInts()); + break; + + case 's': + last.push(sets.whitespace()); + break; + + case 'S': + last.push(sets.notWhitespace()); + break; + + default: + // Check if c is integer. + // In which case it's a reference. + if (/\d/.test(c)) { + last.push({ type: types.REFERENCE, value: parseInt(c, 10) }); + + // Escaped character. + } else { + last.push({ type: types.CHAR, value: c.charCodeAt(0) }); + } + } + + break; + + + // Positionals. + case '^': + last.push(positions.begin()); + break; + + case '$': + last.push(positions.end()); + break; + + + // Handle custom sets. + case '[': + // Check if this class is 'anti' i.e. [^abc]. + var not; + if (str[i] === '^') { + not = true; + i++; + } else { + not = false; + } + + // Get all the characters in class. + var classTokens = util.tokenizeClass(str.slice(i), regexpStr); + + // Increase index by length of class. + i += classTokens[1]; + last.push({ + type: types.SET, + set: classTokens[0], + not: not, + }); + + break; + + + // Class of any character except \n. + case '.': + last.push(sets.anyChar()); + break; + + + // Push group onto stack. + case '(': + // Create group. + var group = { + type: types.GROUP, + stack: [], + remember: true, + }; + + c = str[i]; + + // If if this is a special kind of group. + if (c === '?') { + c = str[i + 1]; + i += 2; + + // Match if followed by. + if (c === '=') { + group.followedBy = true; + + // Match if not followed by. + } else if (c === '!') { + group.notFollowedBy = true; + + } else if (c !== ':') { + util.error(regexpStr, + 'Invalid group, character \'' + c + + '\' after \'?\' at column ' + (i - 1)); + } + + group.remember = false; + } + + // Insert subgroup into current group stack. + last.push(group); + + // Remember the current group for when the group closes. + groupStack.push(lastGroup); + + // Make this new group the current group. + lastGroup = group; + last = group.stack; + break; + + + // Pop group out of stack. + case ')': + if (groupStack.length === 0) { + util.error(regexpStr, 'Unmatched ) at column ' + (i - 1)); + } + lastGroup = groupStack.pop(); + + // Check if this group has a PIPE. + // To get back the correct last stack. + last = lastGroup.options ? + lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack; + break; + + + // Use pipe character to give more choices. + case '|': + // Create array where options are if this is the first PIPE + // in this clause. + if (!lastGroup.options) { + lastGroup.options = [lastGroup.stack]; + delete lastGroup.stack; + } + + // Create a new stack and add to options for rest of clause. + var stack = []; + lastGroup.options.push(stack); + last = stack; + break; + + + // Repetition. + // For every repetition, remove last element from last stack + // then insert back a RANGE object. + // This design is chosen because there could be more than + // one repetition symbols in a regex i.e. `a?+{2,3}`. + case '{': + var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max; + if (rs !== null) { + if (last.length === 0) { + repeatErr(i); + } + min = parseInt(rs[1], 10); + max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min; + i += rs[0].length; + + last.push({ + type: types.REPETITION, + min: min, + max: max, + value: last.pop(), + }); + } else { + last.push({ + type: types.CHAR, + value: 123, + }); + } + break; + + case '?': + if (last.length === 0) { + repeatErr(i); + } + last.push({ + type: types.REPETITION, + min: 0, + max: 1, + value: last.pop(), + }); + break; + + case '+': + if (last.length === 0) { + repeatErr(i); + } + last.push({ + type: types.REPETITION, + min: 1, + max: Infinity, + value: last.pop(), + }); + break; + + case '*': + if (last.length === 0) { + repeatErr(i); + } + last.push({ + type: types.REPETITION, + min: 0, + max: Infinity, + value: last.pop(), + }); + break; + + + // Default is a character that is not `\[](){}?+*^$`. + default: + last.push({ + type: types.CHAR, + value: c.charCodeAt(0), + }); + } + + } + + // Check if any groups have not been closed. + if (groupStack.length !== 0) { + util.error(regexpStr, 'Unterminated group'); + } + + return start; +}; + +module.exports.types = types; diff --git a/node_modules/ret/lib/positions.js b/node_modules/ret/lib/positions.js new file mode 100644 index 0000000..80677ee --- /dev/null +++ b/node_modules/ret/lib/positions.js @@ -0,0 +1,17 @@ +var types = require('./types'); + +exports.wordBoundary = function() { + return { type: types.POSITION, value: 'b' }; +}; + +exports.nonWordBoundary = function() { + return { type: types.POSITION, value: 'B' }; +}; + +exports.begin = function() { + return { type: types.POSITION, value: '^' }; +}; + +exports.end = function() { + return { type: types.POSITION, value: '$' }; +}; diff --git a/node_modules/ret/lib/sets.js b/node_modules/ret/lib/sets.js new file mode 100644 index 0000000..5fb6be5 --- /dev/null +++ b/node_modules/ret/lib/sets.js @@ -0,0 +1,82 @@ +var types = require('./types'); + +var INTS = function() { + return [{ type: types.RANGE , from: 48, to: 57 }]; +}; + +var WORDS = function() { + return [ + { type: types.CHAR, value: 95 }, + { type: types.RANGE, from: 97, to: 122 }, + { type: types.RANGE, from: 65, to: 90 } + ].concat(INTS()); +}; + +var WHITESPACE = function() { + return [ + { type: types.CHAR, value: 9 }, + { type: types.CHAR, value: 10 }, + { type: types.CHAR, value: 11 }, + { type: types.CHAR, value: 12 }, + { type: types.CHAR, value: 13 }, + { type: types.CHAR, value: 32 }, + { type: types.CHAR, value: 160 }, + { type: types.CHAR, value: 5760 }, + { type: types.CHAR, value: 6158 }, + { type: types.CHAR, value: 8192 }, + { type: types.CHAR, value: 8193 }, + { type: types.CHAR, value: 8194 }, + { type: types.CHAR, value: 8195 }, + { type: types.CHAR, value: 8196 }, + { type: types.CHAR, value: 8197 }, + { type: types.CHAR, value: 8198 }, + { type: types.CHAR, value: 8199 }, + { type: types.CHAR, value: 8200 }, + { type: types.CHAR, value: 8201 }, + { type: types.CHAR, value: 8202 }, + { type: types.CHAR, value: 8232 }, + { type: types.CHAR, value: 8233 }, + { type: types.CHAR, value: 8239 }, + { type: types.CHAR, value: 8287 }, + { type: types.CHAR, value: 12288 }, + { type: types.CHAR, value: 65279 } + ]; +}; + +var NOTANYCHAR = function() { + return [ + { type: types.CHAR, value: 10 }, + { type: types.CHAR, value: 13 }, + { type: types.CHAR, value: 8232 }, + { type: types.CHAR, value: 8233 }, + ]; +}; + +// Predefined class objects. +exports.words = function() { + return { type: types.SET, set: WORDS(), not: false }; +}; + +exports.notWords = function() { + return { type: types.SET, set: WORDS(), not: true }; +}; + +exports.ints = function() { + return { type: types.SET, set: INTS(), not: false }; +}; + +exports.notInts = function() { + return { type: types.SET, set: INTS(), not: true }; +}; + +exports.whitespace = function() { + return { type: types.SET, set: WHITESPACE(), not: false }; +}; + +exports.notWhitespace = function() { + return { type: types.SET, set: WHITESPACE(), not: true }; +}; + +exports.anyChar = function() { + return { type: types.SET, set: NOTANYCHAR(), not: true }; +}; diff --git a/node_modules/ret/lib/types.js b/node_modules/ret/lib/types.js new file mode 100644 index 0000000..9484145 --- /dev/null +++ b/node_modules/ret/lib/types.js @@ -0,0 +1,10 @@ +module.exports = { + ROOT : 0, + GROUP : 1, + POSITION : 2, + SET : 3, + RANGE : 4, + REPETITION : 5, + REFERENCE : 6, + CHAR : 7, +}; diff --git a/node_modules/ret/lib/util.js b/node_modules/ret/lib/util.js new file mode 100644 index 0000000..97d8cf5 --- /dev/null +++ b/node_modules/ret/lib/util.js @@ -0,0 +1,111 @@ +var types = require('./types'); +var sets = require('./sets'); + + +// All of these are private and only used by randexp. +// It's assumed that they will always be called with the correct input. + +var CTRL = '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^ ?'; +var SLSH = { '0': 0, 't': 9, 'n': 10, 'v': 11, 'f': 12, 'r': 13 }; + +/** + * Finds character representations in str and convert all to + * their respective characters + * + * @param {String} str + * @return {String} + */ +exports.strToChars = function(str) { + /* jshint maxlen: false */ + var chars_regex = /(\[\\b\])|(\\)?\\(?:u([A-F0-9]{4})|x([A-F0-9]{2})|(0?[0-7]{2})|c([@A-Z\[\\\]\^?])|([0tnvfr]))/g; + str = str.replace(chars_regex, function(s, b, lbs, a16, b16, c8, dctrl, eslsh) { + if (lbs) { + return s; + } + + var code = b ? 8 : + a16 ? parseInt(a16, 16) : + b16 ? parseInt(b16, 16) : + c8 ? parseInt(c8, 8) : + dctrl ? CTRL.indexOf(dctrl) : + SLSH[eslsh]; + + var c = String.fromCharCode(code); + + // Escape special regex characters. + if (/[\[\]{}\^$.|?*+()]/.test(c)) { + c = '\\' + c; + } + + return c; + }); + + return str; +}; + + +/** + * turns class into tokens + * reads str until it encounters a ] not preceeded by a \ + * + * @param {String} str + * @param {String} regexpStr + * @return {Array.<Array.<Object>, Number>} + */ +exports.tokenizeClass = function(str, regexpStr) { + /* jshint maxlen: false */ + var tokens = []; + var regexp = /\\(?:(w)|(d)|(s)|(W)|(D)|(S))|((?:(?:\\)(.)|([^\]\\]))-(?:\\)?([^\]]))|(\])|(?:\\)?(.)/g; + var rs, c; + + + while ((rs = regexp.exec(str)) != null) { + if (rs[1]) { + tokens.push(sets.words()); + + } else if (rs[2]) { + tokens.push(sets.ints()); + + } else if (rs[3]) { + tokens.push(sets.whitespace()); + + } else if (rs[4]) { + tokens.push(sets.notWords()); + + } else if (rs[5]) { + tokens.push(sets.notInts()); + + } else if (rs[6]) { + tokens.push(sets.notWhitespace()); + + } else if (rs[7]) { + tokens.push({ + type: types.RANGE, + from: (rs[8] || rs[9]).charCodeAt(0), + to: rs[10].charCodeAt(0), + }); + + } else if (c = rs[12]) { + tokens.push({ + type: types.CHAR, + value: c.charCodeAt(0), + }); + + } else { + return [tokens, regexp.lastIndex]; + } + } + + exports.error(regexpStr, 'Unterminated character class'); +}; + + +/** + * Shortcut to throw errors. + * + * @param {String} regexp + * @param {String} msg + */ +exports.error = function(regexp, msg) { + throw new SyntaxError('Invalid regular expression: /' + regexp + '/: ' + msg); +}; |