var util = require('./util'); var types = require('./types'); var sets = require('./sets'); var positions = require('./positions'); module.exports = function(regexpStr) { var i = 0, l, c, start = { type: types.ROOT, stack: []}, // Keep track of last clause/group and stack. lastGroup = start, last = start.stack, groupStack = []; var repeatErr = function(i) { util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1)); }; // Decode a few escaped characters. var str = util.strToChars(regexpStr); l = str.length; // Iterate through each character in string. while (i < l) { c = str[i++]; switch (c) { // Handle escaped characters, inclues a few sets. case '\\': c = str[i++]; switch (c) { case 'b': last.push(positions.wordBoundary()); break; case 'B': last.push(positions.nonWordBoundary()); break; case 'w': last.push(sets.words()); break; case 'W': last.push(sets.notWords()); break; case 'd': last.push(sets.ints()); break; case 'D': last.push(sets.notInts()); break; case 's': last.push(sets.whitespace()); break; case 'S': last.push(sets.notWhitespace()); break; default: // Check if c is integer. // In which case it's a reference. if (/\d/.test(c)) { last.push({ type: types.REFERENCE, value: parseInt(c, 10) }); // Escaped character. } else { last.push({ type: types.CHAR, value: c.charCodeAt(0) }); } } break; // Positionals. case '^': last.push(positions.begin()); break; case '$': last.push(positions.end()); break; // Handle custom sets. case '[': // Check if this class is 'anti' i.e. [^abc]. var not; if (str[i] === '^') { not = true; i++; } else { not = false; } // Get all the characters in class. var classTokens = util.tokenizeClass(str.slice(i), regexpStr); // Increase index by length of class. i += classTokens[1]; last.push({ type: types.SET, set: classTokens[0], not: not, }); break; // Class of any character except \n. case '.': last.push(sets.anyChar()); break; // Push group onto stack. case '(': // Create group. var group = { type: types.GROUP, stack: [], remember: true, }; c = str[i]; // If if this is a special kind of group. if (c === '?') { c = str[i + 1]; i += 2; // Match if followed by. if (c === '=') { group.followedBy = true; // Match if not followed by. } else if (c === '!') { group.notFollowedBy = true; } else if (c !== ':') { util.error(regexpStr, 'Invalid group, character \'' + c + '\' after \'?\' at column ' + (i - 1)); } group.remember = false; } // Insert subgroup into current group stack. last.push(group); // Remember the current group for when the group closes. groupStack.push(lastGroup); // Make this new group the current group. lastGroup = group; last = group.stack; break; // Pop group out of stack. case ')': if (groupStack.length === 0) { util.error(regexpStr, 'Unmatched ) at column ' + (i - 1)); } lastGroup = groupStack.pop(); // Check if this group has a PIPE. // To get back the correct last stack. last = lastGroup.options ? lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack; break; // Use pipe character to give more choices. case '|': // Create array where options are if this is the first PIPE // in this clause. if (!lastGroup.options) { lastGroup.options = [lastGroup.stack]; delete lastGroup.stack; } // Create a new stack and add to options for rest of clause. var stack = []; lastGroup.options.push(stack); last = stack; break; // Repetition. // For every repetition, remove last element from last stack // then insert back a RANGE object. // This design is chosen because there could be more than // one repetition symbols in a regex i.e. `a?+{2,3}`. case '{': var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max; if (rs !== null) { if (last.length === 0) { repeatErr(i); } min = parseInt(rs[1], 10); max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min; i += rs[0].length; last.push({ type: types.REPETITION, min: min, max: max, value: last.pop(), }); } else { last.push({ type: types.CHAR, value: 123, }); } break; case '?': if (last.length === 0) { repeatErr(i); } last.push({ type: types.REPETITION, min: 0, max: 1, value: last.pop(), }); break; case '+': if (last.length === 0) { repeatErr(i); } last.push({ type: types.REPETITION, min: 1, max: Infinity, value: last.pop(), }); break; case '*': if (last.length === 0) { repeatErr(i); } last.push({ type: types.REPETITION, min: 0, max: Infinity, value: last.pop(), }); break; // Default is a character that is not `\[](){}?+*^$`. default: last.push({ type: types.CHAR, value: c.charCodeAt(0), }); } } // Check if any groups have not been closed. if (groupStack.length !== 0) { util.error(regexpStr, 'Unterminated group'); } return start; }; module.exports.types = types;