使用RegExp.exec从字符串中提取所有匹配项

我试图解析以下类型的字符串:

[key:"val" key2:"val2"]

其中包含任意数量的 key:"val" 键值对。我想获取键名和值。对于那些好奇的人，我正在尝试解析 Taskwarrior（任务战士）的数据库格式。

这是我的测试字符串:

[description:"aoeu" uuid:"123sth"]

这意味着除了空格之外，任何东西都可以放在键或值中，冒号周围没有空格，值总是在双引号中。

在node中，这是我的输出:

[deuteronomy][gatlin][~]$ node
> var re = /^\[(?:(.+?):"(.+?)"\s*)+\]$/g
> re.exec('[description:"aoeu" uuid:"123sth"]');
[ '[description:"aoeu" uuid:"123sth"]',
  'uuid',
  '123sth',
  index: 0,
  input: '[description:"aoeu" uuid:"123sth"]' ]

但是 description:"aoeu" 也符合这个模式。我怎样才能拿到所有的匹配项?

继续在循环中调用re.exec(s)以获取所有匹配项:

// Pattern: optional whitespace, a key (any run of characters other than '[' and ':'),
// a colon, then a double-quoted value.
var re = /\s*([^[:]+):\"([^"]+)"/g;
var s = '[description:"aoeu" uuid:"123sth"]';
var m = re.exec(s);

// Every exec() call on a global regex resumes from re.lastIndex;
// a null result means the input has been exhausted.
while (m !== null) {
    console.log(m[1], m[2]);
    m = re.exec(s);
}

试试这个JSFiddle: https://jsfiddle.net/7yS2V/

2011-06-12 18:06:05

要遍历所有匹配项，可以使用replace函数:

var s = '[description:"aoeu" uuid:"123sth"]';
var re = /\s*([^[:]+):\"([^"]+)"/g;

// replace() calls the callback once per match, passing the full match followed
// by the capture groups. The string replace() returns is simply discarded here.
s.replace(re, (whole, key, value) => {
    console.log(key, value);
});

2012-07-12 01:35:19

这是一个解

var s = '[description:"aoeu" uuid:"123sth"]';
var re = /\s*([^[:]+):\"([^"]+)"/g;

// exec() on a global regex continues after the previous match and
// returns null (ending the loop) once nothing is left.
for (var m = re.exec(s); m; m = re.exec(s)) {
  console.log(m[1], m[2]);
}

这是基于lawnsea的答案，但更简短。

注意，必须设置 'g' 标志，以便在多次调用之间向前移动内部指针(lastIndex)。

2014-06-05 08:17:24

下面是我得到匹配的函数:

/**
 * Collects the raw `exec` results of `regex` against `text`.
 *
 * With the `g` flag every match is returned, starting from the regex's current
 * `lastIndex`; once the search is exhausted `exec` resets `lastIndex` to 0.
 * Without the `g` flag at most one match (the first) is returned.
 *
 * @param {RegExp} regex - Pattern to apply.
 * @param {string} text - String to search.
 * @returns {Array} Array of `exec` match arrays (empty when nothing matches).
 * @throws {Error} If `regex` is not a RegExp.
 */
function getAllMatches(regex, text) {
    if (regex.constructor !== RegExp) {
        throw new Error('not RegExp');
    }

    var res = [];
    var match = null;

    if (regex.global) {
        while ((match = regex.exec(text)) !== null) {
            res.push(match);

            // A zero-length match leaves lastIndex where it was, so the next
            // exec() would return the same match forever. Step past it by one
            // character (two code units for an astral code point in unicode mode).
            if (match[0] === '') {
                var fullUnicode = regex.unicode || regex.unicodeSets;
                regex.lastIndex += (fullUnicode && text.codePointAt(regex.lastIndex) > 0xFFFF) ? 2 : 1;
            }
        }
    }
    else {
        match = regex.exec(text);
        if (match !== null) {
            res.push(match);
        }
    }

    return res;
}

// Example:

var regex = /abc|def|ghi/g;
var res = getAllMatches(regex, 'abcdefghi');

// Print the full text (element 0) of every collected match.
for (var item of res) {
    console.log(item[0]);
}

2015-05-02 06:57:46

基于Agus的函数，但我更喜欢返回匹配值:

var bob = "&gt; bob &lt;";
/**
 * Returns the first capture group of each match of `regex` in `str`.
 *
 * With the `g` flag every match contributes one entry; without it only the
 * first match is used. An entry is `undefined` when the pattern has no
 * capture group or the group did not participate in the match.
 *
 * @param {string} str - String to search.
 * @param {RegExp} regex - Pattern whose first capture group is collected.
 * @returns {Array} Captured values, in match order.
 */
function matchAll(str, regex) {
    var res = [];
    var m;
    if (regex.global) {
        while ((m = regex.exec(str)) !== null) {
            res.push(m[1]);

            // A zero-length match does not move lastIndex, which would repeat
            // the same match forever. Step past it by one character
            // (two code units for an astral code point in unicode mode).
            if (m[0] === '') {
                var fullUnicode = regex.unicode || regex.unicodeSets;
                regex.lastIndex += (fullUnicode && str.codePointAt(regex.lastIndex) > 0xFFFF) ? 2 : 1;
            }
        }
    } else {
        m = regex.exec(str);
        if (m !== null) {
            res.push(m[1]);
        }
    }
    return res;
}
var Amatch = matchAll(bob, /(&.*?;)/g);
console.log(Amatch);  // yields: ["&gt;", "&lt;"]

2015-07-21 17:44:30

str.match(pattern)：如果 pattern 带有全局标志 g，将以数组形式返回所有匹配项。

例如:

const str = '除了@Emran， @Raju和@Noman，我们都在那里';
// With the g flag, match() returns every full match as a plain array.
console.log(str.match(/@\w*/g)); // Will log ["@Emran", "@Raju", "@Noman"]

2017-04-25 22:35:45

以下是我的回答:

var str = '[me nombre es] : My name is. [Yo puedo] is the right word'; 

var reg = /\[(.*?)\]/g;

// match() returns null when nothing matches, so fall back to an empty array.
var a = str.match(reg) || [];

// Strip the surrounding brackets from each match individually. Joining the
// matches and re-splitting on ',' would break any bracketed text containing a comma.
a = a.map(function (bracketed) {
    return bracketed.slice(1, -1);
});

2017-07-03 06:24:13

如果你有ES2020

(意思是如果你的系统:Chrome、Node.js、Firefox等支持ECMAScript 2020或更高版本) 使用新的 yourString.matchAll(/your-regex/g)。

如果你没有ES2020

如果您使用的是较旧的系统，这里有一个简单的复制和粘贴函数

/**
 * Finds every match of a pattern in a string, regardless of whether the
 * pattern was created with the global flag.
 *
 * The pattern is copied before use, so the caller's regex (including its
 * `lastIndex`) is never modified.
 *
 * @param {RegExp|string} regexPattern - Pattern to search for; a string is
 *   compiled as a regex source with no extra flags.
 * @param {string} sourceString - String to search.
 * @returns {Array} The `exec` match arrays, each with its `input` property removed.
 */
function findAll(regexPattern, sourceString) {
    let output = []
    let match
    // auto-add global flag while keeping others as-is
    // (a string pattern has no flags of its own)
    const existingFlags = regexPattern instanceof RegExp ? regexPattern.flags : ""
    const regexPatternWithGlobal = new RegExp(regexPattern, [...new Set("g" + existingFlags)].join(""))
    while ((match = regexPatternWithGlobal.exec(sourceString)) !== null) {
        // get rid of the string copy
        delete match.input
        // store the match data
        output.push(match)
        // a zero-length match does not move lastIndex; step past it so the
        // loop cannot repeat the same match forever (two code units for an
        // astral code point in unicode mode)
        if (match[0] === "") {
            const fullUnicode = regexPatternWithGlobal.unicode || regexPatternWithGlobal.unicodeSets
            const step = fullUnicode && sourceString.codePointAt(regexPatternWithGlobal.lastIndex) > 0xFFFF ? 2 : 1
            regexPatternWithGlobal.lastIndex += step
        }
    }
    return output
}

使用示例:

console.log(   findAll(/blah/g,'blah1 blah2')   )

输出:

[ [ 'blah', index: 0 ], [ 'blah', index: 6 ] ]

2018-01-21 08:50:23

用这个……

var all_matches = your_string.match(re);
console.log(all_matches)

它将返回一个包含所有匹配项的数组…这很好.... 但是记住它不会考虑分组，它只会返回完整的匹配。

2018-03-11 11:05:40

可迭代对象更好:

/**
 * Wraps a regex search as a re-iterable object: each iteration works on a
 * fresh copy of `pattern`, so the caller's regex state is never touched.
 *
 * A pattern with the `g` (or `y`) flag yields every match; a pattern without
 * either flag yields at most its first match.
 *
 * @param {string} text - String to search.
 * @param {RegExp} pattern - Pattern to apply.
 * @returns {Iterable} Iterable of `exec` match arrays.
 */
const matches = (text, pattern) => ({
  [Symbol.iterator]: function * () {
    const clone = new RegExp(pattern.source, pattern.flags);
    // Without g or y, exec() always restarts at index 0 and would hand back
    // the same first match forever.
    const advances = clone.global || clone.sticky;
    let match = clone.exec(text);
    while (match !== null) {
      yield match;
      if (!advances) {
        return;
      }
      // A zero-length match leaves lastIndex unchanged; step past it
      // (two code units for an astral code point in unicode mode).
      if (match[0] === '') {
        const fullUnicode = clone.unicode || clone.unicodeSets;
        clone.lastIndex += fullUnicode && text.codePointAt(clone.lastIndex) > 0xFFFF ? 2 : 1;
      }
      match = clone.exec(text);
    }
  }
});

循环中的用法:

for (const match of matches('abcdefabcdef', /ab/g)) {
  console.log(match);
}

或者如果你想要一个数组:

[ ...matches('abcdefabcdef', /ab/g) ]

2018-05-22 14:58:33

我强烈推荐使用String.match()函数，并为它创建一个相关的RegEx。我的例子是一个字符串列表，在扫描用户输入的关键字和短语时，这通常是必要的。

    // 1) Define keywords
    var keywords = ['apple', 'orange', 'banana'];

    // 2) Create regex, pass "i" for case-insensitive and "g" for global search.
    //    Each keyword is escaped first so that characters such as "." or "+"
    //    are matched literally instead of being read as regex syntax.
    var escapedKeywords = keywords.map(function (keyword) {
        return keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    });
    var regex = new RegExp("(" + escapedKeywords.join('|') + ")", "ig");
    // => /(apple|orange|banana)/gi

    // 3) Match it against any string to get all matches
    //    (match() returns null when nothing is found)
    "Test string for ORANGE's or apples were mentioned".match(regex);
    // => ["ORANGE", "apple"]

希望这能有所帮助!

2018-06-06 22:13:38

这并不能真正帮助你解决更复杂的问题，但我还是发布了这篇文章，因为对于那些不像你这样做全局搜索的人来说，这是一个简单的解决方案。

我简化了答案中的正则表达式，以使其更清晰(这不是您的确切问题的解决方案)。

var re = /^(.+?):"(.+)"$/
var regExResult = re.exec('description:"aoeu"');
var purifiedResult = purify_regex(regExResult);

/**
 * Extracts only the capture-group values from a regex match result.
 *
 * @param {Array} reResult - Array-like match result, e.g. the value returned by `RegExp.prototype.exec`.
 *   Must be iterable: spreading `null` (what `exec` returns when nothing matches) throws a TypeError.
 * @returns {Array} A new array holding the elements of `reResult` after the first one
 *   (the full match), in order. The input is not modified.
 */
function purify_regex(reResult){

  // Spreading copies only the indexed elements into a fresh array, so the
  // regex-specific extra properties are not carried over and the original is not mutated
  let purifiedArray = [...reResult];

  // Removes the full match value at position 0
  purifiedArray.shift();

  // Returns a plain array without having mutated the original regex result
  return purifiedArray;
}

// purifiedResult= ["description", "aoeu"]

这看起来比没有注释时更冗长，这是没有注释时的样子

var re = /^(.+?):"(.+)"$/
var regExResult = re.exec('description:"aoeu"');
var purifiedResult = purify_regex(regExResult);

function purify_regex(reResult){
  let purifiedArray = [...reResult];
  purifiedArray.shift();
  return purifiedArray;
}

注意，任何不匹配的组都将作为未定义的值列在数组中。

该解决方案使用ES6展开操作符来净化正则表达式特定值的数组。如果你想要IE11支持，你需要通过Babel来运行你的代码。

2018-07-10 12:06:45

str.match(/regex/g)

以数组形式返回所有匹配项。

如果出于某种神秘的原因，您需要exec附带的额外信息，作为前面答案的替代方案，您可以使用递归函数来代替循环，如下所示(这看起来也很酷:)。

/**
 * Recursively collects the `exec` results of `regex` against `str`.
 *
 * With the `g` (or `y`) flag every match is collected; without either flag
 * only the first match is collected. One stack frame is used per match.
 *
 * @param {RegExp} regex - Pattern to apply.
 * @param {string} str - String to search.
 * @param {Array} [matches=[]] - Accumulator; callers normally omit it.
 * @returns {Array} The accumulated `exec` match arrays.
 */
function findMatches(regex, str, matches = []) {
   const res = regex.exec(str)
   if (res === null) return matches
   matches.push(res)
   // Without g or y, exec() restarts at index 0 on every call, so recursing
   // again would return the same match until the stack overflows.
   if (!regex.global && !regex.sticky) return matches
   // A zero-length match leaves lastIndex unchanged; step past it
   // (two code units for an astral code point in unicode mode).
   if (res[0] === '') {
      const fullUnicode = regex.unicode || regex.unicodeSets
      regex.lastIndex += fullUnicode && str.codePointAt(regex.lastIndex) > 0xFFFF ? 2 : 1
   }
   return findMatches(regex, str, matches)
}

// Usage
const matches = findMatches(/regex/g, str)

正如前面的评论中所述，在regex定义的末尾加上g，以便在每次执行时将指针向前移动，这很重要。

2018-11-21 23:44:17

我们终于开始看到一个内置的matchAll函数，请参阅这里的描述和兼容性表。似乎到2020年5月，Chrome、Edge、Firefox和Node.js(12+)都被支持，但IE、Safari和Opera不支持。它似乎是在2018年12月起草的，所以给它一些时间来传播到所有浏览器，但我相信它会到达那里。

内置的matchAll函数很好，因为它返回一个可迭代对象。它还为每个匹配返回捕获组!你可以这样做

// get the letters before and after "o"
const matches = "stackoverflow".matchAll(/(\w)o(\w)/g);

// matchAll returns a one-shot iterator: once it has been looped over it is
// exhausted. Turn it into an array first if the results are needed more than once.
const arrayOfAllMatches = [...matches];

for (const match of arrayOfAllMatches) {
    console.log("letter before:" + match[1]);
    console.log("letter after:" + match[2]);
}

似乎每个匹配对象都使用与match()相同的格式。因此，每个对象都是匹配和捕获组的数组，以及三个附加属性索引、输入和组。它看起来是这样的:

[<match>, <group1>, <group2>, ..., index: <match offset>, input: <original string>, groups: <named capture groups>]

有关matchAll的更多信息，还有一个谷歌开发人员页面。也有 polyfill/shim(垫片)可用。

2019-04-08 12:09:30

从ES2020开始(String.prototype.matchAll)，现在有一个更简单，更好的方法来获取所有的匹配，以及关于捕获组的信息，以及它们的索引:

const string = 'Mice like to dice rice';
const regex = /.ice/gu;
for(const match of string.matchAll(regex)) {
    console.log(match);
}

// ["Mice", index: 0, input: "Mice like to dice rice", groups: undefined]
// ["dice", index: 13, input: "Mice like to dice rice", groups: undefined]
// ["rice", index: 18, input: "Mice like to dice rice", groups: undefined]

目前支持Chrome, Firefox, Opera。根据您阅读本文的时间，检查此链接以查看其当前支持。

2019-05-09 07:05:25

这里有一个没有while循环的一行解决方案。

结果列表中保留该顺序。

潜在的缺点是

它为每个匹配复制正则表达式。结果与预期的解形式不同。你需要再处理一次。

let re = /\s*([^[:]+):\"([^"]+)"/g;
// The semicolon below is required: the next statement starts with "(", which
// would otherwise be parsed as a call on the string literal.
let str = '[description:"aoeu" uuid:"123sth"]';

// match() lists the full text of every match (null when there is none);
// each one is then re-run through a fresh copy of the regex to recover its capture groups.
(str.match(re) || []).map(e => RegExp(re.source, re.flags).exec(e));


[ [ 'description:"aoeu"',
    'description',
    'aoeu',
    index: 0,
    input: 'description:"aoeu"',
    groups: undefined ],
  [ ' uuid:"123sth"',
    'uuid',
    '123sth',
    index: 0,
    input: ' uuid:"123sth"',
    groups: undefined ] ]

2019-05-23 02:07:05

我的猜测是，如果有边界情况，比如额外的或缺失的空格，这种边界更少的表达式也可能是一种选择:

^\s*\[\s*([^\s\r\n:]+)\s*:\s*"([^"]*)"\s*([^\s\r\n:]+)\s*:\s*"([^"]*)"\s*\]\s*$

如果你想探索/简化/修改这个表达式，regex101.com 右上方的面板中已经对它作了解释。如果你愿意，还可以在这个链接中查看它如何与一些样本输入进行匹配。

Test

RegEx电路

jex.im 可视化正则表达式:

2019-08-11 19:37:22

如果你能够使用matchAll，这里有一个技巧:

Array.from 有一个“选择器”(映射函数)参数，这样你就不会得到一个尴尬的“匹配”结果数组，而可以把它投射成你真正需要的东西:

Array.from(str.matchAll(regexp), m => m[0]);

如果你使用了命名组，例如 (/(?<firstName>[a-z][a-z]+)/g)，你可以这样做:

Array.from(str.matchAll(regexp), m => m.groups.firstName);

2020-12-22 20:57:17

const re = /^\[(?:(.+?):"(.+?)"\s*)+\]$/g;
const matches = [...re.exec('[description:"aoeu" uuid:"123sth"]').entries()];
console.log(matches);

基本上，这是用 ES6 把 exec 返回结果的 entries() 迭代器转换为常规数组的方法

2023-02-06 17:57:06

使用RegExp.exec从字符串中提取所有匹配项

推荐文章

最新文章

标签