我有一个这样的字符串:
this is "a test"
我试图在Python中写一些东西,通过空格分割它,同时忽略引号中的空格。我想要的结果是:
['this', 'is', 'a test']
PS:我知道你会问“如果引号中还有引号会怎样”——在我的应用程序中,这种情况永远不会发生。
我有一个这样的字符串:
this is "a test"
我试图在Python中写一些东西,通过空格分割它,同时忽略引号中的空格。我想要的结果是:
['this', 'is', 'a test']
PS:我知道你会问“如果引号中还有引号会怎样”——在我的应用程序中,这种情况永远不会发生。
当前回答
被接受的shlex方法的主要问题是它不会忽略带引号的子字符串外的转义字符,并且在某些极端情况下会给出稍微出乎意料的结果。
我有下面的用例,其中我需要一个split函数来分割输入字符串,以便保留单引号或双引号子字符串,并能够在这样的子字符串中转义引号。未加引号的字符串中的引号不应与任何其他字符区别对待。一些带有预期输出的示例测试用例:
input string | expected output =============================================== 'abc def' | ['abc', 'def'] "abc \\s def" | ['abc', '\\s', 'def'] '"abc def" ghi' | ['abc def', 'ghi'] "'abc def' ghi" | ['abc def', 'ghi'] '"abc \\" def" ghi' | ['abc " def', 'ghi'] "'abc \\' def' ghi" | ["abc ' def", 'ghi'] "'abc \\s def' ghi" | ['abc \\s def', 'ghi'] '"abc \\s def" ghi' | ['abc \\s def', 'ghi'] '"" test' | ['', 'test'] "'' test" | ['', 'test'] "abc'def" | ["abc'def"] "abc'def'" | ["abc'def'"] "abc'def' ghi" | ["abc'def'", 'ghi'] "abc'def'ghi" | ["abc'def'ghi"] 'abc"def' | ['abc"def'] 'abc"def"' | ['abc"def"'] 'abc"def" ghi' | ['abc"def"', 'ghi'] 'abc"def"ghi' | ['abc"def"ghi'] "r'AA' r'.*_xyz$'" | ["r'AA'", "r'.*_xyz$'"] 'abc"def ghi"' | ['abc"def ghi"'] 'abc"def ghi""jkl"' | ['abc"def ghi""jkl"'] 'a"b c"d"e"f"g h"' | ['a"b c"d"e"f"g h"'] 'c="ls /" type key' | ['c="ls /"', 'type', 'key'] "abc'def ghi'" | ["abc'def ghi'"] "c='ls /' type key" | ["c='ls /'", 'type', 'key']
我最终使用以下函数来分割字符串,使上面所有输入字符串都能得到预期的输出结果:
import re
# Compiled once at import time. Three alternatives, tried in order:
#   1. a run of non-space text containing one or more "double-quoted" spans,
#   2. the same for 'single-quoted' spans,
#   3. any other run of non-whitespace characters.
# Inside a quoted span a backslash escapes the following character.
_QUOTED_TOKEN_RE = re.compile(
    r'(?:[^"\s]*"(?:\\.|[^"])*"[^"\s]*)+'
    r'|(?:[^\'\s]*\'(?:\\.|[^\'])*\'[^\'\s]*)+'
    r'|[^\s]+')


def quoted_split(s):
    """Split ``s`` on whitespace, keeping quoted substrings together.

    A token that both starts and ends with the same quote character has that
    outer pair removed.  Escaped quotes (``\\"`` and ``\\'``) are then
    replaced by the bare quote character in every token.

    :param s: the string to split.
    :returns: list of tokens, in order of appearance.
    """
    def strip_quotes(token):
        # At least two characters are required: a lone quote character is
        # ordinary text, not an empty quoted string, and must be kept.
        if len(token) >= 2 and token[0] in ('"', "'") and token[0] == token[-1]:
            return token[1:-1]
        return token

    return [strip_quotes(p).replace('\\"', '"').replace("\\'", "'")
            for p in _QUOTED_TOKEN_RE.findall(s)]
这可不漂亮;但它确实有效。下面的测试应用程序检查其他方法(目前是shlex和csv)和自定义分割实现的结果:
#!/bin/python2.7
import csv
import re
import shlex
from timeit import timeit
def test_case(fn, s, expected):
try:
if fn(s) == expected:
print '[ OK ] %s -> %s' % (s, fn(s))
else:
print '[FAIL] %s -> %s' % (s, fn(s))
except Exception as e:
print '[FAIL] %s -> exception: %s' % (s, e)
def test_case_no_output(fn, s, expected):
try:
fn(s)
except:
pass
def test_split(fn, test_case_fn=test_case):
    """Feed every fixture to ``test_case_fn(fn, input, expected)``, in order.

    :param fn: the split function under test.
    :param test_case_fn: callable invoked once per fixture as
        ``test_case_fn(fn, input_string, expected_tokens)``.
    """
    fixtures = (
        ('abc def', ['abc', 'def']),
        ("abc \\s def", ['abc', '\\s', 'def']),
        ('"abc def" ghi', ['abc def', 'ghi']),
        ("'abc def' ghi", ['abc def', 'ghi']),
        ('"abc \\" def" ghi', ['abc " def', 'ghi']),
        ("'abc \\' def' ghi", ["abc ' def", 'ghi']),
        ("'abc \\s def' ghi", ['abc \\s def', 'ghi']),
        ('"abc \\s def" ghi', ['abc \\s def', 'ghi']),
        ('"" test', ['', 'test']),
        ("'' test", ['', 'test']),
        ("abc'def", ["abc'def"]),
        ("abc'def'", ["abc'def'"]),
        ("abc'def' ghi", ["abc'def'", 'ghi']),
        ("abc'def'ghi", ["abc'def'ghi"]),
        ('abc"def', ['abc"def']),
        ('abc"def"', ['abc"def"']),
        ('abc"def" ghi', ['abc"def"', 'ghi']),
        ('abc"def"ghi', ['abc"def"ghi']),
        ("r'AA' r'.*_xyz$'", ["r'AA'", "r'.*_xyz$'"]),
        ('abc"def ghi"', ['abc"def ghi"']),
        ('abc"def ghi""jkl"', ['abc"def ghi""jkl"']),
        ('a"b c"d"e"f"g h"', ['a"b c"d"e"f"g h"']),
        ('c="ls /" type key', ['c="ls /"', 'type', 'key']),
        ("abc'def ghi'", ["abc'def ghi'"]),
        ("c='ls /' type key", ["c='ls /'", 'type', 'key']),
    )
    for text, wanted in fixtures:
        test_case_fn(fn, text, wanted)
def csv_split(s):
    """Split ``s`` by parsing it as one space-delimited CSV record.

    :param s: the string to split.
    :returns: the first row produced by ``csv.reader`` for ``[s]``.
    """
    reader = csv.reader([s], delimiter=' ')
    rows = list(reader)
    return rows[0]
# Compiled once at import time. Three alternatives, tried in order:
#   1. a run of non-space text containing one or more "double-quoted" spans,
#   2. the same for 'single-quoted' spans,
#   3. any other run of non-whitespace characters.
# Inside a quoted span a backslash escapes the following character.
_QUOTED_TOKEN_RE = re.compile(
    r'(?:[^"\s]*"(?:\\.|[^"])*"[^"\s]*)+'
    r'|(?:[^\'\s]*\'(?:\\.|[^\'])*\'[^\'\s]*)+'
    r'|[^\s]+')


def re_split(s):
    """Split ``s`` on whitespace, keeping quoted substrings together.

    A token that both starts and ends with the same quote character has that
    outer pair removed.  Escaped quotes (``\\"`` and ``\\'``) are then
    replaced by the bare quote character in every token.

    :param s: the string to split.
    :returns: list of tokens, in order of appearance.
    """
    def strip_quotes(token):
        # At least two characters are required: a lone quote character is
        # ordinary text, not an empty quoted string, and must be kept.
        if len(token) >= 2 and token[0] in ('"', "'") and token[0] == token[-1]:
            return token[1:-1]
        return token

    return [strip_quotes(p).replace('\\"', '"').replace("\\'", "'")
            for p in _QUOTED_TOKEN_RE.findall(s)]
if __name__ == '__main__':
    # Correctness pass: print a heading, run every fixture through the
    # splitter with the printing checker, then emit a blank separator line.
    for heading, splitter in (('shlex\n', shlex.split),
                              ('csv\n', csv_split),
                              ('re\n', re_split)):
        print(heading)
        test_split(splitter)
        print('')

    iterations = 100
    setup = 'from __main__ import test_split, test_case_no_output, csv_split, re_split\nimport shlex, re'

    def benchmark(method, code):
        """Time ``code`` with ``timeit`` and print the mean in milliseconds."""
        total_seconds = timeit(code, setup=setup, number=iterations)
        print('%s: %.3fms per iteration' % (method, (1000 * total_seconds / iterations)))

    # Timing pass: same fixtures, but with the silent checker.
    for method, code in (('shlex', 'test_split(shlex.split, test_case_no_output)'),
                         ('csv', 'test_split(csv_split, test_case_no_output)'),
                         ('re', 'test_split(re_split, test_case_no_output)')):
        benchmark(method, code)
输出:
shlex [ OK ] abc def -> ['abc', 'def'] [FAIL] abc \s def -> ['abc', 's', 'def'] [ OK ] "abc def" ghi -> ['abc def', 'ghi'] [ OK ] 'abc def' ghi -> ['abc def', 'ghi'] [ OK ] "abc \" def" ghi -> ['abc " def', 'ghi'] [FAIL] 'abc \' def' ghi -> exception: No closing quotation [ OK ] 'abc \s def' ghi -> ['abc \\s def', 'ghi'] [ OK ] "abc \s def" ghi -> ['abc \\s def', 'ghi'] [ OK ] "" test -> ['', 'test'] [ OK ] '' test -> ['', 'test'] [FAIL] abc'def -> exception: No closing quotation [FAIL] abc'def' -> ['abcdef'] [FAIL] abc'def' ghi -> ['abcdef', 'ghi'] [FAIL] abc'def'ghi -> ['abcdefghi'] [FAIL] abc"def -> exception: No closing quotation [FAIL] abc"def" -> ['abcdef'] [FAIL] abc"def" ghi -> ['abcdef', 'ghi'] [FAIL] abc"def"ghi -> ['abcdefghi'] [FAIL] r'AA' r'.*_xyz$' -> ['rAA', 'r.*_xyz$'] [FAIL] abc"def ghi" -> ['abcdef ghi'] [FAIL] abc"def ghi""jkl" -> ['abcdef ghijkl'] [FAIL] a"b c"d"e"f"g h" -> ['ab cdefg h'] [FAIL] c="ls /" type key -> ['c=ls /', 'type', 'key'] [FAIL] abc'def ghi' -> ['abcdef ghi'] [FAIL] c='ls /' type key -> ['c=ls /', 'type', 'key'] csv [ OK ] abc def -> ['abc', 'def'] [ OK ] abc \s def -> ['abc', '\\s', 'def'] [ OK ] "abc def" ghi -> ['abc def', 'ghi'] [FAIL] 'abc def' ghi -> ["'abc", "def'", 'ghi'] [FAIL] "abc \" def" ghi -> ['abc \\', 'def"', 'ghi'] [FAIL] 'abc \' def' ghi -> ["'abc", "\\'", "def'", 'ghi'] [FAIL] 'abc \s def' ghi -> ["'abc", '\\s', "def'", 'ghi'] [ OK ] "abc \s def" ghi -> ['abc \\s def', 'ghi'] [ OK ] "" test -> ['', 'test'] [FAIL] '' test -> ["''", 'test'] [ OK ] abc'def -> ["abc'def"] [ OK ] abc'def' -> ["abc'def'"] [ OK ] abc'def' ghi -> ["abc'def'", 'ghi'] [ OK ] abc'def'ghi -> ["abc'def'ghi"] [ OK ] abc"def -> ['abc"def'] [ OK ] abc"def" -> ['abc"def"'] [ OK ] abc"def" ghi -> ['abc"def"', 'ghi'] [ OK ] abc"def"ghi -> ['abc"def"ghi'] [ OK ] r'AA' r'.*_xyz$' -> ["r'AA'", "r'.*_xyz$'"] [FAIL] abc"def ghi" -> ['abc"def', 'ghi"'] [FAIL] abc"def ghi""jkl" -> ['abc"def', 'ghi""jkl"'] [FAIL] a"b c"d"e"f"g h" -> ['a"b', 
'c"d"e"f"g', 'h"'] [FAIL] c="ls /" type key -> ['c="ls', '/"', 'type', 'key'] [FAIL] abc'def ghi' -> ["abc'def", "ghi'"] [FAIL] c='ls /' type key -> ["c='ls", "/'", 'type', 'key'] re [ OK ] abc def -> ['abc', 'def'] [ OK ] abc \s def -> ['abc', '\\s', 'def'] [ OK ] "abc def" ghi -> ['abc def', 'ghi'] [ OK ] 'abc def' ghi -> ['abc def', 'ghi'] [ OK ] "abc \" def" ghi -> ['abc " def', 'ghi'] [ OK ] 'abc \' def' ghi -> ["abc ' def", 'ghi'] [ OK ] 'abc \s def' ghi -> ['abc \\s def', 'ghi'] [ OK ] "abc \s def" ghi -> ['abc \\s def', 'ghi'] [ OK ] "" test -> ['', 'test'] [ OK ] '' test -> ['', 'test'] [ OK ] abc'def -> ["abc'def"] [ OK ] abc'def' -> ["abc'def'"] [ OK ] abc'def' ghi -> ["abc'def'", 'ghi'] [ OK ] abc'def'ghi -> ["abc'def'ghi"] [ OK ] abc"def -> ['abc"def'] [ OK ] abc"def" -> ['abc"def"'] [ OK ] abc"def" ghi -> ['abc"def"', 'ghi'] [ OK ] abc"def"ghi -> ['abc"def"ghi'] [ OK ] r'AA' r'.*_xyz$' -> ["r'AA'", "r'.*_xyz$'"] [ OK ] abc"def ghi" -> ['abc"def ghi"'] [ OK ] abc"def ghi""jkl" -> ['abc"def ghi""jkl"'] [ OK ] a"b c"d"e"f"g h" -> ['a"b c"d"e"f"g h"'] [ OK ] c="ls /" type key -> ['c="ls /"', 'type', 'key'] [ OK ] abc'def ghi' -> ["abc'def ghi'"] [ OK ] c='ls /' type key -> ["c='ls /'", 'type', 'key'] shlex: 0.335ms per iteration csv: 0.036ms per iteration re: 0.068ms per iteration
因此,性能比shlex好得多,并且可以通过预编译正则表达式进一步提高,在这种情况下,它将优于csv方法。
其他回答
嗯,好像找不到“回复”按钮了…不管怎样,这个答案是基于Kate的方法,但正确地分割了包含转义引号的子字符串,并删除了子字符串的开始和结束引号:
[i.strip('"').strip("'") for i in re.split(r'(\s+|(?<!\\)".*?(?<!\\)"|(?<!\\)\'.*?(?<!\\)\')', string) if i.strip()]
这适用于像 'This is " a \\\"test\\\"\\\'s substring"' 这样的字符串(不幸的是,为了防止Python去掉转义符,这种疯狂的写法是必要的)。
如果返回列表中的字符串不需要转义,您可以使用稍微修改过的函数版本:
[i.strip('"').strip("'").decode('string_escape') for i in re.split(r'(\s+|(?<!\\)".*?(?<!\\)"|(?<!\\)\'.*?(?<!\\)\')', string) if i.strip()]
由于性能原因,re似乎更快。下面是我的解决方案,使用最小贪婪操作符,保留外部引号:
# One token = a run of double-quoted spans and/or non-whitespace characters;
# the outer quotes are kept.  Written as a raw string so that \S reaches the
# regex engine instead of being an invalid Python string escape.
re.findall(r'(?:".*?"|\S)+', s)
结果:
['this', 'is', '"a test"']
它会把 aaa"bla blub"bbb 这样的结构保留为一个整体,因为这些标记之间没有空格分隔。如果字符串包含转义字符,你可以这样匹配:
>>> a = "She said \"He said, \\\"My name is Mark.\\\"\""
>>> a
'She said "He said, \\"My name is Mark.\\""'
>>> for i in re.findall("(?:\".*?[^\\\\]\"|\S)+", a): print(i)
...
She
said
"He said, \"My name is Mark.\""
请注意,这也会通过模式中的 \S 部分匹配空字符串 ""。
试试这个:
def adamsplit(s):
    """Split ``s`` on whitespace, keeping double-quoted parts intact.

    The string is cut at every ``"``.  Pieces at even positions lie outside
    quotes and are split on whitespace; pieces at odd positions lie between
    quotes and are kept verbatim.

    :param s: the string to split.
    :returns: list of tokens, in order of appearance.
    """
    tokens = []
    for position, piece in enumerate(s.split('"')):
        if position % 2:
            tokens.append(piece)
        else:
            tokens.extend(piece.split())
    return tokens
一些测试字符串:
'This is "a test"' -> ['This', 'is', 'a test']
'"This is \'a test\'"' -> ["This is 'a test'"]
我用shlex.split来处理七千万行Squid日志,实在太慢了,所以我换成了re。
如果你在使用shlex时遇到性能问题,请试试下面这个。
import re
def line_split(line):
    """Return the tokens of ``line``, keeping double-quoted spans together.

    A token is either a run of non-whitespace characters that does not start
    with ``"``, or a non-empty double-quoted span (quotes included).

    :param line: the string to split.
    :returns: list of matched tokens, in order of appearance.
    """
    token_pattern = r'[^"\s]\S*|".+?"'
    return re.compile(token_pattern).findall(line)
被接受的shlex方法的主要问题是它不会忽略带引号的子字符串外的转义字符,并且在某些极端情况下会给出稍微出乎意料的结果。
我有下面的用例,其中我需要一个split函数来分割输入字符串,以便保留单引号或双引号子字符串,并能够在这样的子字符串中转义引号。未加引号的字符串中的引号不应与任何其他字符区别对待。一些带有预期输出的示例测试用例:
input string | expected output =============================================== 'abc def' | ['abc', 'def'] "abc \\s def" | ['abc', '\\s', 'def'] '"abc def" ghi' | ['abc def', 'ghi'] "'abc def' ghi" | ['abc def', 'ghi'] '"abc \\" def" ghi' | ['abc " def', 'ghi'] "'abc \\' def' ghi" | ["abc ' def", 'ghi'] "'abc \\s def' ghi" | ['abc \\s def', 'ghi'] '"abc \\s def" ghi' | ['abc \\s def', 'ghi'] '"" test' | ['', 'test'] "'' test" | ['', 'test'] "abc'def" | ["abc'def"] "abc'def'" | ["abc'def'"] "abc'def' ghi" | ["abc'def'", 'ghi'] "abc'def'ghi" | ["abc'def'ghi"] 'abc"def' | ['abc"def'] 'abc"def"' | ['abc"def"'] 'abc"def" ghi' | ['abc"def"', 'ghi'] 'abc"def"ghi' | ['abc"def"ghi'] "r'AA' r'.*_xyz$'" | ["r'AA'", "r'.*_xyz$'"] 'abc"def ghi"' | ['abc"def ghi"'] 'abc"def ghi""jkl"' | ['abc"def ghi""jkl"'] 'a"b c"d"e"f"g h"' | ['a"b c"d"e"f"g h"'] 'c="ls /" type key' | ['c="ls /"', 'type', 'key'] "abc'def ghi'" | ["abc'def ghi'"] "c='ls /' type key" | ["c='ls /'", 'type', 'key']
我最终使用以下函数来分割字符串,使上面所有输入字符串都能得到预期的输出结果:
import re
# Compiled once at import time. Three alternatives, tried in order:
#   1. a run of non-space text containing one or more "double-quoted" spans,
#   2. the same for 'single-quoted' spans,
#   3. any other run of non-whitespace characters.
# Inside a quoted span a backslash escapes the following character.
_QUOTED_TOKEN_RE = re.compile(
    r'(?:[^"\s]*"(?:\\.|[^"])*"[^"\s]*)+'
    r'|(?:[^\'\s]*\'(?:\\.|[^\'])*\'[^\'\s]*)+'
    r'|[^\s]+')


def quoted_split(s):
    """Split ``s`` on whitespace, keeping quoted substrings together.

    A token that both starts and ends with the same quote character has that
    outer pair removed.  Escaped quotes (``\\"`` and ``\\'``) are then
    replaced by the bare quote character in every token.

    :param s: the string to split.
    :returns: list of tokens, in order of appearance.
    """
    def strip_quotes(token):
        # At least two characters are required: a lone quote character is
        # ordinary text, not an empty quoted string, and must be kept.
        if len(token) >= 2 and token[0] in ('"', "'") and token[0] == token[-1]:
            return token[1:-1]
        return token

    return [strip_quotes(p).replace('\\"', '"').replace("\\'", "'")
            for p in _QUOTED_TOKEN_RE.findall(s)]
这可不漂亮;但它确实有效。下面的测试应用程序检查其他方法(目前是shlex和csv)和自定义分割实现的结果:
#!/bin/python2.7
import csv
import re
import shlex
from timeit import timeit
def test_case(fn, s, expected):
try:
if fn(s) == expected:
print '[ OK ] %s -> %s' % (s, fn(s))
else:
print '[FAIL] %s -> %s' % (s, fn(s))
except Exception as e:
print '[FAIL] %s -> exception: %s' % (s, e)
def test_case_no_output(fn, s, expected):
try:
fn(s)
except:
pass
def test_split(fn, test_case_fn=test_case):
    """Feed every fixture to ``test_case_fn(fn, input, expected)``, in order.

    :param fn: the split function under test.
    :param test_case_fn: callable invoked once per fixture as
        ``test_case_fn(fn, input_string, expected_tokens)``.
    """
    fixtures = (
        ('abc def', ['abc', 'def']),
        ("abc \\s def", ['abc', '\\s', 'def']),
        ('"abc def" ghi', ['abc def', 'ghi']),
        ("'abc def' ghi", ['abc def', 'ghi']),
        ('"abc \\" def" ghi', ['abc " def', 'ghi']),
        ("'abc \\' def' ghi", ["abc ' def", 'ghi']),
        ("'abc \\s def' ghi", ['abc \\s def', 'ghi']),
        ('"abc \\s def" ghi', ['abc \\s def', 'ghi']),
        ('"" test', ['', 'test']),
        ("'' test", ['', 'test']),
        ("abc'def", ["abc'def"]),
        ("abc'def'", ["abc'def'"]),
        ("abc'def' ghi", ["abc'def'", 'ghi']),
        ("abc'def'ghi", ["abc'def'ghi"]),
        ('abc"def', ['abc"def']),
        ('abc"def"', ['abc"def"']),
        ('abc"def" ghi', ['abc"def"', 'ghi']),
        ('abc"def"ghi', ['abc"def"ghi']),
        ("r'AA' r'.*_xyz$'", ["r'AA'", "r'.*_xyz$'"]),
        ('abc"def ghi"', ['abc"def ghi"']),
        ('abc"def ghi""jkl"', ['abc"def ghi""jkl"']),
        ('a"b c"d"e"f"g h"', ['a"b c"d"e"f"g h"']),
        ('c="ls /" type key', ['c="ls /"', 'type', 'key']),
        ("abc'def ghi'", ["abc'def ghi'"]),
        ("c='ls /' type key", ["c='ls /'", 'type', 'key']),
    )
    for text, wanted in fixtures:
        test_case_fn(fn, text, wanted)
def csv_split(s):
    """Split ``s`` by parsing it as one space-delimited CSV record.

    :param s: the string to split.
    :returns: the first row produced by ``csv.reader`` for ``[s]``.
    """
    reader = csv.reader([s], delimiter=' ')
    rows = list(reader)
    return rows[0]
# Compiled once at import time. Three alternatives, tried in order:
#   1. a run of non-space text containing one or more "double-quoted" spans,
#   2. the same for 'single-quoted' spans,
#   3. any other run of non-whitespace characters.
# Inside a quoted span a backslash escapes the following character.
_QUOTED_TOKEN_RE = re.compile(
    r'(?:[^"\s]*"(?:\\.|[^"])*"[^"\s]*)+'
    r'|(?:[^\'\s]*\'(?:\\.|[^\'])*\'[^\'\s]*)+'
    r'|[^\s]+')


def re_split(s):
    """Split ``s`` on whitespace, keeping quoted substrings together.

    A token that both starts and ends with the same quote character has that
    outer pair removed.  Escaped quotes (``\\"`` and ``\\'``) are then
    replaced by the bare quote character in every token.

    :param s: the string to split.
    :returns: list of tokens, in order of appearance.
    """
    def strip_quotes(token):
        # At least two characters are required: a lone quote character is
        # ordinary text, not an empty quoted string, and must be kept.
        if len(token) >= 2 and token[0] in ('"', "'") and token[0] == token[-1]:
            return token[1:-1]
        return token

    return [strip_quotes(p).replace('\\"', '"').replace("\\'", "'")
            for p in _QUOTED_TOKEN_RE.findall(s)]
if __name__ == '__main__':
    # Correctness pass: print a heading, run every fixture through the
    # splitter with the printing checker, then emit a blank separator line.
    for heading, splitter in (('shlex\n', shlex.split),
                              ('csv\n', csv_split),
                              ('re\n', re_split)):
        print(heading)
        test_split(splitter)
        print('')

    iterations = 100
    setup = 'from __main__ import test_split, test_case_no_output, csv_split, re_split\nimport shlex, re'

    def benchmark(method, code):
        """Time ``code`` with ``timeit`` and print the mean in milliseconds."""
        total_seconds = timeit(code, setup=setup, number=iterations)
        print('%s: %.3fms per iteration' % (method, (1000 * total_seconds / iterations)))

    # Timing pass: same fixtures, but with the silent checker.
    for method, code in (('shlex', 'test_split(shlex.split, test_case_no_output)'),
                         ('csv', 'test_split(csv_split, test_case_no_output)'),
                         ('re', 'test_split(re_split, test_case_no_output)')):
        benchmark(method, code)
输出:
shlex [ OK ] abc def -> ['abc', 'def'] [FAIL] abc \s def -> ['abc', 's', 'def'] [ OK ] "abc def" ghi -> ['abc def', 'ghi'] [ OK ] 'abc def' ghi -> ['abc def', 'ghi'] [ OK ] "abc \" def" ghi -> ['abc " def', 'ghi'] [FAIL] 'abc \' def' ghi -> exception: No closing quotation [ OK ] 'abc \s def' ghi -> ['abc \\s def', 'ghi'] [ OK ] "abc \s def" ghi -> ['abc \\s def', 'ghi'] [ OK ] "" test -> ['', 'test'] [ OK ] '' test -> ['', 'test'] [FAIL] abc'def -> exception: No closing quotation [FAIL] abc'def' -> ['abcdef'] [FAIL] abc'def' ghi -> ['abcdef', 'ghi'] [FAIL] abc'def'ghi -> ['abcdefghi'] [FAIL] abc"def -> exception: No closing quotation [FAIL] abc"def" -> ['abcdef'] [FAIL] abc"def" ghi -> ['abcdef', 'ghi'] [FAIL] abc"def"ghi -> ['abcdefghi'] [FAIL] r'AA' r'.*_xyz$' -> ['rAA', 'r.*_xyz$'] [FAIL] abc"def ghi" -> ['abcdef ghi'] [FAIL] abc"def ghi""jkl" -> ['abcdef ghijkl'] [FAIL] a"b c"d"e"f"g h" -> ['ab cdefg h'] [FAIL] c="ls /" type key -> ['c=ls /', 'type', 'key'] [FAIL] abc'def ghi' -> ['abcdef ghi'] [FAIL] c='ls /' type key -> ['c=ls /', 'type', 'key'] csv [ OK ] abc def -> ['abc', 'def'] [ OK ] abc \s def -> ['abc', '\\s', 'def'] [ OK ] "abc def" ghi -> ['abc def', 'ghi'] [FAIL] 'abc def' ghi -> ["'abc", "def'", 'ghi'] [FAIL] "abc \" def" ghi -> ['abc \\', 'def"', 'ghi'] [FAIL] 'abc \' def' ghi -> ["'abc", "\\'", "def'", 'ghi'] [FAIL] 'abc \s def' ghi -> ["'abc", '\\s', "def'", 'ghi'] [ OK ] "abc \s def" ghi -> ['abc \\s def', 'ghi'] [ OK ] "" test -> ['', 'test'] [FAIL] '' test -> ["''", 'test'] [ OK ] abc'def -> ["abc'def"] [ OK ] abc'def' -> ["abc'def'"] [ OK ] abc'def' ghi -> ["abc'def'", 'ghi'] [ OK ] abc'def'ghi -> ["abc'def'ghi"] [ OK ] abc"def -> ['abc"def'] [ OK ] abc"def" -> ['abc"def"'] [ OK ] abc"def" ghi -> ['abc"def"', 'ghi'] [ OK ] abc"def"ghi -> ['abc"def"ghi'] [ OK ] r'AA' r'.*_xyz$' -> ["r'AA'", "r'.*_xyz$'"] [FAIL] abc"def ghi" -> ['abc"def', 'ghi"'] [FAIL] abc"def ghi""jkl" -> ['abc"def', 'ghi""jkl"'] [FAIL] a"b c"d"e"f"g h" -> ['a"b', 
'c"d"e"f"g', 'h"'] [FAIL] c="ls /" type key -> ['c="ls', '/"', 'type', 'key'] [FAIL] abc'def ghi' -> ["abc'def", "ghi'"] [FAIL] c='ls /' type key -> ["c='ls", "/'", 'type', 'key'] re [ OK ] abc def -> ['abc', 'def'] [ OK ] abc \s def -> ['abc', '\\s', 'def'] [ OK ] "abc def" ghi -> ['abc def', 'ghi'] [ OK ] 'abc def' ghi -> ['abc def', 'ghi'] [ OK ] "abc \" def" ghi -> ['abc " def', 'ghi'] [ OK ] 'abc \' def' ghi -> ["abc ' def", 'ghi'] [ OK ] 'abc \s def' ghi -> ['abc \\s def', 'ghi'] [ OK ] "abc \s def" ghi -> ['abc \\s def', 'ghi'] [ OK ] "" test -> ['', 'test'] [ OK ] '' test -> ['', 'test'] [ OK ] abc'def -> ["abc'def"] [ OK ] abc'def' -> ["abc'def'"] [ OK ] abc'def' ghi -> ["abc'def'", 'ghi'] [ OK ] abc'def'ghi -> ["abc'def'ghi"] [ OK ] abc"def -> ['abc"def'] [ OK ] abc"def" -> ['abc"def"'] [ OK ] abc"def" ghi -> ['abc"def"', 'ghi'] [ OK ] abc"def"ghi -> ['abc"def"ghi'] [ OK ] r'AA' r'.*_xyz$' -> ["r'AA'", "r'.*_xyz$'"] [ OK ] abc"def ghi" -> ['abc"def ghi"'] [ OK ] abc"def ghi""jkl" -> ['abc"def ghi""jkl"'] [ OK ] a"b c"d"e"f"g h" -> ['a"b c"d"e"f"g h"'] [ OK ] c="ls /" type key -> ['c="ls /"', 'type', 'key'] [ OK ] abc'def ghi' -> ["abc'def ghi'"] [ OK ] c='ls /' type key -> ["c='ls /'", 'type', 'key'] shlex: 0.335ms per iteration csv: 0.036ms per iteration re: 0.068ms per iteration
因此,性能比shlex好得多,并且可以通过预编译正则表达式进一步提高,在这种情况下,它将优于csv方法。