我有这样的代码:

good = [x for x in mylist if x in goodvals]
bad = [x for x in mylist if x not in goodvals]

目标是根据mylist的内容是否满足条件,将它们拆分为另外两个列表。

我怎样才能做得更优雅呢?我能避免在mylist上做两个单独的迭代吗?我可以通过这样做来提高性能吗?


当前回答

优雅快捷

受到DanSalmo评论的启发,这里有一个简洁、优雅的解决方案,同时也是最快的解决方案之一。

good_set = set(goodvals)
good, bad = [], []
for item in my_list:
    good.append(item) if item in good_set else bad.append(item)

提示:将goodvals转换为一组可以很容易地提高速度。

最快

为了获得最大速度,我们取最快的答案,并通过将good_list转换为一个集合来对其进行涡轮增压。仅这一项就为我们提供了40%以上的速度提升,我们最终得到了比最慢的解决方案快5.5倍以上的解决方案,即使它仍然可读。

good_list_set = set(good_list)  # 40%+ faster than a tuple.

good, bad = [], []
for item in my_origin_list:
    if item in good_list_set:
        good.append(item)
    else:
        bad.append(item)

稍微短一点

这是之前答案的一个更简洁的版本。

good_list_set = set(good_list)  # 40%+ faster than a tuple.

good, bad = [], []
for item in my_origin_list:
    out = good if item in good_list_set else bad
    out.append(item)

优雅可能有点主观,但一些鲁布·戈德堡风格的解决方案很可爱,很巧妙,不应该用于任何语言的产品代码中,更不用说本质上优雅的python了。


基准测试结果:

filter_BJHomer                  80/s       --   -3265%   -5312%   -5900%   -6262%   -7273%   -7363%   -8051%   -8162%   -8244%
zip_Funky                       118/s    4848%       --   -3040%   -3913%   -4450%   -5951%   -6085%   -7106%   -7271%   -7393%
two_lst_tuple_JohnLaRoy         170/s   11332%    4367%       --   -1254%   -2026%   -4182%   -4375%   -5842%   -6079%   -6254%
if_else_DBR                     195/s   14392%    6428%    1434%       --    -882%   -3348%   -3568%   -5246%   -5516%   -5717%
two_lst_compr_Parand            213/s   16750%    8016%    2540%     967%       --   -2705%   -2946%   -4786%   -5083%   -5303%
if_else_1_line_DanSalmo         292/s   26668%   14696%    7189%    5033%    3707%       --    -331%   -2853%   -3260%   -3562%
tuple_if_else                   302/s   27923%   15542%    7778%    5548%    4177%     343%       --   -2609%   -3029%   -3341%
set_1_line                      409/s   41308%   24556%   14053%   11035%    9181%    3993%    3529%       --    -569%    -991%
set_shorter                     434/s   44401%   26640%   15503%   12303%   10337%    4836%    4345%     603%       --    -448%
set_if_else                     454/s   46952%   28358%   16699%   13349%   11290%    5532%    5018%    1100%     469%       --

Python 3.7的完整基准代码(从FunkySayu修改而来):

good_list = ['.jpg','.jpeg','.gif','.bmp','.png']

import random
import string
my_origin_list = []
for i in range(10000):
    fname = ''.join(random.choice(string.ascii_lowercase) for i in range(random.randrange(10)))
    if random.getrandbits(1):
        fext = random.choice(list(good_list))
    else:
        fext = "." + ''.join(random.choice(string.ascii_lowercase) for i in range(3))

    my_origin_list.append((fname + fext, random.randrange(1000), fext))

# Parand
def two_lst_compr_Parand(*_):
    return [e for e in my_origin_list if e[2] in good_list], [e for e in my_origin_list if not e[2] in good_list]

# dbr
def if_else_DBR(*_):
    a, b = list(), list()
    for e in my_origin_list:
        if e[2] in good_list:
            a.append(e)
        else:
            b.append(e)
    return a, b

# John La Rooy
def two_lst_tuple_JohnLaRoy(*_):
    a, b = list(), list()
    for e in my_origin_list:
        (b, a)[e[2] in good_list].append(e)
    return a, b

# # Ants Aasma
# def f4():
#     l1, l2 = tee((e[2] in good_list, e) for e in my_origin_list)
#     return [i for p, i in l1 if p], [i for p, i in l2 if not p]

# My personal way to do
def zip_Funky(*_):
    a, b = zip(*[(e, None) if e[2] in good_list else (None, e) for e in my_origin_list])
    return list(filter(None, a)), list(filter(None, b))

# BJ Homer
def filter_BJHomer(*_):
    return list(filter(lambda e: e[2] in good_list, my_origin_list)), list(filter(lambda e: not e[2] in good_list,                                                                             my_origin_list))

# ChaimG's answer; as a list.
def if_else_1_line_DanSalmo(*_):
    good, bad = [], []
    for e in my_origin_list:
        _ = good.append(e) if e[2] in good_list else bad.append(e)
    return good, bad

# ChaimG's answer; as a set.
def set_1_line(*_):
    good_list_set = set(good_list)
    good, bad = [], []
    for e in my_origin_list:
        _ = good.append(e) if e[2] in good_list_set else bad.append(e)
    return good, bad

# ChaimG set and if else list.
def set_shorter(*_):
    good_list_set = set(good_list)
    good, bad = [], []
    for e in my_origin_list:
        out = good if e[2] in good_list_set else bad
        out.append(e)
    return good, bad

# ChaimG's best answer; if else as a set.
def set_if_else(*_):
    good_list_set = set(good_list)
    good, bad = [], []
    for e in my_origin_list:
        if e[2] in good_list_set:
            good.append(e)
        else:
            bad.append(e)
    return good, bad

# ChaimG's best answer; if else as a set.
def tuple_if_else(*_):
    good_list_tuple = tuple(good_list)
    good, bad = [], []
    for e in my_origin_list:
        if e[2] in good_list_tuple:
            good.append(e)
        else:
            bad.append(e)
    return good, bad

def cmpthese(n=0, functions=None):
    results = {}
    for func_name in functions:
        args = ['%s(range(256))' % func_name, 'from __main__ import %s' % func_name]
        t = Timer(*args)
        results[func_name] = 1 / (t.timeit(number=n) / n) # passes/sec

    functions_sorted = sorted(functions, key=results.__getitem__)
    for f in functions_sorted:
        diff = []
        for func in functions_sorted:
            if func == f:
                diff.append("--")
            else:
                diff.append(f"{results[f]/results[func]*100 - 100:5.0%}")
        diffs = " ".join(f'{x:>8s}' for x in diff)

        print(f"{f:27s} \t{results[f]:,.0f}/s {diffs}")


if __name__=='__main__':
    from timeit import Timer
cmpthese(1000, 'two_lst_compr_Parand if_else_DBR two_lst_tuple_JohnLaRoy zip_Funky filter_BJHomer if_else_1_line_DanSalmo set_1_line set_if_else tuple_if_else set_shorter'.split(" "))

其他回答

为了提高性能,请尝试itertools。

itertools模块标准化了一组快速、内存高效的核心工具,这些工具单独使用或组合使用都很有用。它们一起构成了一个“迭代器代数”,使得用纯Python简洁有效地构造专门的工具成为可能。

出现看到itertools。过滤器或imap。

itertools。iterable ifilter(谓词) 创建一个迭代器,从iterable中过滤元素,只返回谓词为True的元素

例如,按偶数和奇数拆分列表

arr = range(20)
even, odd = reduce(lambda res, next: res[next % 2].append(next) or res, arr, ([], []))

或者概括地说:

def split(predicate, iterable):
    return reduce(lambda res, e: res[predicate(e)].append(e) or res, iterable, ([], []))

优点:

最短路径 Predicate对每个元素只应用一次

缺点

需要函数式编程范例的知识

还有另一个答案,简短但“邪恶”(用于理解列表的副作用)。

digits = list(range(10))
odd = [x.pop(i) for i, x in enumerate(digits) if x % 2]

>>> odd
[1, 3, 5, 7, 9]

>>> digits
[0, 2, 4, 6, 8]
images = [f for f in files if f[2].lower() in IMAGE_TYPES]
anims  = [f for f in files if f not in images]

当条件较长时很好,例如在您的示例中。读者不需要弄清楚否定条件以及它是否适用于所有其他情况。

之前的答案似乎并不能满足我所有的四种强迫症:

尽可能的懒惰, 只对原始Iterable求值一次 每个项只计算谓词一次 提供良好的类型注释(适用于python 3.7)

我的解决方案并不漂亮,我不认为我可以推荐使用它,但它是:

def iter_split_on_predicate(predicate: Callable[[T], bool], iterable: Iterable[T]) -> Tuple[Iterator[T], Iterator[T]]:
    deque_predicate_true = deque()
    deque_predicate_false = deque()
    
    # define a generator function to consume the input iterable
    # the Predicate is evaluated once per item, added to the appropriate deque, and the predicate result it yielded 
    def shared_generator(definitely_an_iterator):
        for item in definitely_an_iterator:
            print("Evaluate predicate.")
            if predicate(item):
                deque_predicate_true.appendleft(item)
                yield True
            else:
                deque_predicate_false.appendleft(item)
                yield False
    
    # consume input iterable only once,
    # converting to an iterator with the iter() function if necessary. Probably this conversion is unnecessary
    shared_gen = shared_generator(
        iterable if isinstance(iterable, collections.abc.Iterator) else iter(iterable)
    )
    
    # define a generator function for each predicate outcome and queue
    def iter_for(predicate_value, hold_queue):
        def consume_shared_generator_until_hold_queue_contains_something():
            if not hold_queue:
                try:
                    while next(shared_gen) != predicate_value:
                        pass
                except:
                    pass
        
        consume_shared_generator_until_hold_queue_contains_something()
        while hold_queue:
            print("Yield where predicate is "+str(predicate_value))
            yield hold_queue.pop()
            consume_shared_generator_until_hold_queue_contains_something()
    
    # return a tuple of two generators  
    return iter_for(predicate_value=True, hold_queue=deque_predicate_true), iter_for(predicate_value=False, hold_queue=deque_predicate_false)

用下面的测试,我们从print语句中得到如下输出:

t,f = iter_split_on_predicate(lambda item:item>=10,[1,2,3,10,11,12,4,5,6,13,14,15])
print(list(zip(t,f)))
# Evaluate predicate.
# Evaluate predicate.
# Evaluate predicate.
# Evaluate predicate.
# Yield where predicate is True
# Yield where predicate is False
# Evaluate predicate.
# Yield where predicate is True
# Yield where predicate is False
# Evaluate predicate.
# Yield where predicate is True
# Yield where predicate is False
# Evaluate predicate.
# Evaluate predicate.
# Evaluate predicate.
# Evaluate predicate.
# Yield where predicate is True
# Yield where predicate is False
# Evaluate predicate.
# Yield where predicate is True
# Yield where predicate is False
# Evaluate predicate.
# Yield where predicate is True
# Yield where predicate is False
# [(10, 1), (11, 2), (12, 3), (13, 4), (14, 5), (15, 6)]