在常数平摊时间O(1)中将一个对象追加到R中的列表?

如果我有一些R列表mylist，你可以像这样添加一个obj项:

mylist[[length(mylist)+1]] <- obj

但肯定有一些更紧凑的方式。当我刚在R工作时，我试着像这样写lappend():

lappend <- function(lst, obj) {
    lst[[length(lst)+1]] <- obj
    return(lst)
}

但是，由于R的按名调用语义(lst在调用时被有效地复制，因此对lst的更改在lappend()的作用域之外是不可见的)，这当然是行不通的。我知道您可以在R函数中进行环境入侵，从而超出函数的作用域并改变调用环境，但对于编写一个简单的附加函数来说，这似乎是一个巨大的打击。

有谁能提出一个更漂亮的方法吗?如果它对向量和列表都适用，那就更好了。

当前回答

在其他答案中，只有列表方法会导致O(1)个追加，但它会导致深度嵌套的列表结构，而不是简单的单个列表。我使用了下面的数据结构，它们支持O(1)(平摊)追加，并允许结果转换回一个普通列表。

expandingList <- function(capacity = 10) {
    buffer <- vector('list', capacity)
    length <- 0

    methods <- list()

    methods$double.size <- function() {
        buffer <<- c(buffer, vector('list', capacity))
        capacity <<- capacity * 2
    }

    methods$add <- function(val) {
        if(length == capacity) {
            methods$double.size()
        }

        length <<- length + 1
        buffer[[length]] <<- val
    }

    methods$as.list <- function() {
        b <- buffer[0:length]
        return(b)
    }

    methods
}

and

linkedList <- function() {
    head <- list(0)
    length <- 0

    methods <- list()

    methods$add <- function(val) {
        length <<- length + 1
        head <<- list(head, val)
    }

    methods$as.list <- function() {
        b <- vector('list', length)
        h <- head
        for(i in length:1) {
            b[[i]] <- head[[2]]
            head <- head[[1]]
        }
        return(b)
    }
    methods
}

使用方法如下:

> l <- expandingList()
> l$add("hello")
> l$add("world")
> l$add(101)
> l$as.list()
[[1]]
[1] "hello"

[[2]]
[1] "world"

[[3]]
[1] 101

这些解决方案可以扩展为支持所有列表相关操作的完整对象，但这仍将作为读者的练习。

命名列表的另一种变体:

namedExpandingList <- function(capacity = 10) {
    buffer <- vector('list', capacity)
    names <- character(capacity)
    length <- 0

    methods <- list()

    methods$double.size <- function() {
        buffer <<- c(buffer, vector('list', capacity))
        names <<- c(names, character(capacity))
        capacity <<- capacity * 2
    }

    methods$add <- function(name, val) {
        if(length == capacity) {
            methods$double.size()
        }

        length <<- length + 1
        buffer[[length]] <<- val
        names[length] <<- name
    }

    methods$as.list <- function() {
        b <- buffer[0:length]
        names(b) <- names[0:length]
        return(b)
    }

    methods
}

基准

使用@phonetagger的代码(基于@Cron Arconis的代码)进行性能比较。我还添加了一个better_env_as_container，并对env_as_container_进行了一些更改。原来的env_as_container_被破坏了，实际上并没有存储所有的数字。

library(microbenchmark)
lPtrAppend <- function(lstptr, lab, obj) {lstptr[[deparse(lab)]] <- obj}
### Store list inside new environment
envAppendList <- function(lstptr, obj) {lstptr$list[[length(lstptr$list)+1]] <- obj} 
env2list <- function(env, len) {
    l <- vector('list', len)
    for (i in 1:len) {
        l[[i]] <- env[[as.character(i)]]
    }
    l
}
envl2list <- function(env, len) {
    l <- vector('list', len)
    for (i in 1:len) {
        l[[i]] <- env[[paste(as.character(i), 'L', sep='')]]
    }
    l
}
runBenchmark <- function(n) {
    microbenchmark(times = 5,  
        env_with_list_ = {
            listptr <- new.env(parent=globalenv())
            listptr$list <- NULL
            for(i in 1:n) {envAppendList(listptr, i)}
            listptr$list
        },
        c_ = {
            a <- list(0)
            for(i in 1:n) {a = c(a, list(i))}
        },
        list_ = {
            a <- list(0)
            for(i in 1:n) {a <- list(a, list(i))}
        },
        by_index = {
            a <- list(0)
            for(i in 1:n) {a[length(a) + 1] <- i}
            a
        },
        append_ = { 
            a <- list(0)    
            for(i in 1:n) {a <- append(a, i)} 
            a
        },
        env_as_container_ = {
            listptr <- new.env(hash=TRUE, parent=globalenv())
            for(i in 1:n) {lPtrAppend(listptr, i, i)} 
            envl2list(listptr, n)
        },
        better_env_as_container = {
            env <- new.env(hash=TRUE, parent=globalenv())
            for(i in 1:n) env[[as.character(i)]] <- i
            env2list(env, n)
        },
        linkedList = {
            a <- linkedList()
            for(i in 1:n) { a$add(i) }
            a$as.list()
        },
        inlineLinkedList = {
            a <- list()
            for(i in 1:n) { a <- list(a, i) }
            b <- vector('list', n)
            head <- a
            for(i in n:1) {
                b[[i]] <- head[[2]]
                head <- head[[1]]
            }                
        },
        expandingList = {
            a <- expandingList()
            for(i in 1:n) { a$add(i) }
            a$as.list()
        },
        inlineExpandingList = {
            l <- vector('list', 10)
            cap <- 10
            len <- 0
            for(i in 1:n) {
                if(len == cap) {
                    l <- c(l, vector('list', cap))
                    cap <- cap*2
                }
                len <- len + 1
                l[[len]] <- i
            }
            l[1:len]
        }
    )
}

# We need to repeatedly add an element to a list. With normal list concatenation
# or element setting this would lead to a large number of memory copies and a
# quadratic runtime. To prevent that, this function implements a bare bones
# expanding array, in which list appends are (amortized) constant time.
    expandingList <- function(capacity = 10) {
        buffer <- vector('list', capacity)
        length <- 0

        methods <- list()

        methods$double.size <- function() {
            buffer <<- c(buffer, vector('list', capacity))
            capacity <<- capacity * 2
        }

        methods$add <- function(val) {
            if(length == capacity) {
                methods$double.size()
            }

            length <<- length + 1
            buffer[[length]] <<- val
        }

        methods$as.list <- function() {
            b <- buffer[0:length]
            return(b)
        }

        methods
    }

    linkedList <- function() {
        head <- list(0)
        length <- 0

        methods <- list()

        methods$add <- function(val) {
            length <<- length + 1
            head <<- list(head, val)
        }

        methods$as.list <- function() {
            b <- vector('list', length)
            h <- head
            for(i in length:1) {
                b[[i]] <- head[[2]]
                head <- head[[1]]
            }
            return(b)
        }

        methods
    }

# We need to repeatedly add an element to a list. With normal list concatenation
# or element setting this would lead to a large number of memory copies and a
# quadratic runtime. To prevent that, this function implements a bare bones
# expanding array, in which list appends are (amortized) constant time.
    namedExpandingList <- function(capacity = 10) {
        buffer <- vector('list', capacity)
        names <- character(capacity)
        length <- 0

        methods <- list()

        methods$double.size <- function() {
            buffer <<- c(buffer, vector('list', capacity))
            names <<- c(names, character(capacity))
            capacity <<- capacity * 2
        }

        methods$add <- function(name, val) {
            if(length == capacity) {
                methods$double.size()
            }

            length <<- length + 1
            buffer[[length]] <<- val
            names[length] <<- name
        }

        methods$as.list <- function() {
            b <- buffer[0:length]
            names(b) <- names[0:length]
            return(b)
        }

        methods
    }

结果:

> runBenchmark(1000)
Unit: microseconds
                    expr       min        lq      mean    median        uq       max neval
          env_with_list_  3128.291  3161.675  4466.726  3361.837  3362.885  9318.943     5
                      c_  3308.130  3465.830  6687.985  8578.913  8627.802  9459.252     5
                   list_   329.508   343.615   389.724   370.504   449.494   455.499     5
                by_index  3076.679  3256.588  5480.571  3395.919  8209.738  9463.931     5
                 append_  4292.321  4562.184  7911.882 10156.957 10202.773 10345.177     5
       env_as_container_ 24471.511 24795.849 25541.103 25486.362 26440.591 26511.200     5
 better_env_as_container  7671.338  7986.597  8118.163  8153.726  8335.659  8443.493     5
              linkedList  1700.754  1755.439  1829.442  1804.746  1898.752  1987.518     5
        inlineLinkedList  1109.764  1115.352  1163.751  1115.631  1206.843  1271.166     5
           expandingList  1422.440  1439.970  1486.288  1519.728  1524.268  1525.036     5
     inlineExpandingList   942.916   973.366  1002.461  1012.197  1017.784  1066.044     5
> runBenchmark(10000)
Unit: milliseconds
                    expr        min         lq       mean     median         uq        max neval
          env_with_list_ 357.760419 360.277117 433.810432 411.144799 479.090688 560.779139     5
                      c_ 685.477809 734.055635 761.689936 745.957553 778.330873 864.627811     5
                   list_   3.257356   3.454166   3.505653   3.524216   3.551454   3.741071     5
                by_index 445.977967 454.321797 515.453906 483.313516 560.374763 633.281485     5
                 append_ 610.777866 629.547539 681.145751 640.936898 760.570326 763.896124     5
       env_as_container_ 281.025606 290.028380 303.885130 308.594676 314.972570 324.804419     5
 better_env_as_container  83.944855  86.927458  90.098644  91.335853  92.459026  95.826030     5
              linkedList  19.612576  24.032285  24.229808  25.461429  25.819151  26.223597     5
        inlineLinkedList  11.126970  11.768524  12.216284  12.063529  12.392199  13.730200     5
           expandingList  14.735483  15.854536  15.764204  16.073485  16.075789  16.081726     5
     inlineExpandingList  10.618393  11.179351  13.275107  12.391780  14.747914  17.438096     5
> runBenchmark(20000)
Unit: milliseconds
                    expr         min          lq       mean      median          uq         max neval
          env_with_list_ 1723.899913 1915.003237 1921.23955 1938.734718 1951.649113 2076.910767     5
                      c_ 2759.769353 2768.992334 2810.40023 2820.129738 2832.350269 2870.759474     5
                   list_    6.112919    6.399964    6.63974    6.453252    6.910916    7.321647     5
                by_index 2163.585192 2194.892470 2292.61011 2209.889015 2436.620081 2458.063801     5
                 append_ 2832.504964 2872.559609 2983.17666 2992.634568 3004.625953 3213.558197     5
       env_as_container_  573.386166  588.448990  602.48829  597.645221  610.048314  642.912752     5
 better_env_as_container  154.180531  175.254307  180.26689  177.027204  188.642219  206.230191     5
              linkedList   38.401105   47.514506   46.61419   47.525192   48.677209   50.952958     5
        inlineLinkedList   25.172429   26.326681   32.33312   34.403442   34.469930   41.293126     5
           expandingList   30.776072   30.970438   34.45491   31.752790   38.062728   40.712542     5
     inlineExpandingList   21.309278   22.709159   24.64656   24.290694   25.764816   29.158849     5

我已经添加了linkedList和expandingList和两者的内联版本。inlinedLinkedList基本上是list_的副本，但它也将嵌套结构转换回普通列表。除此之外，内联版本和非内联版本之间的差异是由于函数调用的开销。

expandingList和linkedList的所有变体都显示O(1)附加性能，基准时间随附加项的数量线性扩展。linkedList比expandingList慢，而且函数调用开销也是可见的。所以如果你真的需要所有你能得到的速度(并且想坚持R代码)，使用一个内联版本的expandingList。

我还研究了R的C实现，这两种方法都应该是O(1)追加任何大小，直到耗尽内存。

我也改变了env_as_container_，原始版本将存储索引“I”下的每一项，覆盖先前追加的项。我添加的better_env_as_container非常类似于env_as_container_，但没有离开的东西。两者都表现出O(1)性能，但它们的开销比链接/展开列表要大得多。

内存开销

In the C R implementation there is an overhead of 4 words and 2 ints per allocated object. The linkedList approach allocates one list of length two per append, for a total of (4*8+4+4+2*8=) 56 bytes per appended item on 64-bit computers (excluding memory allocation overhead, so probably closer to 64 bytes). The expandingList approach uses one word per appended item, plus a copy when doubling the vector length, so a total memory usage of up to 16 bytes per item. Since the memory is all in one or two objects the per-object overhead is insignificant. I haven't looked deeply into the env memory usage, but I think it will be closer to linkedList.

2015-09-30 15:41:10

其他回答

为了验证，我运行了@Cron提供的基准测试代码。有一个主要的区别(除了在更新的i7处理器上运行更快):by_index现在的性能几乎和list_一样好:

Unit: milliseconds
              expr        min         lq       mean     median         uq
    env_with_list_ 167.882406 175.969269 185.966143 181.817187 185.933887
                c_ 485.524870 501.049836 516.781689 518.637468 537.355953
             list_   6.155772   6.258487   6.544207   6.269045   6.290925
          by_index   9.290577   9.630283   9.881103   9.672359  10.219533
           append_ 505.046634 543.319857 542.112303 551.001787 553.030110
 env_as_container_ 153.297375 154.880337 156.198009 156.068736 156.800135

这里是从@Cron的回答中逐字复制的基准代码(以防他后来更改内容):

n = 1e+4
library(microbenchmark)
### Using environment as a container
lPtrAppend <- function(lstptr, lab, obj) {lstptr[[deparse(substitute(lab))]] <- obj}
### Store list inside new environment
envAppendList <- function(lstptr, obj) {lstptr$list[[length(lstptr$list)+1]] <- obj}

microbenchmark(times = 5,
        env_with_list_ = {
            listptr <- new.env(parent=globalenv())
            listptr$list <- NULL
            for(i in 1:n) {envAppendList(listptr, i)}
            listptr$list
        },
        c_ = {
            a <- list(0)
            for(i in 1:n) {a = c(a, list(i))}
        },
        list_ = {
            a <- list(0)
            for(i in 1:n) {a <- list(a, list(i))}
        },
        by_index = {
            a <- list(0)
            for(i in 1:n) {a[length(a) + 1] <- i}
            a
        },
        append_ = {
            a <- list(0)
            for(i in 1:n) {a <- append(a, i)}
            a
        },
        env_as_container_ = {
            listptr <- new.env(parent=globalenv())
            for(i in 1:n) {lPtrAppend(listptr, i, i)}
            listptr
        }
)

2018-06-24 22:31:24

这是一个非常有趣的问题，我希望我下面的想法可以为解决这个问题提供一种方式。这个方法给出了一个没有索引的平面列表，但是它有列表和反列表来避免嵌套结构。我不确定速度，因为我不知道如何基准。

a_list<-list()
for(i in 1:3){
  a_list<-list(unlist(list(unlist(a_list,recursive = FALSE),list(rnorm(2))),recursive = FALSE))
}
a_list

[[1]]
[[1]][[1]]
[1] -0.8098202  1.1035517

[[1]][[2]]
[1] 0.6804520 0.4664394

[[1]][[3]]
[1] 0.15592354 0.07424637

2016-04-14 16:43:40

我对这里提到的方法做了一个小的比较。

n = 1e+4
library(microbenchmark)
### Using environment as a container
lPtrAppend <- function(lstptr, lab, obj) {lstptr[[deparse(substitute(lab))]] <- obj}
### Store list inside new environment
envAppendList <- function(lstptr, obj) {lstptr$list[[length(lstptr$list)+1]] <- obj} 

microbenchmark(times = 5,  
        env_with_list_ = {
            listptr <- new.env(parent=globalenv())
            listptr$list <- NULL
            for(i in 1:n) {envAppendList(listptr, i)}
            listptr$list
        },
        c_ = {
            a <- list(0)
            for(i in 1:n) {a = c(a, list(i))}
        },
        list_ = {
            a <- list(0)
            for(i in 1:n) {a <- list(a, list(i))}
        },
        by_index = {
            a <- list(0)
            for(i in 1:n) {a[length(a) + 1] <- i}
            a
        },
        append_ = { 
            a <- list(0)    
            for(i in 1:n) {a <- append(a, i)} 
            a
        },
        env_as_container_ = {
            listptr <- new.env(parent=globalenv())
            for(i in 1:n) {lPtrAppend(listptr, i, i)} 
            listptr
        }   
)

结果:

Unit: milliseconds
              expr       min        lq       mean    median        uq       max neval cld
    env_with_list_  188.9023  198.7560  224.57632  223.2520  229.3854  282.5859     5  a 
                c_ 1275.3424 1869.1064 2022.20984 2191.7745 2283.1199 2491.7060     5   b
             list_   17.4916   18.1142   22.56752   19.8546   20.8191   36.5581     5  a 
          by_index  445.2970  479.9670  540.20398  576.9037  591.2366  607.6156     5  a 
           append_ 1140.8975 1316.3031 1794.10472 1620.1212 1855.3602 3037.8416     5   b
 env_as_container_  355.9655  360.1738  399.69186  376.8588  391.7945  513.6667     5  a

2015-02-20 12:22:39

我认为你要做的实际上是通过引用(指针)传递给函数——创建一个新的环境(通过引用传递给函数)，并添加列表:

listptr=new.env(parent=globalenv())
listptr$list=mylist

#Then the function is modified as:
lPtrAppend <- function(lstptr, obj) {
    lstptr$list[[length(lstptr$list)+1]] <- obj
}

现在您只是在修改现有的列表(而不是创建一个新的列表)

2011-08-24 16:02:11

如果你将列表变量作为一个带引号的字符串传入，你可以像这样从函数内部到达它:

push <- function(l, x) {
  assign(l, append(eval(as.name(l)), x), envir=parent.frame())
}

so:

> a <- list(1,2)
> a
[[1]]
[1] 1

[[2]]
[1] 2

> push("a", 3)
> a
[[1]]
[1] 1

[[2]]
[1] 2

[[3]]
[1] 3

>

或者为了获得额外的学分:

> v <- vector()
> push("v", 1)
> v
[1] 1
> push("v", 2)
> v
[1] 1 2
>

2010-03-13 02:30:07

在常数平摊时间O(1)中将一个对象追加到R中的列表?

推荐文章

最新文章

标签