在我重新发明这个特殊的轮子之前,有没有人有一个很好的用Python计算目录大小的例程?如果该例程能以Mb/Gb等格式格式化大小,那就太好了。


当前回答

使用库sh: du模块执行:

pip install sh

import sh
print( sh.du("-s", ".") )
91154728        .

如果您想传递asterix,请使用此处描述的glob。

要转换human readable中的值,请使用humanize:

pip install humanize

import humanize
print( humanize.naturalsize( 91157384 ) )
91.2 MB

其他回答

Python 3.6+递归文件夹/文件大小使用os.scandir。和@blakev的回答一样强大,但更短,采用EAFP python风格。

import os

def size(path, *, follow_symlinks=False):
    try:
        with os.scandir(path) as it:
            return sum(size(entry, follow_symlinks=follow_symlinks) for entry in it)
    except NotADirectoryError:
        return os.stat(path, follow_symlinks=follow_symlinks).st_size

下面的脚本打印指定目录的所有子目录的目录大小。它还试图(如果可能的话)从缓存递归函数的调用中获益。如果省略一个参数,脚本将在当前目录中工作。输出按目录大小从大到小排序。所以你可以根据自己的需要进行调整。

PS我已经使用配方578019以人性化的格式显示目录大小(http://code.activestate.com/recipes/578019/)

from __future__ import print_function
import os
import sys
import operator

def null_decorator(ob):
    return ob

if sys.version_info >= (3,2,0):
    import functools
    my_cache_decorator = functools.lru_cache(maxsize=4096)
else:
    my_cache_decorator = null_decorator

start_dir = os.path.normpath(os.path.abspath(sys.argv[1])) if len(sys.argv) > 1 else '.'

@my_cache_decorator
def get_dir_size(start_path = '.'):
    total_size = 0
    if 'scandir' in dir(os):
        # using fast 'os.scandir' method (new in version 3.5)
        for entry in os.scandir(start_path):
            if entry.is_dir(follow_symlinks = False):
                total_size += get_dir_size(entry.path)
            elif entry.is_file(follow_symlinks = False):
                total_size += entry.stat().st_size
    else:
        # using slow, but compatible 'os.listdir' method
        for entry in os.listdir(start_path):
            full_path = os.path.abspath(os.path.join(start_path, entry))
            if os.path.isdir(full_path):
                total_size += get_dir_size(full_path)
            elif os.path.isfile(full_path):
                total_size += os.path.getsize(full_path)
    return total_size

def get_dir_size_walk(start_path = '.'):
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            total_size += os.path.getsize(fp)
    return total_size

def bytes2human(n, format='%(value).0f%(symbol)s', symbols='customary'):
    """
    (c) http://code.activestate.com/recipes/578019/

    Convert n bytes into a human readable string based on format.
    symbols can be either "customary", "customary_ext", "iec" or "iec_ext",
    see: http://goo.gl/kTQMs

      >>> bytes2human(0)
      '0.0 B'
      >>> bytes2human(0.9)
      '0.0 B'
      >>> bytes2human(1)
      '1.0 B'
      >>> bytes2human(1.9)
      '1.0 B'
      >>> bytes2human(1024)
      '1.0 K'
      >>> bytes2human(1048576)
      '1.0 M'
      >>> bytes2human(1099511627776127398123789121)
      '909.5 Y'

      >>> bytes2human(9856, symbols="customary")
      '9.6 K'
      >>> bytes2human(9856, symbols="customary_ext")
      '9.6 kilo'
      >>> bytes2human(9856, symbols="iec")
      '9.6 Ki'
      >>> bytes2human(9856, symbols="iec_ext")
      '9.6 kibi'

      >>> bytes2human(10000, "%(value).1f %(symbol)s/sec")
      '9.8 K/sec'

      >>> # precision can be adjusted by playing with %f operator
      >>> bytes2human(10000, format="%(value).5f %(symbol)s")
      '9.76562 K'
    """
    SYMBOLS = {
        'customary'     : ('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'),
        'customary_ext' : ('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa',
                           'zetta', 'iotta'),
        'iec'           : ('Bi', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'),
        'iec_ext'       : ('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi',
                           'zebi', 'yobi'),
    }
    n = int(n)
    if n < 0:
        raise ValueError("n < 0")
    symbols = SYMBOLS[symbols]
    prefix = {}
    for i, s in enumerate(symbols[1:]):
        prefix[s] = 1 << (i+1)*10
    for symbol in reversed(symbols[1:]):
        if n >= prefix[symbol]:
            value = float(n) / prefix[symbol]
            return format % locals()
    return format % dict(symbol=symbols[0], value=n)

############################################################
###
###  main ()
###
############################################################
if __name__ == '__main__':
    dir_tree = {}
    ### version, that uses 'slow' [os.walk method]
    #get_size = get_dir_size_walk
    ### this recursive version can benefit from caching the function calls (functools.lru_cache)
    get_size = get_dir_size

    for root, dirs, files in os.walk(start_dir):
        for d in dirs:
            dir_path = os.path.join(root, d)
            if os.path.isdir(dir_path):
                dir_tree[dir_path] = get_size(dir_path)

    for d, size in sorted(dir_tree.items(), key=operator.itemgetter(1), reverse=True):
        print('%s\t%s' %(bytes2human(size, format='%(value).2f%(symbol)s'), d))

    print('-' * 80)
    if sys.version_info >= (3,2,0):
        print(get_dir_size.cache_info())

样例输出:

37.61M  .\subdir_b
2.18M   .\subdir_a
2.17M   .\subdir_a\subdir_a_2
4.41K   .\subdir_a\subdir_a_1
----------------------------------------------------------
CacheInfo(hits=2, misses=4, maxsize=4096, currsize=4)

编辑:根据user2233949的建议,将null_decorator移到上面

def recursive_dir_size(path):
    size = 0

    for x in os.listdir(path):
        if not os.path.isdir(os.path.join(path,x)):
            size += os.stat(os.path.join(path,x)).st_size
        else:
            size += recursive_dir_size(os.path.join(path,x))

    return size

我写了这个函数,它给了我一个目录的准确总体大小,我尝试了其他for循环解决方案与os。行走,但我不知道为什么最终结果总是小于实际大小(在ubuntu 18 env)。我一定是做错了什么,但谁在乎写这个工作完美无缺。

你说一句俏皮话… 这里有一句话:

sum([sum(map(lambda fname: os.path.getsize(os.path.join(directory, fname)), files)) for directory, folders, files in os.walk(path)])

尽管我可能会把它分开,它不执行检查。

要转换为kb,请参阅可重用库以获得人类可读版本的文件大小?然后算进去

接受的答案不考虑硬链接或软链接,并将这些文件计算两次。您可能希望跟踪已看到的inode,而不是为这些文件添加大小。

import os
def get_size(start_path='.'):
    total_size = 0
    seen = {}
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            try:
                stat = os.stat(fp)
            except OSError:
                continue

            try:
                seen[stat.st_ino]
            except KeyError:
                seen[stat.st_ino] = True
            else:
                continue

            total_size += stat.st_size

    return total_size

print get_size()