在我重新发明这个特殊的轮子之前,有没有人有一个很好的用Python计算目录大小的例程?如果该例程能以Mb/Gb等格式格式化大小,那就太好了。
当前回答
不可否认,这有点像黑客,只适用于Unix/Linux。
它匹配du -sb。因为实际上这是一个Python bash包装器,它运行du -sb。命令。
import subprocess
def system_command(cmd):
""""Function executes cmd parameter as a bash command."""
p = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True)
stdout, stderr = p.communicate()
return stdout, stderr
size = int(system_command('du -sb . ')[0].split()[0])
其他回答
def recursive_dir_size(path):
size = 0
for x in os.listdir(path):
if not os.path.isdir(os.path.join(path,x)):
size += os.stat(os.path.join(path,x)).st_size
else:
size += recursive_dir_size(os.path.join(path,x))
return size
我写了这个函数,它给了我一个目录的准确总体大小,我尝试了其他for循环解决方案与os。行走,但我不知道为什么最终结果总是小于实际大小(在ubuntu 18 env)。我一定是做错了什么,但谁在乎写这个工作完美无缺。
这有点晚了,但只要安装了glob2和humanize,就行了。注意,在Python 3中,默认的iglob具有递归模式。如何修改Python 3的代码是留给读者的简单练习。
>>> import os
>>> from humanize import naturalsize
>>> from glob2 import iglob
>>> naturalsize(sum(os.path.getsize(x) for x in iglob('/var/**'))))
'546.2 MB'
python3.5 +
from pathlib import Path
def get_size(folder: str) -> int:
return sum(p.stat().st_size for p in Path(folder).rglob('*'))
用法::
In [6]: get_size('/etc/not-exist-path')
Out[6]: 0
In [7]: get_size('.')
Out[7]: 12038689
In [8]: def filesize(size: int) -> str:
...: for unit in ("B", "K", "M", "G", "T"):
...: if size < 1024:
...: break
...: size /= 1024
...: return f"{size:.1f}{unit}"
...:
In [9]: filesize(get_size('.'))
Out[9]: '11.5M'
当计算子目录的大小时,它应该更新其父目录的文件夹大小,这将一直进行下去,直到它到达根父目录。
下面的函数计算文件夹及其所有子文件夹的大小。
import os
def folder_size(path):
parent = {} # path to parent path mapper
folder_size = {} # storing the size of directories
folder = os.path.realpath(path)
for root, _, filenames in os.walk(folder):
if root == folder:
parent[root] = -1 # the root folder will not have any parent
folder_size[root] = 0.0 # intializing the size to 0
elif root not in parent:
immediate_parent_path = os.path.dirname(root) # extract the immediate parent of the subdirectory
parent[root] = immediate_parent_path # store the parent of the subdirectory
folder_size[root] = 0.0 # initialize the size to 0
total_size = 0
for filename in filenames:
filepath = os.path.join(root, filename)
total_size += os.stat(filepath).st_size # computing the size of the files under the directory
folder_size[root] = total_size # store the updated size
temp_path = root # for subdirectories, we need to update the size of the parent till the root parent
while parent[temp_path] != -1:
folder_size[parent[temp_path]] += total_size
temp_path = parent[temp_path]
return folder_size[folder]/1000000.0
下面的脚本打印指定目录的所有子目录的目录大小。它还试图(如果可能的话)从缓存递归函数的调用中获益。如果省略一个参数,脚本将在当前目录中工作。输出按目录大小从大到小排序。所以你可以根据自己的需要进行调整。
PS我已经使用配方578019以人性化的格式显示目录大小(http://code.activestate.com/recipes/578019/)
from __future__ import print_function
import os
import sys
import operator
def null_decorator(ob):
return ob
if sys.version_info >= (3,2,0):
import functools
my_cache_decorator = functools.lru_cache(maxsize=4096)
else:
my_cache_decorator = null_decorator
start_dir = os.path.normpath(os.path.abspath(sys.argv[1])) if len(sys.argv) > 1 else '.'
@my_cache_decorator
def get_dir_size(start_path = '.'):
total_size = 0
if 'scandir' in dir(os):
# using fast 'os.scandir' method (new in version 3.5)
for entry in os.scandir(start_path):
if entry.is_dir(follow_symlinks = False):
total_size += get_dir_size(entry.path)
elif entry.is_file(follow_symlinks = False):
total_size += entry.stat().st_size
else:
# using slow, but compatible 'os.listdir' method
for entry in os.listdir(start_path):
full_path = os.path.abspath(os.path.join(start_path, entry))
if os.path.isdir(full_path):
total_size += get_dir_size(full_path)
elif os.path.isfile(full_path):
total_size += os.path.getsize(full_path)
return total_size
def get_dir_size_walk(start_path = '.'):
total_size = 0
for dirpath, dirnames, filenames in os.walk(start_path):
for f in filenames:
fp = os.path.join(dirpath, f)
total_size += os.path.getsize(fp)
return total_size
def bytes2human(n, format='%(value).0f%(symbol)s', symbols='customary'):
"""
(c) http://code.activestate.com/recipes/578019/
Convert n bytes into a human readable string based on format.
symbols can be either "customary", "customary_ext", "iec" or "iec_ext",
see: http://goo.gl/kTQMs
>>> bytes2human(0)
'0.0 B'
>>> bytes2human(0.9)
'0.0 B'
>>> bytes2human(1)
'1.0 B'
>>> bytes2human(1.9)
'1.0 B'
>>> bytes2human(1024)
'1.0 K'
>>> bytes2human(1048576)
'1.0 M'
>>> bytes2human(1099511627776127398123789121)
'909.5 Y'
>>> bytes2human(9856, symbols="customary")
'9.6 K'
>>> bytes2human(9856, symbols="customary_ext")
'9.6 kilo'
>>> bytes2human(9856, symbols="iec")
'9.6 Ki'
>>> bytes2human(9856, symbols="iec_ext")
'9.6 kibi'
>>> bytes2human(10000, "%(value).1f %(symbol)s/sec")
'9.8 K/sec'
>>> # precision can be adjusted by playing with %f operator
>>> bytes2human(10000, format="%(value).5f %(symbol)s")
'9.76562 K'
"""
SYMBOLS = {
'customary' : ('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'),
'customary_ext' : ('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa',
'zetta', 'iotta'),
'iec' : ('Bi', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'),
'iec_ext' : ('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi',
'zebi', 'yobi'),
}
n = int(n)
if n < 0:
raise ValueError("n < 0")
symbols = SYMBOLS[symbols]
prefix = {}
for i, s in enumerate(symbols[1:]):
prefix[s] = 1 << (i+1)*10
for symbol in reversed(symbols[1:]):
if n >= prefix[symbol]:
value = float(n) / prefix[symbol]
return format % locals()
return format % dict(symbol=symbols[0], value=n)
############################################################
###
### main ()
###
############################################################
if __name__ == '__main__':
dir_tree = {}
### version, that uses 'slow' [os.walk method]
#get_size = get_dir_size_walk
### this recursive version can benefit from caching the function calls (functools.lru_cache)
get_size = get_dir_size
for root, dirs, files in os.walk(start_dir):
for d in dirs:
dir_path = os.path.join(root, d)
if os.path.isdir(dir_path):
dir_tree[dir_path] = get_size(dir_path)
for d, size in sorted(dir_tree.items(), key=operator.itemgetter(1), reverse=True):
print('%s\t%s' %(bytes2human(size, format='%(value).2f%(symbol)s'), d))
print('-' * 80)
if sys.version_info >= (3,2,0):
print(get_dir_size.cache_info())
样例输出:
37.61M .\subdir_b
2.18M .\subdir_a
2.17M .\subdir_a\subdir_a_2
4.41K .\subdir_a\subdir_a_1
----------------------------------------------------------
CacheInfo(hits=2, misses=4, maxsize=4096, currsize=4)
编辑:根据user2233949的建议,将null_decorator移到上面
推荐文章
- 如何在Flask-SQLAlchemy中按id删除记录
- 在Python中插入列表的第一个位置
- Python Pandas只合并某些列
- 如何在一行中连接两个集而不使用“|”
- 从字符串中移除前缀
- 代码结束时发出警报
- 如何在Python中按字母顺序排序字符串中的字母
- 在matplotlib中将y轴标签添加到次要y轴
- 如何消除数独方块的凹凸缺陷?
- 为什么出现这个UnboundLocalError(闭包)?
- 使用Python请求的异步请求
- 如何检查一个对象是否是python中的生成器对象?
- 如何从Python包内读取(静态)文件?
- 如何计算一个逻辑sigmoid函数在Python?
- python: SyntaxError: EOL扫描字符串文字