在我重新发明这个特殊的轮子之前,有没有人有一个很好的用Python计算目录大小的例程?如果该例程能以Mb/Gb等格式格式化大小,那就太好了。
当前回答
到目前为止,建议的一些方法实现了递归,其他方法使用shell或不会生成格式整齐的结果。当您的代码对于Linux平台是一次性的,您可以像往常一样获得格式化,包括递归,作为一行程序。除了最后一行的输出,它将适用于当前版本的python2和python3:
du.py
-----
#!/usr/bin/python3
import subprocess
def du(path):
"""disk usage in human readable format (e.g. '2,1GB')"""
return subprocess.check_output(['du','-sh', path]).split()[0].decode('utf-8')
if __name__ == "__main__":
print(du('.'))
简单,高效,将工作于文件和多级目录:
$ chmod 750 du.py
$ ./du.py
2,9M
其他回答
import os
def get_size(path):
total_size = 0
for dirpath, dirnames, filenames in os.walk(path):
for f in filenames:
if os.path.exists(fp):
fp = os.path.join(dirpath, f)
total_size += os.path.getsize(fp)
return total_size # in megabytes
谢谢monkut & troex!
Chris的回答很好,但可以通过使用set来检查已看到的目录来使其更加惯用,这也避免了对控制流使用异常:
def directory_size(path):
total_size = 0
seen = set()
for dirpath, dirnames, filenames in os.walk(path):
for f in filenames:
fp = os.path.join(dirpath, f)
try:
stat = os.stat(fp)
except OSError:
continue
if stat.st_ino in seen:
continue
seen.add(stat.st_ino)
total_size += stat.st_size
return total_size # size in bytes
使用pathlib,我想出了这个一行程序来获取文件夹的大小:
sum(file.stat().st_size for file in Path(folder).rglob('*'))
这是我为一个漂亮的格式化输出:
from pathlib import Path
def get_folder_size(folder):
return ByteSize(sum(file.stat().st_size for file in Path(folder).rglob('*')))
class ByteSize(int):
_KB = 1024
_suffixes = 'B', 'KB', 'MB', 'GB', 'PB'
def __new__(cls, *args, **kwargs):
return super().__new__(cls, *args, **kwargs)
def __init__(self, *args, **kwargs):
self.bytes = self.B = int(self)
self.kilobytes = self.KB = self / self._KB**1
self.megabytes = self.MB = self / self._KB**2
self.gigabytes = self.GB = self / self._KB**3
self.petabytes = self.PB = self / self._KB**4
*suffixes, last = self._suffixes
suffix = next((
suffix
for suffix in suffixes
if 1 < getattr(self, suffix) < self._KB
), last)
self.readable = suffix, getattr(self, suffix)
super().__init__()
def __str__(self):
return self.__format__('.2f')
def __repr__(self):
return '{}({})'.format(self.__class__.__name__, super().__repr__())
def __format__(self, format_spec):
suffix, val = self.readable
return '{val:{fmt}} {suf}'.format(val=val, fmt=format_spec, suf=suffix)
def __sub__(self, other):
return self.__class__(super().__sub__(other))
def __add__(self, other):
return self.__class__(super().__add__(other))
def __mul__(self, other):
return self.__class__(super().__mul__(other))
def __rsub__(self, other):
return self.__class__(super().__sub__(other))
def __radd__(self, other):
return self.__class__(super().__add__(other))
def __rmul__(self, other):
return self.__class__(super().__rmul__(other))
用法:
>>> size = get_folder_size("c:/users/tdavis/downloads")
>>> print(size)
5.81 GB
>>> size.GB
5.810891855508089
>>> size.gigabytes
5.810891855508089
>>> size.PB
0.005674699077644618
>>> size.MB
5950.353260040283
>>> size
ByteSize(6239397620)
我还遇到了这个问题,它有一些更紧凑、可能更高效的打印文件大小的策略。
import os
def get_size(path = os.getcwd()):
print("Calculating Size: ",path)
total_size = 0
#if path is directory--
if os.path.isdir(path):
print("Path type : Directory/Folder")
for dirpath, dirnames, filenames in os.walk(path):
for f in filenames:
fp = os.path.join(dirpath, f)
# skip if it is symbolic link
if not os.path.islink(fp):
total_size += os.path.getsize(fp)
#if path is a file---
elif os.path.isfile(path):
print("Path type : File")
total_size=os.path.getsize(path)
else:
print("Path Type : Special File (Socket, FIFO, Device File)" )
total_size=0
bytesize=total_size
print(bytesize, 'bytes')
print(bytesize/(1024), 'kilobytes')
print(bytesize/(1024*1024), 'megabytes')
print(bytesize/(1024*1024*1024), 'gegabytes')
return total_size
x=get_size("/content/examples")
我相信这很有帮助!文件夹和文件!
问题的第二部分
def human(size):
B = "B"
KB = "KB"
MB = "MB"
GB = "GB"
TB = "TB"
UNITS = [B, KB, MB, GB, TB]
HUMANFMT = "%f %s"
HUMANRADIX = 1024.
for u in UNITS[:-1]:
if size < HUMANRADIX : return HUMANFMT % (size, u)
size /= HUMANRADIX
return HUMANFMT % (size, UNITS[-1])