从字节大小返回人类可读大小的函数:

>>> human_readable(2048)
'2 kilobytes'
>>>

如何做到这一点?


当前回答

下面是一个使用while的选项:

def number_format(n):
   n2, n3 = n, 0
   while n2 >= 1e3:
      n2 /= 1e3
      n3 += 1
   return '%.3f' % n2 + ('', ' k', ' M', ' G')[n3]

s = number_format(9012345678)
print(s == '9.012 G')

https://docs.python.org/reference/compound_stmts.html#while

其他回答

这个解决方案可能也会吸引你,这取决于你的思维方式:

from pathlib import Path    

def get_size(path = Path('.')):
    """ Gets file size, or total directory size """
    if path.is_file():
        size = path.stat().st_size
    elif path.is_dir():
        size = sum(file.stat().st_size for file in path.glob('*.*'))
    return size

def format_size(path, unit="MB"):
    """ Converts integers to common size units used in computing """
    bit_shift = {"B": 0,
            "kb": 7,
            "KB": 10,
            "mb": 17,
            "MB": 20,
            "gb": 27,
            "GB": 30,
            "TB": 40,}
    return "{:,.0f}".format(get_size(path) / float(1 << bit_shift[unit])) + " " + unit

# Tests and test results
>>> get_size("d:\\media\\bags of fun.avi")
'38 MB'
>>> get_size("d:\\media\\bags of fun.avi","KB")
'38,763 KB'
>>> get_size("d:\\media\\bags of fun.avi","kb")
'310,104 kb'

这是我的版本。它不使用for循环。它具有常数复杂度O(1),理论上比这里使用for循环的答案更有效。

from math import log
unit_list = zip(['bytes', 'kB', 'MB', 'GB', 'TB', 'PB'], [0, 0, 1, 2, 2, 2])
def sizeof_fmt(num):
    """Human friendly file size"""
    if num > 1:
        exponent = min(int(log(num, 1024)), len(unit_list) - 1)
        quotient = float(num) / 1024**exponent
        unit, num_decimals = unit_list[exponent]
        format_string = '{:.%sf} {}' % (num_decimals)
        return format_string.format(quotient, unit)
    if num == 0:
        return '0 bytes'
    if num == 1:
        return '1 byte'

为了更清楚地说明发生了什么,我们可以省略字符串格式化的代码。以下是真正起作用的台词:

exponent = int(log(num, 1024))
quotient = num / 1024**exponent
unit_list[exponent]

这将在几乎任何情况下做你需要做的事情,是可选参数自定义的,正如你所看到的,几乎是自文档化的:

from math import log
def pretty_size(n,pow=0,b=1024,u='B',pre=['']+[p+'i'for p in'KMGTPEZY']):
    pow,n=min(int(log(max(n*b**pow,1),b)),len(pre)-1),n*b**pow
    return "%%.%if %%s%%s"%abs(pow%(-pow-1))%(n/b**float(pow),pre[pow],u)

示例输出:

>>> pretty_size(42)
'42 B'

>>> pretty_size(2015)
'2.0 KiB'

>>> pretty_size(987654321)
'941.9 MiB'

>>> pretty_size(9876543210)
'9.2 GiB'

>>> pretty_size(0.5,pow=1)
'512 B'

>>> pretty_size(0)
'0 B'

高级定制:

>>> pretty_size(987654321,b=1000,u='bytes',pre=['','kilo','mega','giga'])
'987.7 megabytes'

>>> pretty_size(9876543210,b=1000,u='bytes',pre=['','kilo','mega','giga'])
'9.9 gigabytes'

此代码与Python 2和Python 3兼容。对读者来说,遵从PEP8是一个练习。记住,漂亮的是输出。

更新:

如果你需要数千个逗号,只需应用明显的扩展:

def prettier_size(n,pow=0,b=1024,u='B',pre=['']+[p+'i'for p in'KMGTPEZY']):
    r,f=min(int(log(max(n*b**pow,1),b)),len(pre)-1),'{:,.%if} %s%s'
    return (f%(abs(r%(-r-1)),pre[r],u)).format(n*b**pow/b**float(r))

例如:

>>> pretty_units(987654321098765432109876543210)
'816,968.5 YiB'

参考Sridhar Ratnakumar的回答,更新为:

def formatSize(sizeInBytes, decimalNum=1, isUnitWithI=False, sizeUnitSeperator=""):
  """format size to human readable string"""
  # https://en.wikipedia.org/wiki/Binary_prefix#Specific_units_of_IEC_60027-2_A.2_and_ISO.2FIEC_80000
  # K=kilo, M=mega, G=giga, T=tera, P=peta, E=exa, Z=zetta, Y=yotta
  sizeUnitList = ['','K','M','G','T','P','E','Z']
  largestUnit = 'Y'

  if isUnitWithI:
    sizeUnitListWithI = []
    for curIdx, eachUnit in enumerate(sizeUnitList):
      unitWithI = eachUnit
      if curIdx >= 1:
        unitWithI += 'i'
      sizeUnitListWithI.append(unitWithI)

    # sizeUnitListWithI = ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']
    sizeUnitList = sizeUnitListWithI

    largestUnit += 'i'

  suffix = "B"
  decimalFormat = "." + str(decimalNum) + "f" # ".1f"
  finalFormat = "%" + decimalFormat + sizeUnitSeperator + "%s%s" # "%.1f%s%s"
  sizeNum = sizeInBytes
  for sizeUnit in sizeUnitList:
      if abs(sizeNum) < 1024.0:
        return finalFormat % (sizeNum, sizeUnit, suffix)
      sizeNum /= 1024.0
  return finalFormat % (sizeNum, largestUnit, suffix)

示例输出如下:

def testKb():
  kbSize = 3746
  kbStr = formatSize(kbSize)
  print("%s -> %s" % (kbSize, kbStr))

def testI():
  iSize = 87533
  iStr = formatSize(iSize, isUnitWithI=True)
  print("%s -> %s" % (iSize, iStr))

def testSeparator():
  seperatorSize = 98654
  seperatorStr = formatSize(seperatorSize, sizeUnitSeperator=" ")
  print("%s -> %s" % (seperatorSize, seperatorStr))

def testBytes():
  bytesSize = 352
  bytesStr = formatSize(bytesSize)
  print("%s -> %s" % (bytesSize, bytesStr))

def testMb():
  mbSize = 76383285
  mbStr = formatSize(mbSize, decimalNum=2)
  print("%s -> %s" % (mbSize, mbStr))

def testTb():
  tbSize = 763832854988542
  tbStr = formatSize(tbSize, decimalNum=2)
  print("%s -> %s" % (tbSize, tbStr))

def testPb():
  pbSize = 763832854988542665
  pbStr = formatSize(pbSize, decimalNum=4)
  print("%s -> %s" % (pbSize, pbStr))


def demoFormatSize():
  testKb()
  testI()
  testSeparator()
  testBytes()
  testMb()
  testTb()
  testPb()

  # 3746 -> 3.7KB
  # 87533 -> 85.5KiB
  # 98654 -> 96.3 KB
  # 352 -> 352.0B
  # 76383285 -> 72.84MB
  # 763832854988542 -> 694.70TB
  # 763832854988542665 -> 678.4199PB

重复作为匆匆.filesize()替代方案提供的代码段,下面的代码段根据所使用的前缀给出不同的精度数字。它不像某些片段那样简洁,但我喜欢这样的结果。

def human_size(size_bytes):
    """
    format a size in bytes into a 'human' file size, e.g. bytes, KB, MB, GB, TB, PB
    Note that bytes/KB will be reported in whole numbers but MB and above will have greater precision
    e.g. 1 byte, 43 bytes, 443 KB, 4.3 MB, 4.43 GB, etc
    """
    if size_bytes == 1:
        # because I really hate unnecessary plurals
        return "1 byte"

    suffixes_table = [('bytes',0),('KB',0),('MB',1),('GB',2),('TB',2), ('PB',2)]

    num = float(size_bytes)
    for suffix, precision in suffixes_table:
        if num < 1024.0:
            break
        num /= 1024.0

    if precision == 0:
        formatted_size = "%d" % num
    else:
        formatted_size = str(round(num, ndigits=precision))

    return "%s %s" % (formatted_size, suffix)