如何在Python中创建目录结构的zip存档?


您可能想看看zipfile模块;文档位于http://docs.python.org/library/zipfile.html.

您可能还需要os.walk()来索引目录结构。


正如其他人所指出的,您应该使用zipfile。文档告诉哪些函数可用,但并没有真正解释如何使用它们压缩整个目录。我认为用一些示例代码来解释是最简单的:

import os
import zipfile
    
def zipdir(path, ziph):
    # ziph is zipfile handle
    for root, dirs, files in os.walk(path):
        for file in files:
            ziph.write(os.path.join(root, file), 
                       os.path.relpath(os.path.join(root, file), 
                                       os.path.join(path, '..')))

with zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipdir('tmp/', zipf)

要将mydirectory的内容添加到新的zip文件中,包括所有文件和子目录:

import os
import zipfile

zf = zipfile.ZipFile("myzipfile.zip", "w")
for dirname, subdirs, files in os.walk("mydirectory"):
    zf.write(dirname)
    for filename in files:
        zf.write(os.path.join(dirname, filename))
zf.close()

要向生成的zip文件添加压缩,请查看此链接。

您需要更改:

zip = zipfile.ZipFile('Python.zip', 'w')

to

zip = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)

我对Mark Byers给出的代码做了一些修改。如果您有空目录,下面的函数也会添加它们。示例应该更清楚添加到zip的路径是什么。

#!/usr/bin/env python
import os
import zipfile

def addDirToZip(zipHandle, path, basePath=""):
    """
    Adding directory given by \a path to opened zip file \a zipHandle

    @param basePath path that will be removed from \a path when adding to archive

    Examples:
        # add whole "dir" to "test.zip" (when you open "test.zip" you will see only "dir")
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir')
        zipHandle.close()

        # add contents of "dir" to "test.zip" (when you open "test.zip" you will see only it's contents)
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir', 'dir')
        zipHandle.close()

        # add contents of "dir/subdir" to "test.zip" (when you open "test.zip" you will see only contents of "subdir")
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir/subdir', 'dir/subdir')
        zipHandle.close()

        # add whole "dir/subdir" to "test.zip" (when you open "test.zip" you will see only "subdir")
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir/subdir', 'dir')
        zipHandle.close()

        # add whole "dir/subdir" with full path to "test.zip" (when you open "test.zip" you will see only "dir" and inside it only "subdir")
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir/subdir')
        zipHandle.close()

        # add whole "dir" and "otherDir" (with full path) to "test.zip" (when you open "test.zip" you will see only "dir" and "otherDir")
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir')
        addDirToZip(zipHandle, 'otherDir')
        zipHandle.close()
    """
    basePath = basePath.rstrip("\\/") + ""
    basePath = basePath.rstrip("\\/")
    for root, dirs, files in os.walk(path):
        # add dir itself (needed for empty dirs
        zipHandle.write(os.path.join(root, "."))
        # add files
        for file in files:
            filePath = os.path.join(root, file)
            inZipPath = filePath.replace(basePath, "", 1).lstrip("\\/")
            #print filePath + " , " + inZipPath
            zipHandle.write(filePath, inZipPath)

以上是一个简单的函数,适用于简单的情况。你可以在我的Gist中找到更优雅的课程:https://gist.github.com/Eccenux/17526123107ca0ac28e6


此函数将递归地压缩目录树,压缩文件,并在存档中记录正确的相对文件名。存档条目与zip-r output.zip source_dir生成的条目相同。

import os
import zipfile
def make_zipfile(output_filename, source_dir):
    relroot = os.path.abspath(os.path.join(source_dir, os.pardir))
    with zipfile.ZipFile(output_filename, "w", zipfile.ZIP_DEFLATED) as zip:
        for root, dirs, files in os.walk(source_dir):
            # add directory (needed for empty dirs)
            zip.write(root, os.path.relpath(root, relroot))
            for file in files:
                filename = os.path.join(root, file)
                if os.path.isfile(filename): # regular files only
                    arcname = os.path.join(os.path.relpath(root, relroot), file)
                    zip.write(filename, arcname)

下面是Nux给出的答案的一个变体,对我有用:

def WriteDirectoryToZipFile( zipHandle, srcPath, zipLocalPath = "", zipOperation = zipfile.ZIP_DEFLATED ):
    basePath = os.path.split( srcPath )[ 0 ]
    for root, dirs, files in os.walk( srcPath ):
        p = os.path.join( zipLocalPath, root [ ( len( basePath ) + 1 ) : ] )
        # add dir
        zipHandle.write( root, p, zipOperation )
        # add files
        for f in files:
            filePath = os.path.join( root, f )
            fileInZipPath = os.path.join( p, f )
            zipHandle.write( filePath, fileInZipPath, zipOperation )

最简单的方法是使用shutil.make_archive。它支持zip和tar格式。

import shutil
shutil.make_archive(output_filename, 'zip', dir_name)

如果您需要做一些比压缩整个目录更复杂的事情(例如跳过某些文件),那么您需要像其他人建议的那样深入到zipfile模块。


试试下面的一个。它对我有用。

import zipfile, os
zipf = "compress.zip"  
def main():
    directory = r"Filepath"
    toZip(directory)
def toZip(directory):
    zippedHelp = zipfile.ZipFile(zipf, "w", compression=zipfile.ZIP_DEFLATED )

    list = os.listdir(directory)
    for file_list in list:
        file_name = os.path.join(directory,file_list)

        if os.path.isfile(file_name):
            print file_name
            zippedHelp.write(file_name)
        else:
            addFolderToZip(zippedHelp,file_list,directory)
            print "---------------Directory Found-----------------------"
    zippedHelp.close()

def addFolderToZip(zippedHelp,folder,directory):
    path=os.path.join(directory,folder)
    print path
    file_list=os.listdir(path)
    for file_name in file_list:
        file_path=os.path.join(path,file_name)
        if os.path.isfile(file_path):
            zippedHelp.write(file_path)
        elif os.path.isdir(file_name):
            print "------------------sub directory found--------------------"
            addFolderToZip(zippedHelp,file_name,path)


if __name__=="__main__":
    main()

我还有另一个代码示例可能会有所帮助,使用python3、pathlib和zipfile。它应该可以在任何操作系统中工作。

from pathlib import Path
import zipfile
from datetime import datetime

DATE_FORMAT = '%y%m%d'


def date_str():
    """returns the today string year, month, day"""
    return '{}'.format(datetime.now().strftime(DATE_FORMAT))


def zip_name(path):
    """returns the zip filename as string"""
    cur_dir = Path(path).resolve()
    parent_dir = cur_dir.parents[0]
    zip_filename = '{}/{}_{}.zip'.format(parent_dir, cur_dir.name, date_str())
    p_zip = Path(zip_filename)
    n = 1
    while p_zip.exists():
        zip_filename = ('{}/{}_{}_{}.zip'.format(parent_dir, cur_dir.name,
                                             date_str(), n))
        p_zip = Path(zip_filename)
        n += 1
    return zip_filename


def all_files(path):
    """iterator returns all files and folders from path as absolute path string
    """
    for child in Path(path).iterdir():
        yield str(child)
        if child.is_dir():
            for grand_child in all_files(str(child)):
                yield str(Path(grand_child))


def zip_dir(path):
    """generate a zip"""
    zip_filename = zip_name(path)
    zip_file = zipfile.ZipFile(zip_filename, 'w')
    print('create:', zip_filename)
    for file in all_files(path):
        print('adding... ', file)
        zip_file.write(file)
    zip_file.close()


if __name__ == '__main__':
    zip_dir('.')
    print('end!')

如果您想要一个类似于任何通用图形文件管理器的压缩文件夹的功能,可以使用以下代码,它使用zipfile模块。使用这段代码,您将得到以路径为根文件夹的zip文件。

import os
import zipfile

def zipdir(path, ziph):
    # Iterate all the directories and files
    for root, dirs, files in os.walk(path):
        # Create a prefix variable with the folder structure inside the path folder. 
        # So if a file is at the path directory will be at the root directory of the zip file
        # so the prefix will be empty. If the file belongs to a containing folder of path folder 
        # then the prefix will be that folder.
        if root.replace(path,'') == '':
                prefix = ''
        else:
                # Keep the folder structure after the path folder, append a '/' at the end 
                # and remome the first character, if it is a '/' in order to have a path like 
                # folder1/folder2/file.txt
                prefix = root.replace(path, '') + '/'
                if (prefix[0] == '/'):
                        prefix = prefix[1:]
        for filename in files:
                actual_file_path = root + '/' + filename
                zipped_file_path = prefix + filename
                zipf.write( actual_file_path, zipped_file_path)


zipf = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)
zipdir('/tmp/justtest/', zipf)
zipf.close()

如何在Python中创建目录结构的zip存档?

在Python脚本中

在Python 2.7+中,shutil有一个make_archive函数。

from shutil import make_archive
make_archive(
  'zipfile_name', 
  'zip',           # the archive format - or tar, bztar, gztar 
  root_dir=None,   # root for archive - current working dir if None
  base_dir=None)   # start archiving from here - cwd if None too

在这里,压缩的归档文件将命名为zipfile_name.zip。如果base_dir距离root_dir更远,它将排除不在base_dir中的文件,但仍将父目录中的文件归档到root_dir。

我在Cygwin上用2.7测试这个时确实遇到了问题——它需要一个root_dir参数,用于cwd:

make_archive('zipfile_name', 'zip', root_dir='.')

从shell使用Python

您也可以使用zipfile模块从shell中使用Python执行此操作:

$ python -m zipfile -c zipname sourcedir

其中zipname是所需目标文件的名称(如果需要,请添加.zip,它不会自动执行),sourcedir是目录的路径。

压缩Python(或者只是不需要父目录):

如果您试图用__init__.py和__main__.py压缩一个python包,并且不需要父目录

$ python -m zipfile -c zipname sourcedir/*

And

$ python zipname

将运行程序包。(请注意,不能将子包作为压缩存档的入口点运行。)

压缩Python应用程序:

如果您有python3.5+,并且特别想压缩Python包,请使用ziapp:

$ python -m zipapp myapp
$ python myapp.pyz

这是一种现代方法,使用pathlib和上下文管理器。将文件直接放在zip文件中,而不是放在子文件夹中。

def zip_dir(filename: str, dir_to_zip: pathlib.Path):
    with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Use glob instead of iterdir(), to cover all subdirectories.
        for directory in dir_to_zip.glob('**'):
            for file in directory.iterdir():
                if not file.is_file():
                    continue
                # Strip the first component, so we don't create an uneeded subdirectory
                # containing everything.
                zip_path = pathlib.Path(*file.parts[1:])
                # Use a string, since zipfile doesn't support pathlib  directly.
                zipf.write(str(file), str(zip_path))

我通过将Mark Byers的解决方案与Reimund和Morten Zilmer的评论(相对路径和包括空目录)合并,准备了一个函数。作为最佳实践,在ZipFile的文件构造中使用。

该函数还准备一个带有压缩目录名和“.zip”扩展名的默认zip文件名。因此,它只使用一个参数:要压缩的源目录。

import os
import zipfile

def zip_dir(path_dir, path_file_zip=''):
if not path_file_zip:
    path_file_zip = os.path.join(
        os.path.dirname(path_dir), os.path.basename(path_dir)+'.zip')
with zipfile.ZipFile(path_file_zip, 'wb', zipfile.ZIP_DEFLATED) as zip_file:
    for root, dirs, files in os.walk(path_dir):
        for file_or_dir in files + dirs:
            zip_file.write(
                os.path.join(root, file_or_dir),
                os.path.relpath(os.path.join(root, file_or_dir),
                                os.path.join(path_dir, os.path.pardir)))

现代Python(3.6+)使用pathlib模块对路径进行类似OOP的简洁处理,使用pathlib.Path.rglob()进行递归globing。据我所知,这相当于乔治·V·莱利的回答:压缩压缩,最顶层的元素是目录,保持空目录,使用相对路径。

from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile

from os import PathLike
from typing import Union


def zip_dir(zip_name: str, source_dir: Union[str, PathLike]):
    src_path = Path(source_dir).expanduser().resolve(strict=True)
    with ZipFile(zip_name, 'w', ZIP_DEFLATED) as zf:
        for file in src_path.rglob('*'):
            zf.write(file, file.relative_to(src_path.parent))

注意:如可选类型提示所示,zip_name不能是Path对象(将在3.6.2+中修复)。


# import required python modules
# You have to install zipfile package using pip install

import os,zipfile

# Change the directory where you want your new zip file to be

os.chdir('Type your destination')

# Create a new zipfile ( I called it myfile )

zf = zipfile.ZipFile('myfile.zip','w')

# os.walk gives a directory tree. Access the files using a for loop

for dirnames,folders,files in os.walk('Type your directory'):
    zf.write('Type your Directory')
    for file in files:
        zf.write(os.path.join('Type your directory',file))

好吧,在阅读了这些建议之后,我想出了一个非常类似的方法,它可以在2.7.x中工作,而不会创建“有趣”的目录名(绝对类似的名称),并且只会在zip中创建指定的文件夹。

或者只是为了以防万一,您需要在zip中包含一个包含所选目录内容的文件夹。

def zipDir( path, ziph ) :
 """
 Inserts directory (path) into zipfile instance (ziph)
 """
 for root, dirs, files in os.walk( path ) :
  for file in files :
   ziph.write( os.path.join( root, file ) , os.path.basename( os.path.normpath( path ) ) + "\\" + file )

def makeZip( pathToFolder ) :
 """
 Creates a zip file with the specified folder
 """
 zipf = zipfile.ZipFile( pathToFolder + 'file.zip', 'w', zipfile.ZIP_DEFLATED )
 zipDir( pathToFolder, zipf )
 zipf.close()
 print( "Zip file saved to: " + pathToFolder)

makeZip( "c:\\path\\to\\folder\\to\\insert\\into\\zipfile" )

要提供更大的灵活性,例如按名称选择目录/文件,请使用:

import os
import zipfile

def zipall(ob, path, rel=""):
    basename = os.path.basename(path)
    if os.path.isdir(path):
        if rel == "":
            rel = basename
        ob.write(path, os.path.join(rel))
        for root, dirs, files in os.walk(path):
            for d in dirs:
                zipall(ob, os.path.join(root, d), os.path.join(rel, d))
            for f in files:
                ob.write(os.path.join(root, f), os.path.join(rel, f))
            break
    elif os.path.isfile(path):
        ob.write(path, os.path.join(rel, basename))
    else:
        pass

对于文件树:

.
├── dir
│   ├── dir2
│   │   └── file2.txt
│   ├── dir3
│   │   └── file3.txt
│   └── file.txt
├── dir4
│   ├── dir5
│   └── file4.txt
├── listdir.zip
├── main.py
├── root.txt
└── selective.zip

例如,您可以只选择dir4和root.txt:

cwd = os.getcwd()
files = [os.path.join(cwd, f) for f in ['dir4', 'root.txt']]

with zipfile.ZipFile("selective.zip", "w" ) as myzip:
    for f in files:
        zipall(myzip, f)

或者只需在脚本调用目录中列出目录,然后从中添加所有内容:

with zipfile.ZipFile("listdir.zip", "w" ) as myzip:
    for f in os.listdir():
        if f == "listdir.zip":
            # Creating a listdir.zip in the same directory
            # will include listdir.zip inside itself, beware of this
            continue
        zipall(myzip, f)

使用shutil,它是python标准库集的一部分。使用shutil非常简单(参见下面的代码):

第一个参数:结果zip/tar文件的文件名,第二个参数:zip/tar,第三个参数:目录名

代码:

import shutil
shutil.make_archive('/home/user/Desktop/Filename','zip','/home/username/Desktop/Directory')

函数创建zip文件。

def CREATEZIPFILE(zipname, path):
    #function to create a zip file
    #Parameters: zipname - name of the zip file; path - name of folder/file to be put in zip file

    zipf = zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED)
    zipf.setpassword(b"password") #if you want to set password to zipfile

    #checks if the path is file or directory
    if os.path.isdir(path):
        for files in os.listdir(path):
            zipf.write(os.path.join(path, files), files)

    elif os.path.isfile(path):
        zipf.write(os.path.join(path), path)
    zipf.close()

假设要压缩当前目录中的所有文件夹(子目录)。

for root, dirs, files in os.walk("."):
    for sub_dir in dirs:
        zip_you_want = sub_dir+".zip"
        zip_process = zipfile.ZipFile(zip_you_want, "w", zipfile.ZIP_DEFLATED)
        zip_process.write(file_you_want_to_include)
        zip_process.close()

        print("Successfully zipped directory: {sub_dir}".format(sub_dir=sub_dir))

要保留要归档的父目录下的文件夹层次结构,请执行以下操作:

import glob
import os
import zipfile

with zipfile.ZipFile(fp_zip, "w", zipfile.ZIP_DEFLATED) as zipf:
    for fp in glob(os.path.join(parent, "**/*")):
        base = os.path.commonpath([parent, fp])
        zipf.write(fp, arcname=fp.replace(base, ""))

如果需要,可以将其更改为使用pathlib进行文件globbing。


这里有这么多答案,我希望我可以贡献我自己的版本,它基于原始答案(顺便提一下),但具有更图形化的视角,也为每个zipfile设置使用上下文并对os.walk()进行排序,以便获得有序的输出。

有了这些文件夹和文件(以及其他文件夹),我想为每个cap_文件夹创建一个.zip:

$ tree -d
.
├── cap_01
|    ├── 0101000001.json
|    ├── 0101000002.json
|    ├── 0101000003.json
|
├── cap_02
|    ├── 0201000001.json
|    ├── 0201000002.json
|    ├── 0201001003.json
|
├── cap_03
|    ├── 0301000001.json
|    ├── 0301000002.json
|    ├── 0301000003.json
| 
├── docs
|    ├── map.txt
|    ├── main_data.xml
|
├── core_files
     ├── core_master
     ├── core_slave

以下是我应用的内容,并附有评论,以更好地理解流程。

$ cat zip_cap_dirs.py 
""" Zip 'cap_*' directories. """           
import os                                                                       
import zipfile as zf                                                            


for root, dirs, files in sorted(os.walk('.')):                                                                                               
    if 'cap_' in root:                                                          
        print(f"Compressing: {root}")                                           
        # Defining .zip name, according to Capítulo.                            
        cap_dir_zip = '{}.zip'.format(root)                                     
        # Opening zipfile context for current root dir.                         
        with zf.ZipFile(cap_dir_zip, 'w', zf.ZIP_DEFLATED) as new_zip:          
            # Iterating over os.walk list of files for the current root dir.    
            for f in files:                                                     
                # Defining relative path to files from current root dir.        
                f_path = os.path.join(root, f)                                  
                # Writing the file on the .zip file of the context              
                new_zip.write(f_path) 

基本上,对于os.walk(路径)上的每一次迭代,我都会打开一个用于zipfile设置的上下文,然后对文件进行迭代,这是根目录中的文件列表,根据当前根目录形成每个文件的相对路径,并附加到正在运行的zipfile上下文。

输出如下所示:

$ python3 zip_cap_dirs.py
Compressing: ./cap_01
Compressing: ./cap_02
Compressing: ./cap_03

要查看每个.zip目录的内容,可以使用less命令:

$ less cap_01.zip

Archive:  cap_01.zip
 Length   Method    Size  Cmpr    Date    Time   CRC-32   Name
--------  ------  ------- ---- ---------- ----- --------  ----
  22017  Defl:N     2471  89% 2019-09-05 08:05 7a3b5ec6  cap_01/0101000001.json
  21998  Defl:N     2471  89% 2019-09-05 08:05 155bece7  cap_01/0101000002.json
  23236  Defl:N     2573  89% 2019-09-05 08:05 55fced20  cap_01/0101000003.json
--------          ------- ---                           -------
  67251             7515  89%                            3 files

压缩文件或树(目录及其子目录)。

from pathlib import Path
from zipfile import ZipFile, ZIP_DEFLATED

def make_zip(tree_path, zip_path, mode='w', skip_empty_dir=False):
    with ZipFile(zip_path, mode=mode, compression=ZIP_DEFLATED) as zf:
        paths = [Path(tree_path)]
        while paths:
            p = paths.pop()
            if p.is_dir():
                paths.extend(p.iterdir())
                if skip_empty_dir:
                    continue
            zf.write(p)

要附加到现有存档,请传递mode='a',以创建新的存档mode='w'(上面的默认值)。因此,假设您希望将3个不同的目录树捆绑在同一归档文件下。

make_zip(path_to_tree1, path_to_arch, mode='w')
make_zip(path_to_tree2, path_to_arch, mode='a')
make_zip(path_to_file3, path_to_arch, mode='a')

使用pathlib.Path的解决方案,它独立于所使用的操作系统:

import zipfile
from pathlib import Path

def zip_dir(path: Path, zip_file_path: Path):
    """Zip all contents of path to zip_file"""
    files_to_zip = [
        file for file in path.glob('*') if file.is_file()]
    with zipfile.ZipFile(
        zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zip_f:
        for file in files_to_zip:
            print(file.name)
            zip_f.write(file, file.name)

current_dir = Path.cwd()  
zip_dir = current_dir / "test"
tools.zip_dir(
    zip_dir, current_dir / 'Zipped_dir.zip')

显而易见的方法是使用shutil,就像第二个顶级答案所说的那样,但如果出于某种原因,您仍然希望使用ZipFile,并且如果您在执行此操作时遇到一些问题(如Windows等中的ERR 13),您可以使用此修复程序:

import os
import zipfile

def retrieve_file_paths(dirName):
  filePaths = []
  for root, directories, files in os.walk(dirName):
    for filename in files:
        filePath = os.path.join(root, filename)
        filePaths.append(filePath)
  return filePaths
 
def main(dir_name, output_filename):
  filePaths = retrieve_file_paths(dir_name)
   
  zip_file = zipfile.ZipFile(output_filename+'.zip', 'w')
  with zip_file:
    for file in filePaths:
      zip_file.write(file)

main("my_dir", "my_dir_archived")

该方法递归地遍历给定文件夹中的每个子文件夹/文件,并将它们写入zip文件,而不是尝试直接压缩文件夹。


使用python3.9、pathlib&zipfile模块,您可以从系统中的任何位置创建zip文件。

def zip_dir(dir: Union[Path, str], filename: Union[Path, str]):
    """Zip the provided directory without navigating to that directory using `pathlib` module"""

    # Convert to Path object
    dir = Path(dir)

    with zipfile.ZipFile(filename, "w", zipfile.ZIP_DEFLATED) as zip_file:
        for entry in dir.rglob("*"):
            zip_file.write(entry, entry.relative_to(dir))

它整洁、打字,代码更少。


前面的答案完全忽略了一点,即当您在Windows上运行代码时,使用os.path.join()可以很容易地返回POSIX不兼容的路径。当使用Linux上的任何常用归档软件处理文件时,生成的归档文件将包含名称中带有反斜杠的文件,这不是您想要的。请改用path.as_posix()作为arcname参数!

import zipfile
from pathlib import Path
with zipfile.ZipFile("archive.zip", "w", zipfile.ZIP_DEFLATED) as zf:
    for path in Path("include_all_of_this_folder").rglob("*"):
        zf.write(path, path.as_posix())