是否有可能,使用Python,合并单独的PDF文件?

假设是这样,我需要进一步扩展它。我希望循环通过目录中的文件夹,并重复此过程。

我可能是得过其实了,但是否可以排除每个pdf文件中包含的一页(我的报告生成总是创建一个额外的空白页)。


当前回答

Giovanni G. PY以一种简单易用的方式(至少对我来说)给出了答案:

import os
from PyPDF2 import PdfFileMerger

def merge_pdfs(export_dir, input_dir, folder):
    current_dir = os.path.join(input_dir, folder)
    pdfs = os.listdir(current_dir)
    
    merger = PdfFileMerger()
    for pdf in pdfs:
        merger.append(open(os.path.join(current_dir, pdf), 'rb'))

    with open(os.path.join(export_dir, folder + ".pdf"), "wb") as fout:
        merger.write(fout)

export_dir = r"E:\Output"
input_dir = r"E:\Input"
folders = os.listdir(input_dir)
[merge_pdfs(export_dir, input_dir, folder) for folder in folders];

其他回答

http://pieceofpy.com/2009/03/05/concatenating-pdf-with-python/提供了一个解决方案。

类似的:

from pyPdf import PdfFileWriter, PdfFileReader

def append_pdf(input,output):
    [output.addPage(input.getPage(page_num)) for page_num in range(input.numPages)]

output = PdfFileWriter()

append_pdf(PdfFileReader(file("C:\\sample.pdf","rb")),output)
append_pdf(PdfFileReader(file("c:\\sample1.pdf","rb")),output)
append_pdf(PdfFileReader(file("c:\\sample2.pdf","rb")),output)
append_pdf(PdfFileReader(file("c:\\sample3.pdf","rb")),output)

output.write(file("c:\\combined.pdf","wb"))

------ 11月25日更新------

------似乎以上代码不再工作------

------请使用以下:------

from PyPDF2 import PdfFileMerger, PdfFileReader
import os

merger = PdfFileMerger()

file_folder = "C:\\My Ducoments\\"

root, dirs, files = next(os.walk(file_folder))

for path, subdirs, files in os.walk(root):
    for f in files:
        if f.endswith(".pdf"):
            merger.append(file_folder + f)

merger.write(file_folder + "Economists-1.pdf")

它是可能的,使用Python,合并单独的PDF文件?

Yes.

下面的例子将一个文件夹中的所有文件合并为一个新的PDF文件:

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from argparse import ArgumentParser
from glob import glob
from pyPdf import PdfFileReader, PdfFileWriter
import os

def merge(path, output_filename):
    output = PdfFileWriter()

    for pdffile in glob(path + os.sep + '*.pdf'):
        if pdffile == output_filename:
            continue
        print("Parse '%s'" % pdffile)
        document = PdfFileReader(open(pdffile, 'rb'))
        for i in range(document.getNumPages()):
            output.addPage(document.getPage(i))

    print("Start writing '%s'" % output_filename)
    with open(output_filename, "wb") as f:
        output.write(f)

if __name__ == "__main__":
    parser = ArgumentParser()

    # Add more options if you like
    parser.add_argument("-o", "--output",
                        dest="output_filename",
                        default="merged.pdf",
                        help="write merged PDF to FILE",
                        metavar="FILE")
    parser.add_argument("-p", "--path",
                        dest="path",
                        default=".",
                        help="path of source PDF files")

    args = parser.parse_args()
    merge(args.path, args.output_filename)

合并目录下的所有pdf文件

把pdf文件放到目录下。启动程序。你会得到一个合并了所有pdf文件的pdf。

import os
from PyPDF2 import PdfMerger

x = [a for a in os.listdir() if a.endswith(".pdf")]

merger = PdfMerger()

for pdf in x:
    merger.append(open(pdf, 'rb'))

with open("result.pdf", "wb") as fout:
    merger.write(fout)

今天我该如何编写上面相同的代码呢

from glob import glob
from PyPDF2 import PdfMerger



def pdf_merge():
    ''' Merges all the pdf files in current directory '''
    merger = PdfMerger()
    allpdfs = [a for a in glob("*.pdf")]
    [merger.append(pdf) for pdf in allpdfs]
    with open("Merged_pdfs.pdf", "wb") as new_file:
        merger.write(new_file)


if __name__ == "__main__":
    pdf_merge()

使用字典以获得更大的灵活性(例如sort, dedup):

import os
from PyPDF2 import PdfFileMerger
# use dict to sort by filepath or filename
file_dict = {}
for subdir, dirs, files in os.walk("<dir>"):
    for file in files:
        filepath = subdir + os.sep + file
        # you can have multiple endswith
        if filepath.endswith((".pdf", ".PDF")):
            file_dict[file] = filepath
# use strict = False to ignore PdfReadError: Illegal character error
merger = PdfFileMerger(strict=False)

for k, v in file_dict.items():
    print(k, v)
    merger.append(v)

merger.write("combined_result.pdf")

Giovanni G. PY以一种简单易用的方式(至少对我来说)给出了答案:

import os
from PyPDF2 import PdfFileMerger

def merge_pdfs(export_dir, input_dir, folder):
    current_dir = os.path.join(input_dir, folder)
    pdfs = os.listdir(current_dir)
    
    merger = PdfFileMerger()
    for pdf in pdfs:
        merger.append(open(os.path.join(current_dir, pdf), 'rb'))

    with open(os.path.join(export_dir, folder + ".pdf"), "wb") as fout:
        merger.write(fout)

export_dir = r"E:\Output"
input_dir = r"E:\Input"
folders = os.listdir(input_dir)
[merge_pdfs(export_dir, input_dir, folder) for folder in folders];