如何在python中找到扩展名为.txt的目录中的所有文件?


当前回答

你可以使用glob:

import glob, os
os.chdir("/mydir")
for file in glob.glob("*.txt"):
    print(file)

或者简单的os.listdir:

import os
for file in os.listdir("/mydir"):
    if file.endswith(".txt"):
        print(os.path.join("/mydir", file))

或者如果你想遍历目录,使用os.walk:

import os
for root, dirs, files in os.walk("/mydir"):
    for file in files:
        if file.endswith(".txt"):
             print(os.path.join(root, file))

其他回答

一个类似于ghostdog的复制粘贴解决方案:

def get_all_filepaths(root_path, ext):
    """
    Search all files which have a given extension within root_path.

    This ignores the case of the extension and searches subdirectories, too.

    Parameters
    ----------
    root_path : str
    ext : str

    Returns
    -------
    list of str

    Examples
    --------
    >>> get_all_filepaths('/run', '.lock')
    ['/run/unattended-upgrades.lock',
     '/run/mlocate.daily.lock',
     '/run/xtables.lock',
     '/run/mysqld/mysqld.sock.lock',
     '/run/postgresql/.s.PGSQL.5432.lock',
     '/run/network/.ifstate.lock',
     '/run/lock/asound.state.lock']
    """
    import os
    all_files = []
    for root, dirs, files in os.walk(root_path):
        for filename in files:
            if filename.lower().endswith(ext):
                all_files.append(os.path.join(root, filename))
    return all_files

你也可以使用yield来创建一个生成器,从而避免组装完整的列表:

def get_all_filepaths(root_path, ext):
    import os
    for root, dirs, files in os.walk(root_path):
        for filename in files:
            if filename.lower().endswith(ext):
                yield os.path.join(root, filename)

子目录的功能性解决方案:

from fnmatch import filter
from functools import partial
from itertools import chain
from os import path, walk

print(*chain(*(map(partial(path.join, root), filter(filenames, "*.txt")) for root, _, filenames in walk("mydir"))))

Python v3.5 +

使用os的快速方法。递归函数中的Scandir。在文件夹和子文件夹中搜索具有指定扩展名的所有文件。它的速度非常快,甚至可以找到10,000个文件。

我还包含了一个将输出转换为Pandas数据框架的函数。

import os
import re
import pandas as pd
import numpy as np


def findFilesInFolderYield(path,  extension, containsTxt='', subFolders = True, excludeText = ''):
    """  Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)

    path:               Base directory to find files
    extension:          File extension to find.  e.g. 'txt'.  Regular expression. Or  'ls\d' to match ls1, ls2, ls3 etc
    containsTxt:        List of Strings, only finds file if it contains this text.  Ignore if '' (or blank)
    subFolders:         Bool.  If True, find files in all subfolders under path. If False, only searches files in the specified folder
    excludeText:        Text string.  Ignore if ''. Will exclude if text string is in path.
    """
    if type(containsTxt) == str: # if a string and not in a list
        containsTxt = [containsTxt]
    
    myregexobj = re.compile('\.' + extension + '$')    # Makes sure the file extension is at the end and is preceded by a .
    
    try:   # Trapping a OSError or FileNotFoundError:  File permissions problem I believe
        for entry in os.scandir(path):
            if entry.is_file() and myregexobj.search(entry.path): # 
    
                bools = [True for txt in containsTxt if txt in entry.path and (excludeText == '' or excludeText not in entry.path)]
    
                if len(bools)== len(containsTxt):
                    yield entry.stat().st_size, entry.stat().st_atime_ns, entry.stat().st_mtime_ns, entry.stat().st_ctime_ns, entry.path
    
            elif entry.is_dir() and subFolders:   # if its a directory, then repeat process as a nested function
                yield from findFilesInFolderYield(entry.path,  extension, containsTxt, subFolders)
    except OSError as ose:
        print('Cannot access ' + path +'. Probably a permissions error ', ose)
    except FileNotFoundError as fnf:
        print(path +' not found ', fnf)

def findFilesInFolderYieldandGetDf(path,  extension, containsTxt, subFolders = True, excludeText = ''):
    """  Converts returned data from findFilesInFolderYield and creates and Pandas Dataframe.
    Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)

    path:               Base directory to find files
    extension:          File extension to find.  e.g. 'txt'.  Regular expression. Or  'ls\d' to match ls1, ls2, ls3 etc
    containsTxt:        List of Strings, only finds file if it contains this text.  Ignore if '' (or blank)
    subFolders:         Bool.  If True, find files in all subfolders under path. If False, only searches files in the specified folder
    excludeText:        Text string.  Ignore if ''. Will exclude if text string is in path.
    """
    
    fileSizes, accessTimes, modificationTimes, creationTimes , paths  = zip(*findFilesInFolderYield(path,  extension, containsTxt, subFolders))
    df = pd.DataFrame({
            'FLS_File_Size':fileSizes,
            'FLS_File_Access_Date':accessTimes,
            'FLS_File_Modification_Date':np.array(modificationTimes).astype('timedelta64[ns]'),
            'FLS_File_Creation_Date':creationTimes,
            'FLS_File_PathName':paths,
                  })
    
    df['FLS_File_Modification_Date'] = pd.to_datetime(df['FLS_File_Modification_Date'],infer_datetime_format=True)
    df['FLS_File_Creation_Date'] = pd.to_datetime(df['FLS_File_Creation_Date'],infer_datetime_format=True)
    df['FLS_File_Access_Date'] = pd.to_datetime(df['FLS_File_Access_Date'],infer_datetime_format=True)

    return df

ext =   'txt'  # regular expression 
containsTxt=[]
path = 'C:\myFolder'
df = findFilesInFolderYieldandGetDf(path,  ext, containsTxt, subFolders = True)

使用Python OS模块查找具有特定扩展名的文件。

简单的例子如下:

import os

# This is the path where you want to search
path = r'd:'  

# this is extension you want to detect
extension = '.txt'   # this can be : .jpg  .png  .xls  .log .....

for root, dirs_list, files_list in os.walk(path):
    for file_name in files_list:
        if os.path.splitext(file_name)[-1] == extension:
            file_name_path = os.path.join(root, file_name)
            print file_name
            print file_name_path   # This is the full path of the filter file

我建议你使用fnmatch和上面的方法。通过这种方式,你可以找到以下任何一个:

Name.txt; Name.TXT; Name.Txt

.

import fnmatch
import os

    for file in os.listdir("/Users/Johnny/Desktop/MyTXTfolder"):
        if fnmatch.fnmatch(file.upper(), '*.TXT'):
            print(file)