如何在python中找到扩展名为.txt的目录中的所有文件?
当前回答
一个类似于ghostdog的复制粘贴解决方案:
def get_all_filepaths(root_path, ext):
"""
Search all files which have a given extension within root_path.
This ignores the case of the extension and searches subdirectories, too.
Parameters
----------
root_path : str
ext : str
Returns
-------
list of str
Examples
--------
>>> get_all_filepaths('/run', '.lock')
['/run/unattended-upgrades.lock',
'/run/mlocate.daily.lock',
'/run/xtables.lock',
'/run/mysqld/mysqld.sock.lock',
'/run/postgresql/.s.PGSQL.5432.lock',
'/run/network/.ifstate.lock',
'/run/lock/asound.state.lock']
"""
import os
all_files = []
for root, dirs, files in os.walk(root_path):
for filename in files:
if filename.lower().endswith(ext):
all_files.append(os.path.join(root, filename))
return all_files
你也可以使用yield来创建一个生成器,从而避免组装完整的列表:
def get_all_filepaths(root_path, ext):
import os
for root, dirs, files in os.walk(root_path):
for filename in files:
if filename.lower().endswith(ext):
yield os.path.join(root, filename)
其他回答
import os
import sys
if len(sys.argv)==2:
print('no params')
sys.exit(1)
dir = sys.argv[1]
mask= sys.argv[2]
files = os.listdir(dir);
res = filter(lambda x: x.endswith(mask), files);
print res
这样做是可行的:
>>> import os
>>> path = '/usr/share/cups/charmaps'
>>> text_files = [f for f in os.listdir(path) if f.endswith('.txt')]
>>> text_files
['euc-cn.txt', 'euc-jp.txt', 'euc-kr.txt', 'euc-tw.txt', ... 'windows-950.txt']
Python v3.5 +
使用os的快速方法。递归函数中的Scandir。在文件夹和子文件夹中搜索具有指定扩展名的所有文件。它的速度非常快,甚至可以找到10,000个文件。
我还包含了一个将输出转换为Pandas数据框架的函数。
import os
import re
import pandas as pd
import numpy as np
def findFilesInFolderYield(path, extension, containsTxt='', subFolders = True, excludeText = ''):
""" Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)
path: Base directory to find files
extension: File extension to find. e.g. 'txt'. Regular expression. Or 'ls\d' to match ls1, ls2, ls3 etc
containsTxt: List of Strings, only finds file if it contains this text. Ignore if '' (or blank)
subFolders: Bool. If True, find files in all subfolders under path. If False, only searches files in the specified folder
excludeText: Text string. Ignore if ''. Will exclude if text string is in path.
"""
if type(containsTxt) == str: # if a string and not in a list
containsTxt = [containsTxt]
myregexobj = re.compile('\.' + extension + '$') # Makes sure the file extension is at the end and is preceded by a .
try: # Trapping a OSError or FileNotFoundError: File permissions problem I believe
for entry in os.scandir(path):
if entry.is_file() and myregexobj.search(entry.path): #
bools = [True for txt in containsTxt if txt in entry.path and (excludeText == '' or excludeText not in entry.path)]
if len(bools)== len(containsTxt):
yield entry.stat().st_size, entry.stat().st_atime_ns, entry.stat().st_mtime_ns, entry.stat().st_ctime_ns, entry.path
elif entry.is_dir() and subFolders: # if its a directory, then repeat process as a nested function
yield from findFilesInFolderYield(entry.path, extension, containsTxt, subFolders)
except OSError as ose:
print('Cannot access ' + path +'. Probably a permissions error ', ose)
except FileNotFoundError as fnf:
print(path +' not found ', fnf)
def findFilesInFolderYieldandGetDf(path, extension, containsTxt, subFolders = True, excludeText = ''):
""" Converts returned data from findFilesInFolderYield and creates and Pandas Dataframe.
Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)
path: Base directory to find files
extension: File extension to find. e.g. 'txt'. Regular expression. Or 'ls\d' to match ls1, ls2, ls3 etc
containsTxt: List of Strings, only finds file if it contains this text. Ignore if '' (or blank)
subFolders: Bool. If True, find files in all subfolders under path. If False, only searches files in the specified folder
excludeText: Text string. Ignore if ''. Will exclude if text string is in path.
"""
fileSizes, accessTimes, modificationTimes, creationTimes , paths = zip(*findFilesInFolderYield(path, extension, containsTxt, subFolders))
df = pd.DataFrame({
'FLS_File_Size':fileSizes,
'FLS_File_Access_Date':accessTimes,
'FLS_File_Modification_Date':np.array(modificationTimes).astype('timedelta64[ns]'),
'FLS_File_Creation_Date':creationTimes,
'FLS_File_PathName':paths,
})
df['FLS_File_Modification_Date'] = pd.to_datetime(df['FLS_File_Modification_Date'],infer_datetime_format=True)
df['FLS_File_Creation_Date'] = pd.to_datetime(df['FLS_File_Creation_Date'],infer_datetime_format=True)
df['FLS_File_Access_Date'] = pd.to_datetime(df['FLS_File_Access_Date'],infer_datetime_format=True)
return df
ext = 'txt' # regular expression
containsTxt=[]
path = 'C:\myFolder'
df = findFilesInFolderYieldandGetDf(path, ext, containsTxt, subFolders = True)
子目录的功能性解决方案:
from fnmatch import filter
from functools import partial
from itertools import chain
from os import path, walk
print(*chain(*(map(partial(path.join, root), filter(filenames, "*.txt")) for root, _, filenames in walk("mydir"))))
你可以简单地使用pathlibs glob 1:
import pathlib
list(pathlib.Path('your_directory').glob('*.txt'))
或在循环中:
for txt_file in pathlib.Path('your_directory').glob('*.txt'):
# do something with "txt_file"
如果你想递归你可以使用。glob('**/*.txt')
1 pathlib模块被包含在python 3.4的标准库中。但是你甚至可以在旧的Python版本(即使用conda或pip)上安装该模块的反向端口:pathlib和pathlib2。