如何在python中找到扩展名为.txt的目录中的所有文件?
当前回答
你可以使用glob:
import glob, os
os.chdir("/mydir")
for file in glob.glob("*.txt"):
print(file)
或者简单的os.listdir:
import os
for file in os.listdir("/mydir"):
if file.endswith(".txt"):
print(os.path.join("/mydir", file))
或者如果你想遍历目录,使用os.walk:
import os
for root, dirs, files in os.walk("/mydir"):
for file in files:
if file.endswith(".txt"):
print(os.path.join(root, file))
其他回答
要从同一个目录中名为“data”的文件夹中获取一个“。txt”文件名的数组,我通常使用以下简单的代码行:
import os
fileNames = [fileName for fileName in os.listdir("data") if fileName.endswith(".txt")]
import os
import sys
if len(sys.argv)==2:
print('no params')
sys.exit(1)
dir = sys.argv[1]
mask= sys.argv[2]
files = os.listdir(dir);
res = filter(lambda x: x.endswith(mask), files);
print res
我喜欢os.walk():
import os
for root, dirs, files in os.walk(dir):
for f in files:
if os.path.splitext(f)[1] == '.txt':
fullpath = os.path.join(root, f)
print(fullpath)
或者使用生成器:
import os
fileiter = (os.path.join(root, f)
for root, _, files in os.walk(dir)
for f in files)
txtfileiter = (f for f in fileiter if os.path.splitext(f)[1] == '.txt')
for txt in txtfileiter:
print(txt)
这样做是可行的:
>>> import os
>>> path = '/usr/share/cups/charmaps'
>>> text_files = [f for f in os.listdir(path) if f.endswith('.txt')]
>>> text_files
['euc-cn.txt', 'euc-jp.txt', 'euc-kr.txt', 'euc-tw.txt', ... 'windows-950.txt']
Python v3.5 +
使用os的快速方法。递归函数中的Scandir。在文件夹和子文件夹中搜索具有指定扩展名的所有文件。它的速度非常快,甚至可以找到10,000个文件。
我还包含了一个将输出转换为Pandas数据框架的函数。
import os
import re
import pandas as pd
import numpy as np
def findFilesInFolderYield(path, extension, containsTxt='', subFolders = True, excludeText = ''):
""" Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)
path: Base directory to find files
extension: File extension to find. e.g. 'txt'. Regular expression. Or 'ls\d' to match ls1, ls2, ls3 etc
containsTxt: List of Strings, only finds file if it contains this text. Ignore if '' (or blank)
subFolders: Bool. If True, find files in all subfolders under path. If False, only searches files in the specified folder
excludeText: Text string. Ignore if ''. Will exclude if text string is in path.
"""
if type(containsTxt) == str: # if a string and not in a list
containsTxt = [containsTxt]
myregexobj = re.compile('\.' + extension + '$') # Makes sure the file extension is at the end and is preceded by a .
try: # Trapping a OSError or FileNotFoundError: File permissions problem I believe
for entry in os.scandir(path):
if entry.is_file() and myregexobj.search(entry.path): #
bools = [True for txt in containsTxt if txt in entry.path and (excludeText == '' or excludeText not in entry.path)]
if len(bools)== len(containsTxt):
yield entry.stat().st_size, entry.stat().st_atime_ns, entry.stat().st_mtime_ns, entry.stat().st_ctime_ns, entry.path
elif entry.is_dir() and subFolders: # if its a directory, then repeat process as a nested function
yield from findFilesInFolderYield(entry.path, extension, containsTxt, subFolders)
except OSError as ose:
print('Cannot access ' + path +'. Probably a permissions error ', ose)
except FileNotFoundError as fnf:
print(path +' not found ', fnf)
def findFilesInFolderYieldandGetDf(path, extension, containsTxt, subFolders = True, excludeText = ''):
""" Converts returned data from findFilesInFolderYield and creates and Pandas Dataframe.
Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)
path: Base directory to find files
extension: File extension to find. e.g. 'txt'. Regular expression. Or 'ls\d' to match ls1, ls2, ls3 etc
containsTxt: List of Strings, only finds file if it contains this text. Ignore if '' (or blank)
subFolders: Bool. If True, find files in all subfolders under path. If False, only searches files in the specified folder
excludeText: Text string. Ignore if ''. Will exclude if text string is in path.
"""
fileSizes, accessTimes, modificationTimes, creationTimes , paths = zip(*findFilesInFolderYield(path, extension, containsTxt, subFolders))
df = pd.DataFrame({
'FLS_File_Size':fileSizes,
'FLS_File_Access_Date':accessTimes,
'FLS_File_Modification_Date':np.array(modificationTimes).astype('timedelta64[ns]'),
'FLS_File_Creation_Date':creationTimes,
'FLS_File_PathName':paths,
})
df['FLS_File_Modification_Date'] = pd.to_datetime(df['FLS_File_Modification_Date'],infer_datetime_format=True)
df['FLS_File_Creation_Date'] = pd.to_datetime(df['FLS_File_Creation_Date'],infer_datetime_format=True)
df['FLS_File_Access_Date'] = pd.to_datetime(df['FLS_File_Access_Date'],infer_datetime_format=True)
return df
ext = 'txt' # regular expression
containsTxt=[]
path = 'C:\myFolder'
df = findFilesInFolderYieldandGetDf(path, ext, containsTxt, subFolders = True)
推荐文章
- 如何在交互式Python中查看整个命令历史?
- 如何显示有两个小数点后的浮点数?
- 如何用OpenCV2.0和Python2.6调整图像大小
- 在每个列表元素上调用int()函数?
- 当使用代码存储库时,如何引用资源的相对路径
- 如何在Flask-SQLAlchemy中按id删除记录
- 在Python中插入列表的第一个位置
- Python Pandas只合并某些列
- 如何在一行中连接两个集而不使用“|”
- 从字符串中移除前缀
- 代码结束时发出警报
- 如何在Python中按字母顺序排序字符串中的字母
- 在matplotlib中将y轴标签添加到次要y轴
- 如何消除数独方块的凹凸缺陷?
- 为什么出现这个UnboundLocalError(闭包)?