your programing

Python에서 확장자가 .txt 인 디렉토리의 모든 파일 찾기

lovepro 2020. 9. 27. 13:34
반응형

Python에서 확장자가 .txt 인 디렉토리의 모든 파일 찾기


.txt파이썬으로 확장자 가진 디렉토리의 모든 파일을 어떻게 찾을 수 있습니까?


다음을 사용할 수 있습니다 glob.

import glob, os
os.chdir("/mydir")
for file in glob.glob("*.txt"):
    print(file)

또는 간단히 os.listdir:

import os
for file in os.listdir("/mydir"):
    if file.endswith(".txt"):
        print(os.path.join("/mydir", file))

또는 디렉토리를 탐색하려면 os.walk다음을 사용하십시오 .

import os
for root, dirs, files in os.walk("/mydir"):
    for file in files:
        if file.endswith(".txt"):
             print(os.path.join(root, file))

glob을 사용하십시오 .

>>> import glob
>>> glob.glob('./*.txt')
['./outline.txt', './pip-log.txt', './test.txt', './testingvim.txt']

그런 일이 일을해야합니다

for root, dirs, files in os.walk(directory):
    for file in files:
        if file.endswith('.txt'):
            print file

다음과 같이 작동합니다.

>>> import os
>>> path = '/usr/share/cups/charmaps'
>>> text_files = [f for f in os.listdir(path) if f.endswith('.txt')]
>>> text_files
['euc-cn.txt', 'euc-jp.txt', 'euc-kr.txt', 'euc-tw.txt', ... 'windows-950.txt']

import os

path = 'mypath/path' 
files = os.listdir(path)

files_txt = [i for i in files if i.endswith('.txt')]

나는 os.walk () 좋아 한다 .

import os, os.path

for root, dirs, files in os.walk(dir):
    for f in files:
        fullpath = os.path.join(root, f)
        if os.path.splitext(fullpath)[1] == '.txt':
            print fullpath

또는 발전기 사용 :

import os, os.path

fileiter = (os.path.join(root, f)
    for root, _, files in os.walk(dir)
    for f in files)
txtfileiter = (f for f in fileiter if os.path.splitext(f)[1] == '.txt')
for txt in txtfileiter:
    print txt

간단히 pathlibs 1을 사용할 수 있습니다 .glob

import pathlib

list(pathlib.Path('your_directory').glob('*.txt'))

또는 루프에서 :

for txt_file in pathlib.Path('your_directory').glob('*.txt'):
    # do something with "txt_file"

재귀를 원한다면 사용할 수 있습니다. .glob('**/*.txt)


1pathlib 모듈 파이썬 3.4 표준 라이브러리에 포함시켰다. 그러나 이전 Python 버전 (예 : conda또는 사용 pip) 에서도 해당 모듈의 백 포트를 설치할 수 있습니다 . pathlibpathlib2.


약간 다른 결과를 생성하는 동일한 버전이 더 있습니다.

glob.iglob ()

import glob
for f in glob.iglob("/mydir/*/*.txt"): # generator, search immediate subdirectories 
    print f

glob.glob1 ()

print glob.glob1("/mydir", "*.tx?")  # literal_directory, basename_pattern

fnmatch.filter ()

import fnmatch, os
print fnmatch.filter(os.listdir("/mydir"), "*.tx?") # include dot-files

path.py는 또 다른 대안입니다 : https://github.com/jaraco/path.py

from path import path
p = path('/path/to/the/directory')
for f in p.files(pattern='*.txt'):
    print f

Python에는이를위한 모든 도구가 있습니다.

import os

the_dir = 'the_dir_that_want_to_search_in'
all_txt_files = filter(lambda x: x.endswith('.txt'), os.listdir(the_dir))

Pythonic 방식으로 'dataPath'폴더 내의 모든 '.txt'파일 이름을 목록으로 가져 오려면

from os import listdir
from os.path import isfile, join
path = "/dataPath/"
onlyTxtFiles = [f for f in listdir(path) if isfile(join(path, f)) and  f.endswith(".txt")]
print onlyTxtFiles

Python v3.5 이상

재귀 함수에서 os.scandir을 사용하는 빠른 방법. 폴더 및 하위 폴더에서 지정된 확장자를 가진 모든 파일을 검색합니다.

import os

def findFilesInFolder(path, pathList, extension, subFolders = True):
    """  Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)

    path:        Base directory to find files
    pathList:    A list that stores all paths
    extension:   File extension to find
    subFolders:  Bool.  If True, find files in all subfolders under path. If False, only searches files in the specified folder
    """

    try:   # Trapping a OSError:  File permissions problem I believe
        for entry in os.scandir(path):
            if entry.is_file() and entry.path.endswith(extension):
                pathList.append(entry.path)
            elif entry.is_dir() and subFolders:   # if its a directory, then repeat process as a nested function
                pathList = findFilesInFolder(entry.path, pathList, extension, subFolders)
    except OSError:
        print('Cannot access ' + path +'. Probably a permissions error')

    return pathList

dir_name = r'J:\myDirectory'
extension = ".txt"

pathList = []
pathList = findFilesInFolder(dir_name, pathList, extension, True)

2019 년 4 월 업데이트

10,000 개의 파일이 포함 된 디렉토리를 검색하는 경우 목록에 추가하는 것은 비효율적입니다. 결과를 '수확'하는 것이 더 나은 솔루션입니다. 출력을 Pandas Dataframe으로 변환하는 함수도 포함했습니다.

import os
import re
import pandas as pd
import numpy as np


def findFilesInFolderYield(path,  extension, containsTxt='', subFolders = True, excludeText = ''):
    """  Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)

    path:               Base directory to find files
    extension:          File extension to find.  e.g. 'txt'.  Regular expression. Or  'ls\d' to match ls1, ls2, ls3 etc
    containsTxt:        List of Strings, only finds file if it contains this text.  Ignore if '' (or blank)
    subFolders:         Bool.  If True, find files in all subfolders under path. If False, only searches files in the specified folder
    excludeText:        Text string.  Ignore if ''. Will exclude if text string is in path.
    """
    if type(containsTxt) == str: # if a string and not in a list
        containsTxt = [containsTxt]

    myregexobj = re.compile('\.' + extension + '$')    # Makes sure the file extension is at the end and is preceded by a .

    try:   # Trapping a OSError or FileNotFoundError:  File permissions problem I believe
        for entry in os.scandir(path):
            if entry.is_file() and myregexobj.search(entry.path): # 

                bools = [True for txt in containsTxt if txt in entry.path and (excludeText == '' or excludeText not in entry.path)]

                if len(bools)== len(containsTxt):
                    yield entry.stat().st_size, entry.stat().st_atime_ns, entry.stat().st_mtime_ns, entry.stat().st_ctime_ns, entry.path

            elif entry.is_dir() and subFolders:   # if its a directory, then repeat process as a nested function
                yield from findFilesInFolderYield(entry.path,  extension, containsTxt, subFolders)
    except OSError as ose:
        print('Cannot access ' + path +'. Probably a permissions error ', ose)
    except FileNotFoundError as fnf:
        print(path +' not found ', fnf)

def findFilesInFolderYieldandGetDf(path,  extension, containsTxt, subFolders = True, excludeText = ''):
    """  Converts returned data from findFilesInFolderYield and creates and Pandas Dataframe.
    Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)

    path:               Base directory to find files
    extension:          File extension to find.  e.g. 'txt'.  Regular expression. Or  'ls\d' to match ls1, ls2, ls3 etc
    containsTxt:        List of Strings, only finds file if it contains this text.  Ignore if '' (or blank)
    subFolders:         Bool.  If True, find files in all subfolders under path. If False, only searches files in the specified folder
    excludeText:        Text string.  Ignore if ''. Will exclude if text string is in path.
    """

    fileSizes, accessTimes, modificationTimes, creationTimes , paths  = zip(*findFilesInFolderYield(path,  extension, containsTxt, subFolders))
    df = pd.DataFrame({
            'FLS_File_Size':fileSizes,
            'FLS_File_Access_Date':accessTimes,
            'FLS_File_Modification_Date':np.array(modificationTimes).astype('timedelta64[ns]'),
            'FLS_File_Creation_Date':creationTimes,
            'FLS_File_PathName':paths,
                  })

    df['FLS_File_Modification_Date'] = pd.to_datetime(df['FLS_File_Modification_Date'],infer_datetime_format=True)
    df['FLS_File_Creation_Date'] = pd.to_datetime(df['FLS_File_Creation_Date'],infer_datetime_format=True)
    df['FLS_File_Access_Date'] = pd.to_datetime(df['FLS_File_Access_Date'],infer_datetime_format=True)

    return df

ext =   'txt'  # regular expression 
containsTxt=[]
path = 'C:\myFolder'
df = findFilesInFolderYieldandGetDf(path,  ext, containsTxt, subFolders = True)

import os
import sys 

if len(sys.argv)==2:
    print('no params')
    sys.exit(1)

dir = sys.argv[1]
mask= sys.argv[2]

files = os.listdir(dir); 

res = filter(lambda x: x.endswith(mask), files); 

print res

특정 확장자를 가진 파일에 대한 전체 파일 경로 목록을 얻기 위해 하나의 폴더에 대해 가장 빠른 솔루션 (하위 디렉터리 없음)을 확인하기 위해 테스트 (Python 3.6.4, W7x64)를 수행했습니다.

간단히 말해서,이 작업 os.listdir()은 가장 빠르며 차선책보다 1.7 배 빠릅니다. os.walk()(휴식 포함!), 2.7 pathlib배 빠르다, 3.2 배 빠르다 os.scandir(), 3.3 배 빠르다 glob.
재귀 결과가 필요할 때 이러한 결과가 변경된다는 점을 명심하십시오. 아래의 방법 중 하나를 복사 / 붙여 넣기하는 경우 .lower ()를 추가하십시오. 그렇지 않으면 .ext를 검색 할 때 .EXT를 찾을 수 없습니다.

import os
import pathlib
import timeit
import glob

def a():
    path = pathlib.Path().cwd()
    list_sqlite_files = [str(f) for f in path.glob("*.sqlite")]

def b(): 
    path = os.getcwd()
    list_sqlite_files = [f.path for f in os.scandir(path) if os.path.splitext(f)[1] == ".sqlite"]

def c():
    path = os.getcwd()
    list_sqlite_files = [os.path.join(path, f) for f in os.listdir(path) if f.endswith(".sqlite")]

def d():
    path = os.getcwd()
    os.chdir(path)
    list_sqlite_files = [os.path.join(path, f) for f in glob.glob("*.sqlite")]

def e():
    path = os.getcwd()
    list_sqlite_files = [os.path.join(path, f) for f in glob.glob1(str(path), "*.sqlite")]

def f():
    path = os.getcwd()
    list_sqlite_files = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(".sqlite"):
                list_sqlite_files.append( os.path.join(root, file) )
        break



print(timeit.timeit(a, number=1000))
print(timeit.timeit(b, number=1000))
print(timeit.timeit(c, number=1000))
print(timeit.timeit(d, number=1000))
print(timeit.timeit(e, number=1000))
print(timeit.timeit(f, number=1000))

결과 :

# Python 3.6.4
0.431
0.515
0.161
0.548
0.537
0.274

이것을 시도하면 모든 파일을 재귀 적으로 찾을 수 있습니다.

import glob, os
os.chdir("H:\\wallpaper")# use whatever you directory 

#double\\ no single \

for file in glob.glob("**/*.psd", recursive = True):#your format
    print(file)

이 코드는 내 삶을 더 간단하게 만듭니다.

import os
fnames = ([file for root, dirs, files in os.walk(dir)
    for file in files
    if file.endswith('.txt') #or file.endswith('.png') or file.endswith('.pdf')
    ])
for fname in fnames: print(fname)

fnmatch 사용 : https://docs.python.org/2/library/fnmatch.html

import fnmatch
import os

for file in os.listdir('.'):
    if fnmatch.fnmatch(file, '*.txt'):
        print file

fnmatch 와 상위 방법 을 사용하는 것이 좋습니다 . 이 방법으로 다음 중 하나를 찾을 수 있습니다.

  1. 이름. txt ;
  2. 이름. TXT ;
  3. 이름. TXT

.

import fnmatch
import os

    for file in os.listdir("/Users/Johnny/Desktop/MyTXTfolder"):
        if fnmatch.fnmatch(file.upper(), '*.TXT'):
            print(file)

같은 디렉토리에있는 "data"라는 폴더에서 ".txt"파일 이름 배열을 얻으려면 일반적으로 다음과 같은 간단한 코드 줄을 사용합니다.

import os
fileNames = [fileName for fileName in os.listdir("data") if fileName.endswith(".txt")]

하위 디렉터리가있는 기능 솔루션 :

from fnmatch import filter
from functools import partial
from itertools import chain
from os import path, walk

print(*chain(*(map(partial(path.join, root), filter(filenames, "*.txt")) for root, _, filenames in walk("mydir"))))

폴더에 많은 파일이 포함되어 있거나 메모리가 제약 인 경우 생성기를 사용하는 것이 좋습니다.

def yield_files_with_extensions(folder_path, file_extension):
   for _, _, files in os.walk(folder_path):
       for file in files:
           if file.endswith(file_extension):
               yield file

옵션 A : 반복

for f in yield_files_with_extensions('.', '.txt'): 
    print(f)

옵션 B : 모두 가져 오기

files = [f for f in yield_files_with_extensions('.', '.txt')]

ghostdog과 유사한 복사-붙여 넣기 가능한 솔루션 :

def get_all_filepaths(root_path, ext):
    """
    Search all files which have a given extension within root_path.

    This ignores the case of the extension and searches subdirectories, too.

    Parameters
    ----------
    root_path : str
    ext : str

    Returns
    -------
    list of str

    Examples
    --------
    >>> get_all_filepaths('/run', '.lock')
    ['/run/unattended-upgrades.lock',
     '/run/mlocate.daily.lock',
     '/run/xtables.lock',
     '/run/mysqld/mysqld.sock.lock',
     '/run/postgresql/.s.PGSQL.5432.lock',
     '/run/network/.ifstate.lock',
     '/run/lock/asound.state.lock']
    """
    import os
    all_files = []
    for root, dirs, files in os.walk(root_path):
        for filename in files:
            if filename.lower().endswith(ext):
                all_files.append(os.path.join(root, filename))
    return all_files

여기에 extend()

types = ('*.jpg', '*.png')
images_list = []
for files in types:
    images_list.extend(glob.glob(os.path.join(path, files)))

Python OS 모듈을 사용 하여 특정 확장자를 가진 파일을 찾습니다.

간단한 예는 다음과 같습니다.

import os

# This is the path where you want to search
path = r'd:'  

# this is extension you want to detect
extension = '.txt'   # this can be : .jpg  .png  .xls  .log .....

for root, dirs_list, files_list in os.walk(path):
    for file_name in files_list:
        if os.path.splitext(file_name)[-1] == extension:
            file_name_path = os.path.join(root, file_name)
            print file_name
            print file_name_path   # This is the full path of the filter file

Many users have replied with os.walk answers, which includes all files but also all directories and subdirectories and their files.

import os


def files_in_dir(path, extension=''):
    """
       Generator: yields all of the files in <path> ending with
       <extension>

       \param   path       Absolute or relative path to inspect,
       \param   extension  [optional] Only yield files matching this,

       \yield              [filenames]
    """


    for _, dirs, files in os.walk(path):
        dirs[:] = []  # do not recurse directories.
        yield from [f for f in files if f.endswith(extension)]

# Example: print all the .py files in './python'
for filename in files_in_dir('./python', '*.py'):
    print("-", filename)

Or for a one off where you don't need a generator:

path, ext = "./python", ext = ".py"
for _, _, dirfiles in os.walk(path):
    matches = (f for f in dirfiles if f.endswith(ext))
    break

for filename in matches:
    print("-", filename)

If you are going to use matches for something else, you may want to make it a list rather than a generator expression:

    matches = [f for f in dirfiles if f.endswith(ext)]

A simple method by using for loop :

import os

dir = ["e","x","e"]

p = os.listdir('E:')  #path

for n in range(len(p)):
   name = p[n]
   myfile = [name[-3],name[-2],name[-1]]  #for .txt
   if myfile == dir :
      print(name)
   else:
      print("nops")

Though this can be made more generalised .

참고URL : https://stackoverflow.com/questions/3964681/find-all-files-in-a-directory-with-extension-txt-in-python

반응형