查找最大的py文件

1、查找当前目录下的最小和最大的py文件

2、查找当前Python安装目录的Lib目录下最小和最大的py文件

3、查找这个sys.path下所有module的最小和最大的py文件

  • 遍历目录使用os.walk,非常方便
  • 查找匹配的文件名使用glob模块
  • os.path模块包含很多有用的API,比如getsize(获取文件大小)、normpath(规范化路径)、normcase(规范化路径大小写,在Windows上会转为小写并统一斜杠,在POSIX上保持不变)、join(拼接路径)

import os, glob, sys, pprint
# Find the smallest and largest Python source files in a single directory

def Find_Largest_File():
    """Print the two smallest and two largest ``*.py`` files in the current directory.

    Only the directory itself is inspected (no recursion).  Each reported
    entry is a ``(size_in_bytes, path)`` tuple; entries are ordered by size
    and then by path.  The two smallest entries are printed first, the two
    largest second, each as a plain list.
    """
    pattern = os.sep.join(('.', '*.py'))

    # Pair every match with its size so tuple ordering ranks by size first.
    by_size = sorted(
        (os.path.getsize(path), path) for path in glob.glob(pattern)
    )

    print(by_size[:2])
    print(by_size[-2:])

# To traverse a whole directory tree, use os.walk

def Find_Largest_File_In_Tree(dirname=None, trace=True):
    """Print the two smallest and two largest ``.py`` files under a directory tree.

    Args:
        dirname: Root directory to walk recursively.  When ``None`` (the
            default) the directory that contains the running interpreter's
            ``os`` module is used, which is normally its standard-library
            ``Lib`` directory.
        trace: When true (the default), print each visited directory, its
            list of sub-directories and every matching file name while
            walking.

    Returns:
        None.  The two smallest and the two largest ``(size, path)`` tuples
        are pretty-printed to standard output.
    """
    if dirname is None:
        # A hard-coded, machine-specific path silently yields no results when
        # it does not exist (os.walk ignores a missing root), so derive the
        # library directory from the interpreter that is actually running.
        dirname = os.path.dirname(os.__file__)

    allsizes = []

    for (thisDir, subsHere, filesHere) in os.walk(dirname):
        if trace:
            print(thisDir)
            print(subsHere)
        for filename in filesHere:
            if not filename.endswith('.py'):
                continue
            if trace:
                print('...', filename)
            fullname = os.path.join(thisDir, filename)
            try:
                fullsize = os.path.getsize(fullname)
            except OSError as exc:
                # e.g. a dangling symlink or a file removed mid-walk: report
                # it and keep going instead of aborting the whole scan.
                print('skipping', fullname, type(exc))
                continue
            allsizes.append((fullsize, fullname))

    # Tuples sort by size first, then by path.
    allsizes.sort()

    pprint.pprint(allsizes[:2])
    pprint.pprint(allsizes[-2:])

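# Optimization: keep a `visited` collection so directories reachable from several sys.path entries are scanned only once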

def Find_Largest_File_In_Search_Path(search_path=None, trace=0):
    """Report the smallest and largest ``.py`` files found on a module search path.

    Every directory in the search path is walked recursively.  For each
    ``.py`` file the size in bytes and the number of lines are recorded, and
    the three smallest / three largest entries are pretty-printed twice:
    first ranked by size, then ranked by line count.

    Args:
        search_path: Iterable of directory names to scan.  Defaults to
            ``sys.path`` when ``None``.
        trace: Verbosity level.  ``> 0`` prints each directory yielded by the
            walk, ``> 1`` additionally prints each matching file name.

    Returns:
        None.  Results are written to standard output as
        ``(size, lines, path)`` tuples.
    """
    if search_path is None:
        search_path = sys.path

    visited = set()
    allsizes = []
    for srcdir in search_path:
        for (thisDir, subsHere, filesHere) in os.walk(srcdir):
            if trace > 0:
                print(thisDir)

            # Normalize so the same directory reached through differently
            # spelled search-path entries is recognized as a duplicate.
            thisDir = os.path.normpath(thisDir)
            fixcase = os.path.normcase(thisDir)
            if fixcase in visited:
                # Everything below an already-scanned directory has been
                # scanned too; prune in place so os.walk does not descend
                # into the whole subtree again just to skip each entry.
                subsHere[:] = []
                continue
            visited.add(fixcase)

            for filename in filesHere:
                if not filename.endswith('.py'):
                    continue
                if trace > 1:
                    print('...', filename)
                pypath = os.path.join(thisDir, filename)
                try:
                    pysize = os.path.getsize(pypath)
                    # Binary mode avoids decoding errors; the context manager
                    # guarantees the handle is closed for every file.
                    with open(pypath, 'rb') as pyfile:
                        pylines = sum(1 for _ in pyfile)
                except OSError as exc:
                    print('skipping', pypath, type(exc))
                else:
                    allsizes.append((pysize, pylines, pypath))

    print('By size')
    allsizes.sort()
    pprint.pprint(allsizes[:3])
    pprint.pprint(allsizes[-3:])

    print('By lines')
    # Stable sort: files with equal line counts stay in size order.
    allsizes.sort(key=lambda entry: entry[1])
    pprint.pprint(allsizes[:3])
    pprint.pprint(allsizes[-3:])


if __name__ == "__main__":
    # Script entry point.  Only the sys.path scan runs by default; call
    # Find_Largest_File() or Find_Largest_File_In_Tree() here instead to
    # scan the current directory or the standard-library tree.
    Find_Largest_File_In_Search_Path()