打开APP
userphoto
未登录

开通VIP,畅享免费电子书等14项超值服务

开通VIP
Python3批量转换文本文件编码


# -*- coding: utf-8 -*-
'''Batch re-encode text files found under a folder.

If the optional third-party ``chardet`` package can be imported, the current
encoding of every file is detected automatically; otherwise a fixed source
encoding is assumed by the caller.
'''
import glob
import os
import os.path

# chardet is optional: IsAuto records whether automatic detection is available
# so the rest of the module can choose between the two conversion functions.
try:
    from chardet.universaldetector import UniversalDetector
    IsAuto = True
except ImportError:
    IsAuto = False


def Convert_Auto(filename, out_enc="utf-8"):
    r'''Re-encode a text file in place, detecting its current encoding.

    Requires the chardet library (UniversalDetector must be importable).

    Input Parameter:
        filename: full path and file name, e.g. c:\dir1\file.txt
        out_enc: new encoding. Default as 'utf-8'
    Output Parameter:
        None. A "Success" or "Error" line is printed for the file.

    Bytes that cannot be decoded with the detected encoding are dropped
    ('ignore' error handler). Failures (I/O error, undetectable or unknown
    encoding, text not representable in out_enc) are reported and the
    original file is left untouched.
    '''
    # Placeholder used in the error message when detection never happened.
    in_enc = "unknown"
    try:
        with open(filename, 'rb') as src:
            raw = src.read()

        # Feed the real leading bytes unmodified so that any byte-order mark
        # stays at the very start of the sample handed to the detector.
        detector = UniversalDetector()
        detector.reset()
        detector.feed(raw[:1024])
        detector.close()
        detected = detector.result['encoding']
        if detected is None:
            raise LookupError("encoding could not be detected")
        in_enc = detected

        # Encode fully in memory before opening for writing, so a failure
        # cannot leave a truncated file behind. Binary mode keeps the
        # original line endings instead of translating them again.
        encoded = raw.decode(in_enc, 'ignore').encode(out_enc)
        with open(filename, 'wb') as dst:
            dst.write(encoded)
    except (OSError, LookupError, UnicodeError):
        print("Error: " + filename + " FAIL to converted from " + in_enc + " to " + out_enc + " !")
    else:
        print("Success: " + filename + " converted from " + in_enc + " to " + out_enc + " !")


def Convert_Manu(filename, in_enc='gbk', out_enc="utf-8"):
    r'''Re-encode a text file in place from an explicitly given encoding.

    Input Parameter:
        filename: full path and file name, e.g. c:\dir1\file.txt
        in_enc:  current encoding. Default as 'gbk'
        out_enc: new encoding. Default as 'utf-8'
    Output Parameter:
        None. A "Success" or "Error" line is printed for the file.

    Bytes that cannot be decoded with in_enc are dropped ('ignore' error
    handler). Failures (I/O error, unknown codec name, text not representable
    in out_enc) are reported and the original file is left untouched.
    '''
    print("convert " + filename)
    try:
        with open(filename, 'rb') as src:
            raw = src.read()

        # Encode fully in memory before opening for writing, so a failure
        # cannot leave a truncated file behind. Binary mode keeps the
        # original line endings instead of translating them again.
        encoded = raw.decode(in_enc, 'ignore').encode(out_enc)
        with open(filename, 'wb') as dst:
            dst.write(encoded)
    except (OSError, LookupError, UnicodeError):
        print("Error: " + filename + " FAIL to converted from " + in_enc + " to " + out_enc + " !")
    else:
        print("Success: " + filename + " converted from " + in_enc + " to " + out_enc + " !")


def explore(dir, IsLoopSubDIR=True, suffix='.txt', in_enc='gbk', out_enc='utf-8'):
    '''Convert the encoding of the matching files of a folder.

    Input:
        dir         : Folder to process
        IsLoopSubDIR:   True -- Include files in sub folder
                        False-- Only include files in current folder
        suffix      : File name suffix to select. Default as '.txt'
        in_enc      : Source encoding used when automatic detection is not
                      available (IsAuto is False). Default as 'gbk'
        out_enc     : Target encoding. Default as 'utf-8'
    Output:
        NONE
    '''
    if IsLoopSubDIR:
        flist = getSubFileList(dir, suffix)
    else:
        flist = getCurrFileList(dir, suffix)

    for fname in flist:
        # IsAuto is set at import time depending on whether chardet loaded.
        if IsAuto:
            Convert_Auto(fname, out_enc)
        else:
            Convert_Manu(fname, in_enc, out_enc)


def getSubFileList(dir, suffix=''):
    '''Get the list of files with the specified suffix under a folder,
    including all of its sub folders.

    Input:
        dir     :   Folder to walk
        suffix  :   default to blank, means select all files.
    Output:
        File list (each entry is the walked folder path joined with the name)
    '''
    # Walk the folder that was asked for, not the process working directory.
    return [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(dir)
        for name in files
        if name.endswith(suffix)
    ]


def getCurrFileList(dir, suffix=''):
    '''Get the list of files with the specified suffix directly inside a
    folder (sub folders are not searched).

    Input:
        dir     :   Folder to list
        suffix  :   default to blank, means select all files.
    Output:
        File list (each entry is prefixed with dir)
    '''
    # Escape the folder part so characters such as '[' in its name are taken
    # literally; the suffix is left as-is, as it was appended to the pattern
    # unescaped before. An empty suffix simply yields the pattern '*'.
    pattern = os.path.join(glob.escape(dir), '*' + suffix)
    # Keep regular files only: a directory whose name matches the pattern
    # cannot be re-encoded.
    return [path for path in glob.glob(pattern) if os.path.isfile(path)]


def main():
    '''Convert every matching file under the current working directory,
    including its sub folders.'''
    explore(os.getcwd(), True)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
本站仅提供存储服务,所有内容均由用户发布,如发现有害或侵权内容,请点击举报
打开APP,阅读全文并永久保存 查看更多类似文章
猜你喜欢
类似文章
【热】打开小程序,算一算2024你的财运
文件转换为utf
让Python在后台自动解压各种压缩文件!
高性能ASP.NET开发:自动压缩CSS、JS
IDEA正确设置编码统一为UTF-8
OpenWrt自动备份文件到SAE(离线上传)
java web 文件上传与下载
更多类似文章 >>
生活服务
热点新闻
分享 收藏 导长图 关注 下载文章
绑定账号成功
后续可登录账号畅享VIP特权!
如果VIP功能使用有故障,
可点击这里联系客服!

联系客服