In [54]:
 
       
x
            
import os, sys 
×
…
  
In [55]:
 
 
            
# Get the current working directory.
os.getcwd()
         
×
    
Out[55]:
 
      
'C:\\Users\\rHotD\\Documents\\GitHub\\Machine_Learning_In_Action\\Machine Learning In Action With Pandas and Scikit-learn\\chapter-04'    
…
  
In [56]:
 
            
# Get the parent of the current working directory.
os.path.dirname(os.getcwd())
×
Out[56]:
 
      
'C:\\Users\\rHotD\\Documents\\GitHub\\Machine_Learning_In_Action\\Machine Learning In Action With Pandas and Scikit-learn' 
    
…
In [57]:
 
       
x
            
# List every entry in the current working directory.
os.listdir(os.getcwd())
×
    
Out[57]:
 
      
['.ipynb_checkpoints',
 '01-CountVectorizer 和 伯努利贝叶斯测试.ipynb',
 '02- 获取当前目录.ipynb',
 'email',
 'text_content.txt'] 
…
 
  
In [58]:
            
# Get the file name of the running script.
# NOTE: inside a notebook sys.argv[0] is the kernel's entry point (the recorded
# output is '__main__.py'), not the .ipynb file, so this is only meaningful
# when the code runs as a plain script.
os.path.basename(sys.argv[0])
×
Out[58]:
 
      
'__main__.py'
    
…
  
In [59]:
 
            
# Keep the directory listing in a variable so the following cells can filter it.
dir_list = os.listdir(os.getcwd())
dir_list
×
 
    
Out[59]:
 
      
['.ipynb_checkpoints',
 '01-CountVectorizer 和 伯努利贝叶斯测试.ipynb',
 '02- 获取当前目录.ipynb',
 'email',
 'text_content.txt'] 
…
  
In [60]:
            
# Build a list of the names in a directory that carry a given suffix.
# -------------
# Approach 1: regular expression.
# The dot before the extension is escaped and the whole name has to match, so
# names such as 'mytxt' or 'notes.txt.bak' are not picked up by accident.
import re

pattern = re.compile(r'.*\.txt')
[name for name in dir_list if pattern.fullmatch(name)]
   
×
     
    
Out[60]:
 
['text_content.txt'] 
    
…
 
  
In [61]:
            
# Show the unfiltered directory listing again for comparison with the filtered results.
dir_list 
×
Out[61]:
 
      
['.ipynb_checkpoints', '01-CountVectorizer 和 伯努利贝叶斯测试.ipynb', '02- 获取当前目录.ipynb', 'email', 'text_content.txt']
 
…
 
  
In [62]:
 
# -------------
# Approach 2: compare the extension returned by os.path.splitext().
[name for name in dir_list if os.path.splitext(name)[1] == '.txt']
×
    
Out[62]:
 
['text_content.txt'] 
    
…
 
 
                










