首先,我们来看下面这种情况:
import pandas as pd
### 直接读取会报错,这是为什么呢?
df=pd.read_table('G:/python入门/Example.csv',sep=',')
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
<ipython-input-37-f802989947b5> in <module>()
----> 1 df=pd.read_table('G:/python入门/Example.csv',sep=',')
E:\Anconda\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
676 skip_blank_lines=skip_blank_lines)
677
--> 678 return _read(filepath_or_buffer, kwds)
679
680 parser_f.__name__ = name
…
df=pd.read_csv('G:Example.csv')
df.head()
```
a | b | c | d | message | |
---|---|---|---|---|---|
0 | 1 | 2 | 3 | 4 | hello |
1 | 5 | 6 | 7 | 8 | world |
2 | 9 | 10 | 11 | 12 | foo |
f=open('G:python入门/Example.csv')
df=pd.read_csv(f)
df
···
a | b | c | d | message | |
---|---|---|---|---|---|
0 | 1 | 2 | 3 | 4 | hello |
1 | 5 | 6 | 7 | 8 | world |
2 | 9 | 10 | 11 | 12 | foo |
···
df=pd.read_csv('G:Example.csv',header=None)
df
···
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
0 | a | b | c | d | message |
1 | 1 | 2 | 3 | 4 | hello |
2 | 5 | 6 | 7 | 8 | world |
3 | 9 | 10 | 11 | 12 | foo |
df=pd.read_csv('G:Example.csv',header=None,names=['A','B','C','D','E'])
df
···
A | B | C | D | E | |
---|---|---|---|---|---|
0 | a | b | c | d | message |
1 | 1 | 2 | 3 | 4 | hello |
2 | 5 | 6 | 7 | 8 | world |
3 | 9 | 10 | 11 | 12 | foo |
···
df.shape
(4, 5)
str(df)
' A B C D E\n0 a b c d message\n1 1 2 3 4 hello\n2 5 6 7 8 world\n3 9 10 11 12 foo'
df=pd.read_csv('G:Example.csv',header=None,names=['A','B','C','D','E'],index_col='E')
df
···
A | B | C | D | |
---|---|---|---|---|
E | ||||
message | a | b | c | d |
hello | 1 | 2 | 3 | 4 |
world | 5 | 6 | 7 | 8 |
foo | 9 | 10 | 11 | 12 |
df2=pd.read_csv('G:test.csv',index_col=['key1','key2'])
df2
···
value1 | value2 | ||
---|---|---|---|
key1 | key2 | ||
one | a | 1 | 2 |
b | 3 | 4 | |
c | 5 | 6 | |
d | 7 | 8 | |
two | a | 9 | 10 |
b | 11 | 12 | |
c | 13 | 14 | |
d | 15 | 16 |
···
df2=pd.read_csv('G:test.csv',index_col=['key2','key1'])
df2
···
value1 | value2 | ||
---|---|---|---|
key2 | key1 | ||
a | one | 1 | 2 |
b | one | 3 | 4 |
c | one | 5 | 6 |
d | one | 7 | 8 |
a | two | 9 | 10 |
b | two | 11 | 12 |
c | two | 13 | 14 |
d | two | 15 | 16 |
···
rf=pd.read_table('G:/test1.txt',sep='\s+')
rf
···
A | B | C | |
---|---|---|---|
aaa | -0.264438 | -1.026059 | -0.619500 |
bbb | 0.927272 | 0.302904 | -0.032399 |
ccc | -0.264273 | -0.386314 | -0.217601 |
ddd | -0.871858 | -0.348382 | 1.100491 |
df=pd.read_table('G:/test1.txt',sep='\s+')
df['A']
aaa -0.264438
bbb 0.927272
ccc -0.264273
ddd -0.871858
Name: A, dtype: float64
df=pd.read_table('G:/test1.txt',sep='\s+',skiprows=[1])
df
···
A | B | C | |
---|---|---|---|
bbb | 0.927272 | 0.302904 | -0.032399 |
ccc | -0.264273 | -0.386314 | -0.217601 |
ddd | -0.871858 | -0.348382 | 1.100491 |
kk1=pd.read_table('G:/test2.txt',sep=',')
pd.isnull(kk1)
something | a | b | c | d | message | |
---|---|---|---|---|---|---|
0 | False | False | False | False | False | True |
1 | False | False | False | True | False | False |
2 | False | False | False | False | False | False |
kk=pd.read_table('G:/test2.txt',sep=',', na_values=['NULL'])
pd.isnull(kk)
something | a | b | c | d | message | |
---|---|---|---|---|---|---|
0 | False | False | False | False | False | True |
1 | False | False | False | True | False | False |
2 | False | False | False | False | False | False |
pd.options.display.max_rows = 10 # 最多显示10行
kk1.to_csv('G:/kk.csv')
kk1.to_csv('G:/kk2.txt',sep='|')
import sys
kk1.to_csv(sys.stdout, sep='|')
|something|a|b|c|d|message
0|one|1|2|3.0|4|
1|two|5|6||8|world
2|three|9|10|11.0|12|foo
kk1.to_csv(sys.stdout, sep=',')
,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo
kk1.to_csv(sys.stdout, sep=':')
:something:a:b:c:d:message
0:one:1:2:3.0:4:
1:two:5:6::8:world
2:three:9:10:11.0:12:foo
缺失值在输出结果中会被表示为空字符串。你可能希望将其表示为别的标记值:
kk.to_csv(sys.stdout,na_rep='NULL')
,something,a,b,c,d,message
0,one,1,2,3.0,4,NULL
1,two,5,6,NULL,8,world
2,three,9,10,11.0,12,foo
如果没有设置其他选项,则会写出行和列的标签。当然,它们也都可以被禁用:
kk.to_csv(sys.stdout,na_rep='NULL',index=False,header=False)
one,1,2,3.0,4,NULL
two,5,6,NULL,8,world
three,9,10,11.0,12,foo
此外,你还可以只写出一部分的列,并以你指定的顺序排列:
kk.to_csv(sys.stdout,index=False,columns=['a','c'])
a,c
1,3.0
5,
9,11.0
大部分存储在磁盘上的表格型数据都能用pandas.read_table进行加载。然而,有时还是需要做一些手工处理。
由于接收到含有畸形行的文件而使read_table出毛病的情况并不少见。为了说明这些基本的手工处理工具,先看看下面这个简单的CSV文件:
pd.read_table('G:/kk3.txt',sep=',')
···
a | b | c | |
---|---|---|---|
0 | 1 | 2 | 3 |
1 | 1 | 2 | 3 |
···
import csv
f = open('G:/kk3.txt')
reader = csv.reader(f)
for line in reader:
    print(line)
['a', 'b', 'c']
['1', '2', '3']
['1', '2', '3']
lines = list(reader)
lines
[['a', 'b', 'c'], ['1', '2', '3'], ['1', '2', '3']]
with open('G:/kk3.txt') as f:
    lines = list(csv.reader(f))
lines
[['a', 'b', 'c'], ['1', '2', '3'], ['1', '2', '3']]
header, values = lines[0], lines[1:]
header
['a', 'b', 'c']
values
[['1', '2', '3'], ['1', '2', '3']]
data_dict = {h: v for h, v in zip(header, zip(*values))}
data_dict
{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}
with open('G:/mydata.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(('one', 'two', 'three'))
    writer.writerow(('1', '2', '3'))
    writer.writerow(('4', '5', '6'))
    writer.writerow(('7', '8', '9'))
联系客服