在用pandas包进行数据处理的时候,会遇到一些文件的读取与写入。比如csv文件,是一种比较常见的文件格式,它以逗号作为分隔符。所以,这里总结一下运用pandas数据处理包处理csv文件的读取与写入方式。
# Reading data
import pandas as pd

# File that has a header row.
df = pd.read_table("ex1.csv", sep=",")
# print(df)

# File without a header row: let pandas assign default column names ...
df1 = pd.read_csv("ex2.csv", header=None)
# print(df1)
# ... or supply the column names explicitly.
df11 = pd.read_csv("ex2.csv", names=["a", "b", "c", "d", "message"])
# print(df11)

# Use the "message" column as the index.
df111 = pd.read_csv(
    "ex2.csv",
    names=["a", "b", "c", "d", "message"],
    index_col="message",
)
# print(df111)
# Select a single value (label-based; .ix is no longer available in pandas).
# print(df111.loc["hello", "a"])
# Select a row.
# print(df111.loc["hello"])
# Select a column.
# print(df111["a"])

# Read into a hierarchical (multi-level) index.
parsed = pd.read_csv("ex3.csv", index_col=["key1", "key2"])
# print(parsed)

# Handle a file whose fields are separated by a variable amount of whitespace.
# The raw lines are loaded first just to inspect them; the context manager
# makes sure the file handle is closed.
with open("ex3.txt") as fh:
    l = list(fh)
# print(l)
# The separator is a regular expression: one or more whitespace characters.
result = pd.read_table("ex3.txt", sep=r"\s+")
# print(result)

# Handle irregularly formatted files.
# skiprows skips the given (0-based) lines of the file.
df2 = pd.read_csv("ex4.csv", skiprows=[0, 2, 3], nrows=2)
# print(df2)

# Handle missing values.
df3 = pd.read_csv("ex5.csv")
# print(df3)
# print(pd.isnull(df3))
# na_values accepts a collection of strings to be treated as missing.
result1 = pd.read_csv("ex5.csv", na_values=["NULL"])
# print(result1)
# A dict gives per-column sentinels; the keys must match the column names
# in ex5.csv ("message" and "something").
sentinels = {"message": ["foo", "NA"], "something": ["two"]}
result2 = pd.read_csv("ex5.csv", na_values=sentinels)
# print(result2)
# Writing data
from pandas import Series
import numpy as np

data = pd.read_csv("ex5.csv")
# print(data)
# data.to_csv("out.csv")
# Suppress the row and column labels.
# data.to_csv("out.csv", index=False, header=False)
# Write only a subset of the columns.
# data.to_csv("out1.csv", index=False, columns=['a', 'b', 'c'])

datas = pd.date_range('1/1/2000', periods=7)
# print(datas)
ts = Series(np.arange(7), index=datas)
# Run this once to create out2.csv; header=False keeps the file headerless
# so that it matches the header=None read below.
# ts.to_csv("out2.csv", header=False)

# Read the Series back from out2.csv (the file must already exist).
# Series.from_csv was removed in pandas 1.0, so read_csv is used instead:
# first column as a parsed date index, no header row, and the single
# remaining data column taken as the Series.
read = pd.read_csv(
    "out2.csv", header=None, index_col=0, parse_dates=True
).iloc[:, 0]
# Drop the positional labels read_csv assigns, so the result is unnamed.
read.name = None
read.index.name = None
# print(read)
# Next: handling a CSV file by hand with the standard csv module.
import csv

# Reading: csv.reader yields every row as a list of strings.
# The context manager closes the file once the block is left.
with open("ex7.csv") as f:
    read = csv.reader(f)
    # for line in read:
    #     print(line)

# Reshape the rows into a more convenient form: a dict that maps each
# header name to the tuple of values in that column.
with open("ex7.csv") as f:
    lines = list(csv.reader(f))
header, values = lines[0], lines[1:]
# zip(*values) transposes rows into columns; zip stops at the shortest
# row and at the header length, so extra trailing fields are dropped.
data_dict = dict(zip(header, zip(*values)))
print(data_dict)
# Writing
# First define the format (dialect) of the CSV file to be written.
class my_dialect(csv.Dialect):
    """CSV dialect: semicolon-delimited fields, one record per line."""

    # Each record ends with a newline.
    lineterminator = "\n"
    delimiter = ";"
    # Must be exactly one character.
    quotechar = '"'
    # Has to be set explicitly on a csv.Dialect subclass; quote only the
    # fields that need it (e.g. those containing the delimiter).
    quoting = csv.QUOTE_MINIMAL
# Write three rows using the custom dialect.
# NOTE(review): the target is "ex7.csv", the same file the manual reading
# example parses, so the sample input gets overwritten - confirm intended.
with open("ex7.csv", "w") as f:
    writer = csv.writer(f, dialect=my_dialect)
    writer.writerows((
        ('one', 'two', 'three'),
        ('1', '2', '3'),
        ('4', '5', '6'),
    ))
引用的csv文件如下:
ex1.csv
a,b,c,d,message
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo
ex2.csv
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo
ex3.csv
key1,key2,value1,value2
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16
ex3.txt
A B C
aaa 1 5 9
bbb 2 6 10
ccc 3 7 11
ddd 4 8 12
ex4.csv
# hey!
a,b,c,d,message
# just wanted to make things more difficult for you
# who reads CSV files with computers, anyway?
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo
ex5.csv
something,a,b,c,d,message
one,1,2,3,4,NA
two,5,6,,8,world
three,9,10,11,12,foo
ex7.csv
"a","b","c"
"1","2","3"
"1","2","3","4"