csv文件读取¶
In [1]:
import pandas as pd
import sys
In [2]:
%cat examples/ex2.csv
In [3]:
# The file has no header row, so do not treat its first line as column labels
pd.read_csv(
    "examples/ex2.csv",
    header=None,
)
Out[3]:
In [4]:
# The file has no header row; supply the column labels explicitly.
# The last label is spelled 'message' (it was misspelled 'massage').
pd.read_csv(
    'examples/ex2.csv',
    names=['a', 'b', 'c', 'd', 'message'],
)
Out[4]:
In [5]:
# Supply the column labels and use one of them ('message') as the row index.
# The label passed to index_col must match an entry of `names` exactly.
pd.read_csv(
    'examples/ex2.csv',
    names=['a', 'b', 'c', 'd', 'message'],
    index_col='message',
)
Out[5]:
In [6]:
# Show the raw lines of the text file.
# The `with` block closes the file handle as soon as the lines are read,
# instead of leaving it open for the lifetime of the kernel.
with open('examples/ex3.txt') as ex3_file:
    ex3_lines = list(ex3_file)
ex3_lines
Out[6]:
In [7]:
# The fields are separated by a variable amount of whitespace, so the separator
# is the regular expression \s+ (a raw string keeps the backslash intact).
# A bare 's+' would split on runs of the letter "s", not on whitespace.
# The data rows have one more field than the header row, so the extra leading
# column is picked up as the index automatically.
# NOTE(review): the previous cell lists 'examples/ex3.txt' while this one reads
# 'examples/ex3.csv' — confirm which file is intended.
pd.read_csv('examples/ex3.csv', sep=r'\s+')
Out[7]:
将指定的值识别为缺失值(NaN)¶
In [8]:
%cat examples/ex5.csv
In [9]:
# Per-column sentinel values that should be parsed as NaN.
# The dict keys have to match the file's column names exactly.
# NOTE(review): assumes the column is named 'message' (the key was spelled
# 'massage') — confirm against the header printed by the previous cell.
pd.read_csv(
    'examples/ex5.csv',
    na_values={'something': 'two', 'message': ['NA', 'foo']},
)
Out[9]:
In [10]:
# Read only part of the file: stop after the first 10 data rows
pd.read_csv(
    "examples/ex6.csv",
    nrows=10,
)
Out[10]:
In [11]:
# Read the file in chunks of 1000 rows instead of loading it all at once.
# With `chunksize` set, read_csv returns an iterator that yields one DataFrame per chunk.
chunker = pd.read_csv('examples/ex6.csv', chunksize=1000)
for piece in chunker:
    # Print the first row of every chunk (the loop body must be indented)
    print(piece.iloc[0])
写入csv文件¶
In [12]:
# Load the sample frame, then write it as CSV text to stdout so the result
# shows up in the cell output instead of a file
data = pd.read_csv("examples/ex5.csv")
data.to_csv(path_or_buf=sys.stdout)
In [13]:
# Same output, but with '|' as the field delimiter instead of a comma
data.to_csv(
    sys.stdout,
    sep="|",
)
In [14]:
# Mark missing values in the output with the text NULL
data.to_csv(
    sys.stdout,
    na_rep="NULL",
)
In [15]:
# Write the values only: no row labels and no header line
data.to_csv(
    sys.stdout,
    header=False,
    index=False,
)
In [16]:
# Write only a subset of the columns, without the row labels
data.to_csv(
    sys.stdout,
    columns=["a", "b", "c"],
    index=False,
)
json文件¶
In [17]:
%cat examples/example.json
In [18]:
# Parse the JSON file into a DataFrame and display it
data = pd.read_json("examples/example.json")
data
Out[18]:
In [19]:
# Serialize the frame as JSON text to stdout using the default orientation
data.to_json(path_or_buf=sys.stdout)
In [20]:
# Write row by row: orient='records' emits a list with one object per row
data.to_json(
    sys.stdout,
    orient="records",
)
HTML¶
In [21]:
# read_html searches the document for <table> tags and parses the data inside them
tables = pd.read_html("examples/fdic_failed_bank_list.html")
# Number of tables found — this document contains only one
len(tables)
Out[21]:
In [22]:
# Take the first (and only) parsed table and preview its leading rows
data = tables[0]
data.head()
Out[22]:
In [24]:
# Save the current `data` frame (the table taken from the HTML file in the
# previous cell) to an Excel workbook
data.to_excel("examples/ex2.xlsx")
Web API¶
In [25]:
import requests

# Fetch the issue list of the pandas repository from the GitHub REST API
url = 'https://api.github.com/repos/pandas-dev/pandas/issues'
# requests applies no timeout by default; without one a stalled connection
# would block the kernel indefinitely
resp = requests.get(url, timeout=10)
# Fail here on an HTTP error status (e.g. rate limiting) instead of handing an
# error payload to the DataFrame constructor in the next cell
resp.raise_for_status()
resp
Out[25]:
In [26]:
# Decode the JSON response body; `data` is a list of dicts
data = resp.json()
# Build a frame from a subset of the fields only
wanted_fields = ['title', 'url', 'state', 'labels']
issues = pd.DataFrame(data, columns=wanted_fields)
issues.head()
Out[26]: