Let's import our datafile mpg.csv, which contains fuel economy data for 234 cars.
- mpg : miles per gallon
- class : car classification
- cty : city mpg
- cyl : # of cylinders
- displ : engine displacement in liters
- drv : f = front-wheel drive, r = rear wheel drive, 4 = 4wd
- fl : fuel (e = ethanol E85, d = diesel, r = regular, p = premium, c = CNG)
- hwy : highway mpg
- manufacturer : automobile manufacturer
- model : model of car
- trans : type of transmission
- year : model year
1 import csv 2 3 %precision 2 4 5 with open('mpg.csv') as csvfile: 6 mpg = list(csv.DictReader(csvfile)) 7 8 mpg[:3] # The first three dictionaries in our list.
[OrderedDict([('', '1'), ('manufacturer', 'audi'), ('model', 'a4'), ('displ', '1.8'), ('year', '1999'), ('cyl', '4'), ('trans', 'auto(l5)'), ('drv', 'f'), ('cty', '18'), ('hwy', '29'), ('fl', 'p'), ('class', 'compact')]), OrderedDict([('', '2'), ('manufacturer', 'audi'), ('model', 'a4'), ('displ', '1.8'), ('year', '1999'), ('cyl', '4'), ('trans', 'manual(m5)'), ('drv', 'f'), ('cty', '21'), ('hwy', '29'), ('fl', 'p'), ('class', 'compact')]), OrderedDict([('', '3'), ('manufacturer', 'audi'), ('model', 'a4'), ('displ', '2'), ('year', '2008'), ('cyl', '4'), ('trans', 'manual(m6)'), ('drv', 'f'), ('cty', '20'), ('hwy', '31'), ('fl', 'p'), ('class', 'compact')])]
# Number of rows (one dictionary per car) loaded from the CSV.
len(mpg)
234
%得到列的名字
# Column names, read from the keys of the first row's dictionary.
mpg[0].keys()
odict_keys(['', 'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans', 'drv', 'cty', 'hwy', 'fl', 'class'])
%下面演示如何计算所有汽车的平均燃油经济性(分别针对城市 cty 和高速 hwy)。字典中的所有值都是字符串,所以需要先转换成浮点数。
# Average city mpg over all cars. The CSV values are strings, so each one is
# converted to float before summing.
cty_values = [float(row['cty']) for row in mpg]
sum(cty_values) / len(cty_values)
16.86
# Average highway mpg over all cars. The CSV values are strings, so each one
# is converted to float before summing.
hwy_values = [float(row['hwy']) for row in mpg]
sum(hwy_values) / len(hwy_values)
23.44
%使用set去掉重复的项
# Distinct cylinder values; building a set drops the duplicates.
cylinders = {row['cyl'] for row in mpg}
cylinders
{'4', '5', '6', '8'}
%按气缸数(cyl)划分组别,并计算每个组别的平均城市燃油经济性(cty)
from collections import defaultdict

# Average city mpg for each cylinder level, accumulated in a single pass over
# the data instead of rescanning every row once per cylinder level.
cty_totals = defaultdict(float)  # cylinder label -> running sum of cty mpg
cty_counts = defaultdict(int)    # cylinder label -> number of rows seen

for d in mpg:  # iterate over all dictionaries
    cty_totals[d['cyl']] += float(d['cty'])  # values are strings: convert first
    cty_counts[d['cyl']] += 1

# One tuple per cylinder level: (cylinder label, average cty mpg).
CtyMpgByCyl = [(c, cty_totals[c] / cty_counts[c]) for c in cty_totals]

# The labels are strings, so this orders them lexicographically.
CtyMpgByCyl.sort(key=lambda x: x[0])
CtyMpgByCyl
[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]