linux kernel 如何处理大小端

linux kernel 如何处理大小端
暂时在用MPC8309,不太清楚大小端内核是什么时候给转的。

今天看了关于readl和writel具体实现的文章

今天就主要来分析下readl/writel如何实现高效的数据swap和寄存器读写。我们就以readl为例，针对big-endian处理器，如何来对寄存器数据进行处理。

kernel下readl定义如下，在include/asm-generic/io.h

#define readl(addr) __le32_to_cpu(__raw_readl(addr))

__raw_readl是最底层的寄存器读函数，很简单，就是直接读取寄存器数据。来看__le32_to_cpu的实现，该函数针对字节序有不同的实现，对于小端处理器，在./include/linux/byteorder/little_endian.h中，如下：

#define __le32_to_cpu(x) ((__force __u32)(__le32)(x))

相当于什么都没做。而对于大端处理器，在./include/linux/byteorder/big_endian.h中，如下：

#define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))

看字面意思也可以看出，__swab32实现数据翻转。等下我们就来分析__swab32的实现，精髓就在这个函数。

但是这之前先考虑一个问题，对于不同CPU，如arm mips ppc，怎么来选择使用little_endian.h还是big_endian.h的呢。

答案是，针对不同处理器平台，有arch/xxx/include/asm/byteorder.h头文件，来看下arm mips ppc的byteorder.h分别是什么。

arch/arm/include/asm/byteorder.h
1. * arch/arm/include/asm/byteorder.h
2. *
3. * ARM Endian-ness. In little endian mode, the data bus is connected such
4. * that byte accesses appear as:
5. * 0 = d0...d7, 1 = d8...d15, 2 = d16...d23, 3 = d24...d31
6. * and word accesses (data or instruction) appear as:
7. * d0...d31
8. *
9. * When in big endian mode, byte accesses appear as:
10. * 0 = d24...d31, 1 = d16...d23, 2 = d8...d15, 3 = d0...d7
11. * and word accesses (data or instruction) appear as:
12. * d0...d31
13. */
14. #ifndef __ASM_ARM_BYTEORDER_H
15. #define __ASM_ARM_BYTEORDER_H
17. #ifdef __ARMEB__
18. #include <linux/byteorder/big_endian.h>
19. #else
20. #include <linux/byteorder/little_endian.h>
21. #endif
23. #endif
arch/mips/include/asm/byteorder.h
1. /*
2. * This file is subject to the terms and conditions of the GNU General Public
3. * License. See the file "COPYING" in the main directory of this archive
4. * for more details.
5. *
6. * Copyright (C) 1996, 99, 2003 by Ralf Baechle
7. */
8. #ifndef _ASM_BYTEORDER_H
9. #define _ASM_BYTEORDER_H
11. #if defined(__MIPSEB__)
12. #include <linux/byteorder/big_endian.h>
13. #elif defined(__MIPSEL__)
14. #include <linux/byteorder/little_endian.h>
15. #else
16. # error "MIPS, but neither __MIPSEB__, nor __MIPSEL__???"
17. #endif
19. #endif /* _ASM_BYTEORDER_H */
arch/powerpc/include/asm/byteorder.h
1. #ifndef _ASM_POWERPC_BYTEORDER_H
2. #define _ASM_POWERPC_BYTEORDER_H
4. /*
5. * This program is free software; you can redistribute it and/or
6. * modify it under the terms of the GNU General Public License
7. * as published by the Free Software Foundation; either version
8. * 2 of the License, or (at your option) any later version.
9. */
10. #include <linux/byteorder/big_endian.h>
12. #endif /* _ASM_POWERPC_BYTEORDER_H */
可以看出arm mips在kernel下大小端都支持，arm mips也的确是可以选择处理器字节序。从上面的头文件看，该版本kernel下的ppc仅支持big-endian。（其实ppc处理器本身也是支持选择字节序的）

各个处理器平台的byteorder.h将little_endian.h/big_endian.h又包了一层，我们在编写driver时不需要关心处理器的字节序，只需要包含byteorder.h即可。

接下来看下最关键的__swab32函数，如下：

在include/linux/swab.h中
1. /**
2. * __swab32 - return a byteswapped 32-bit value
3. * @x: value to byteswap
4. */
5. #define __swab32(x) \
6. (__builtin_constant_p((__u32)(x)) ? \
7. ___constant_swab32(x) : \
8. __fswab32(x))
宏定义展开后，是一个条件（三目）运算符表达式。

__builtin_constant_p是一个gcc的内建函数，用于判断一个值在编译时是否是常数，如果参数是常数，函数返回 1，否则返回 0。
如果数据是常数，则使用___constant_swab32，实现如下：
1. #define ___constant_swab32(x) ((__u32)( \
2. (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \
3. (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \
4. (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \
5. (((__u32)(x) & (__u32)0xff000000UL) >> 24)))
对于常数数据，采用的是普通的位移然后拼接的方法。因为参数在编译期就是已知的常数，编译器可以在编译时直接算出翻转后的结果（常量折叠），运行时没有任何开销；而内联汇编无法在编译期求值，所以常数要单独走这条路径（kernel的解释是：对于常数，这样的处理是有必要的）。

如果数据是运行时计算数据，则使用__fswab32，实现如下：
1. static inline __attribute_const__ __u32 __fswab32(__u32 val)
2. {
3. #ifdef __arch_swab32
4. return __arch_swab32(val);
5. #else
6. return ___constant_swab32(val);
7. #endif
8. }
如果未定义__arch_swab32，则还是采用___constant_swab32方法翻转数据，但是arm mips ppc都定义了各自平台的__arch_swab32，来实现一个针对自己平台的高效的swap，分别定义如下：

arch/arm/include/asm/swab.h
1. static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
2. {
3. __asm__ ("rev %0, %1" : "=r" (x) : "r" (x));
4. return x;
5. }
arch/mips/include/asm/swab.h
1. static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
2. {
3. __asm__(
4. " wsbh %0, %1 \n"
5. " rotr %0, %0, 16 \n"
6. : "=r" (x)
7. : "r" (x));
9. return x;
10. }
arch/powerpc/include/asm/swab.h
1. static inline __attribute_const__ __u32 __arch_swab32(__u32 value)
2. {
3. __u32 result;
5. __asm__("rlwimi %0,%1,24,16,23\n\t"
6. "rlwimi %0,%1,8,8,15\n\t"
7. "rlwimi %0,%1,24,0,7"
8. : "=r" (result)
9. : "r" (value), "0" (value >> 24));
10. return result;
11. }
可以看出，arm使用1条指令（rev数据翻转指令），mips使用2条指令（wsbh rotr数据交换指令），ppc使用3条指令（rlwimi数据位移指令），来完成了32 bit数据的翻转。这相对于普通的位移拼接的方法要高效得多！

其实从函数名__fswab也可以看出是要实现fast swap的。
相关阅读:
tensorflow学习笔记五----------逻辑回归
 tensorflow学习笔记四----------构造线性回归模型
 tensorflow学习笔记三----------基本操作
 tensorflow学习笔记二----------变量
 tensorflow学习笔记一----------tensorflow安装
 大屏某区域滚动效果循环
 echarts轨迹图，各个城市线路图轨迹如何取值
 react组件，样式添加的方法
 微信小程序中hidden属性不生效
 react组件渲染编程html不成功
原文地址：https://www.cnblogs.com/yangv/p/5553717.html