Interesting Tricks

Preface

其实内核里有很多非常有意思的宏和函数,它们的实现都值得好好学习。这里把遇到的整合在一起,也做一个参考~

Hoooh

大小端

first, uapi/linux/byteorder/little_endian.h
or, xxx/big_endian.h

会定义不同的宏,比如 __le32_to_cpu:对于小端的 CPU
自然就是不做处理,反之(大端 CPU)则需要做字节翻转
这个宏在这两个文件里都有定义,但编译时只会 include 其中一个
所以很容易理解:xxx_to_cpu 具体展开成哪个宏,在编译的时候就确定了~
对于 小端
/*
 * In-place ("s" suffix) byte-order conversions as defined for a
 * little-endian CPU.
 *
 * The little-endian variants expand to a statement that only evaluates x
 * and discards the result. The big-endian variants forward x to the
 * matching __swabNNs() helper (defined elsewhere, not shown here).
 */
#define __cpu_to_le64s(x) do { (void)(x); } while (0)
#define __le64_to_cpus(x) do { (void)(x); } while (0)
#define __cpu_to_le32s(x) do { (void)(x); } while (0)
#define __le32_to_cpus(x) do { (void)(x); } while (0)
#define __cpu_to_le16s(x) do { (void)(x); } while (0)
#define __le16_to_cpus(x) do { (void)(x); } while (0)
#define __cpu_to_be64s(x) __swab64s((x))
#define __be64_to_cpus(x) __swab64s((x))
#define __cpu_to_be32s(x) __swab32s((x))
#define __be32_to_cpus(x) __swab32s((x))
#define __cpu_to_be16s(x) __swab16s((x))
#define __be16_to_cpus(x) __swab16s((x))

而 大端
/*
 * In-place ("s" suffix) byte-order conversions as defined for a
 * big-endian CPU: the mirror image of the little-endian table.
 *
 * Here the little-endian variants forward x to the matching __swabNNs()
 * helper (defined elsewhere, not shown here), while the big-endian variants
 * expand to a statement that only evaluates x and discards the result.
 */
#define __cpu_to_le64s(x) __swab64s((x))
#define __le64_to_cpus(x) __swab64s((x))
#define __cpu_to_le32s(x) __swab32s((x))
#define __le32_to_cpus(x) __swab32s((x))
#define __cpu_to_le16s(x) __swab16s((x))
#define __le16_to_cpus(x) __swab16s((x))
#define __cpu_to_be64s(x) do { (void)(x); } while (0)
#define __be64_to_cpus(x) do { (void)(x); } while (0)
#define __cpu_to_be32s(x) do { (void)(x); } while (0)
#define __be32_to_cpus(x) do { (void)(x); } while (0)
#define __cpu_to_be16s(x) do { (void)(x); } while (0)
#define __be16_to_cpus(x) do { (void)(x); } while (0)

正好相反

大小端之间的转换是同一种操作(字节翻转),所以两个方向最终调用的是同一组 __swabXX 函数,最终都会来到下面这里。如果 CPU 自带切换大小端的指令,就通过编译器内建函数或 __arch_swabXX(有些架构用内嵌汇编实现)完成;如果没有,就退回到通用的移位实现。

/**
 * __swab64 - return a byteswapped 64-bit value
 * @x: value to byteswap
 *
 * __builtin_constant_p() picks the implementation: when the compiler
 * reports @x as a compile-time constant the pure expression
 * ___constant_swab64() is used, otherwise the inline function __fswab64()
 * is called.
 */
#define __swab64(x)				\
	(__builtin_constant_p((__u64)(x)) ?	\
	___constant_swab64(x) :			\
	__fswab64(x))

以 64bit 作为例子,其他都是一样的

/*
 * ___constant_swab64 - reverse the byte order of a 64-bit value
 * @x: value to byteswap (evaluated once per byte, so it must be free of
 *     side effects)
 *
 * Every byte is moved to its mirrored position by shifting the whole value
 * first and then masking out the one byte that landed in the target lane.
 * The eight lanes are OR-ed together and the result has type __u64.
 */
#define ___constant_swab64(x) ((__u64)(				\
	(((__u64)(x) << 56) & (__u64)0xff00000000000000ULL) |	\
	(((__u64)(x) << 40) & (__u64)0x00ff000000000000ULL) |	\
	(((__u64)(x) << 24) & (__u64)0x0000ff0000000000ULL) |	\
	(((__u64)(x) <<  8) & (__u64)0x000000ff00000000ULL) |	\
	(((__u64)(x) >>  8) & (__u64)0x00000000ff000000ULL) |	\
	(((__u64)(x) >> 24) & (__u64)0x0000000000ff0000ULL) |	\
	(((__u64)(x) >> 40) & (__u64)0x000000000000ff00ULL) |	\
	(((__u64)(x) >> 56) & (__u64)0x00000000000000ffULL)))
	
/*
 * __fswab64 - byteswap a 64-bit value through the best available primitive
 * @val: value to byteswap
 *
 * The implementation is chosen at preprocessing time, first match wins:
 * the compiler builtin, an architecture-provided __arch_swab64(), two
 * 32-bit swaps stitched together, or the generic shift-and-mask expression.
 *
 * Returns @val with its eight bytes in reverse order.
 */
static inline __attribute_const__ __u64 __fswab64(__u64 val)
{
#if defined(__HAVE_BUILTIN_BSWAP64__)
	return __builtin_bswap64(val);
#elif defined(__arch_swab64)
	return __arch_swab64(val);
#elif defined(__SWAB_64_THRU_32__)
	/*
	 * Swap each 32-bit half on its own; the swapped low half then becomes
	 * the new high half and vice versa.
	 */
	const __u32 lower = (__u32)val;
	const __u32 upper = (__u32)(val >> 32);
	__u64 swapped = __fswab32(lower);

	swapped <<= 32;
	swapped |= __fswab32(upper);
	return swapped;
#else
	return ___constant_swab64(val);
#endif
}

/*
 * __fswab32 - byteswap a 32-bit value through the best available primitive
 * @val: value to byteswap
 *
 * Chosen at preprocessing time, first match wins: the compiler builtin,
 * an architecture-provided __arch_swab32(), or the generic
 * ___constant_swab32() expression (defined elsewhere, not shown here).
 */
static inline __attribute_const__ __u32 __fswab32(__u32 val)
{
#ifdef __HAVE_BUILTIN_BSWAP32__
	return __builtin_bswap32(val);
#elif defined(__arch_swab32)
	return __arch_swab32(val);
#else
	return ___constant_swab32(val);
#endif
}

处理方式很粗暴也很直接:对于常数,也就是编译期就能确定的数值,直接用宏表达式处理就好了;对于编译期无法确定的参数,则调用函数处理。但在没有内建函数或架构专用指令时,两者本质还是一样的,都是做掩码加移位处理,并不是我们想象中的逐个字节做交换——那些都太复杂了,直接用位与和移位操作即可解决。这也说明,对于一些复杂的位操作,就应该直接暴力展开,而不是去设计精巧的算法。

IO_操作

这里提提 io、mmio 那些 resource 吧,其实只是把一块内存(page 结构)设置为不可用而已,因为页表做映射时不可能在页内继续修改映射,而物理内存的映射一般也是以页为单位的。x86 的 IO 地址一般也是固定的;对于 ARM 来说,没有独立的 IO 地址,一般都是由硬件布线决定的。而且所谓的 IO 地址,只是在地址线上加一根使能线而已,独立编址或者统一编址都差不多。

/*
 *  IO port access primitives
 *  -------------------------
 *
 * The ARM doesn't have special IO access instructions; all IO is memory
 * mapped.  Note that these are defined to perform little endian accesses
 * only.  Their primary purpose is to access PCI and ISA peripherals.
 *
 */

先看 ARM 吧,最近在看这方面的内容。本质就是下面几个函数,可能再加上一些内存屏障。当然,正如上面的注释所说,ARM 也为 x86 特有的 inb()、outb() 等定义了相应的宏。

/arch/arm/include/asm/io.h

/*
 * When running under a hypervisor, we want to avoid I/O accesses with
 * writeback addressing modes as these incur a significant performance
 * overhead (the address generation must be emulated in software).
 */
/*
 * __raw_writew - store a 16-bit value to I/O memory with a single "strh"
 * @val:  value to store (asm operand %1, held in a register)
 * @addr: destination (asm operand %0, a memory operand)
 *
 * The destination is passed as a read-write ("+") memory operand using the
 * "Q" constraint, which GCC documents for ARM as a memory reference whose
 * address is held in a single register; that matches the goal stated above.
 * The asm is volatile, so the compiler may not drop it.
 */
static inline void __raw_writew(u16 val, volatile void __iomem *addr)
{
	asm volatile("strh %1, %0"
		     : "+Q" (*(volatile u16 __force *)addr)
		     : "r" (val));
}

/*
 * __raw_readw - load a 16-bit value from I/O memory with a single "ldrh"
 * @addr: source (asm operand %0, a memory operand)
 *
 * Both operands are listed as outputs: %0 is the memory location (declared
 * read-write with "+Q" although the instruction only reads it) and %1 is
 * the register that receives the loaded value, hence "ldrh %1, %0".
 *
 * Returns the value that was loaded.
 */
static inline u16 __raw_readw(const volatile void __iomem *addr)
{
	u16 val;
	asm volatile("ldrh %1, %0"
		     : "+Q" (*(volatile u16 __force *)addr),
		       "=r" (val));
	return val;
}
#endif

/*
 * __raw_writeb - store an 8-bit value to I/O memory with a single "strb"
 * @val:  value to store (asm operand %1, held in a register)
 * @addr: destination (asm operand %0, a read-write memory operand)
 *
 * The "Qo" constraint lets the compiler pick between the two memory operand
 * kinds GCC calls "Q" and "o" (offsettable) for this access.
 */
static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
{
	asm volatile("strb %1, %0"
		     : "+Qo" (*(volatile u8 __force *)addr)
		     : "r" (val));
}

/*
 * __raw_writel - store a 32-bit value to I/O memory with a single "str"
 * @val:  value to store (asm operand %1, held in a register)
 * @addr: destination (asm operand %0, a read-write memory operand)
 *
 * The "Qo" constraint lets the compiler pick between the two memory operand
 * kinds GCC calls "Q" and "o" (offsettable) for this access.
 */
static inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
	asm volatile("str %1, %0"
		     : "+Qo" (*(volatile u32 __force *)addr)
		     : "r" (val));
}

/*
 * __raw_readb - load an 8-bit value from I/O memory with a single "ldrb"
 * @addr: source (asm operand %0, a memory operand)
 *
 * Both operands are listed as outputs: %0 is the memory location (declared
 * read-write with "+Qo" although the instruction only reads it) and %1 is
 * the register that receives the loaded value.
 *
 * Returns the value that was loaded.
 */
static inline u8 __raw_readb(const volatile void __iomem *addr)
{
	u8 val;
	asm volatile("ldrb %1, %0"
		     : "+Qo" (*(volatile u8 __force *)addr),
		       "=r" (val));
	return val;
}

/*
 * __raw_readl - load a 32-bit value from I/O memory with a single "ldr"
 * @addr: source (asm operand %0, a memory operand)
 *
 * Both operands are listed as outputs: %0 is the memory location (declared
 * read-write with "+Qo" although the instruction only reads it) and %1 is
 * the register that receives the loaded value.
 *
 * Returns the value that was loaded.
 */
static inline u32 __raw_readl(const volatile void __iomem *addr)
{
	u32 val;
	asm volatile("ldr %1, %0"
		     : "+Qo" (*(volatile u32 __force *)addr),
		       "=r" (val));
	return val;
}

Inline Assembly - msr_s

一些内置汇编的 Tricks 也是宏用到的

/*
 * Two-step stringification.
 *
 * The '#' operator turns its operand into a string literal without
 * macro-expanding it first.  __stringify() therefore hands its arguments to
 * __stringify_1(): they get expanded on the way in and are only stringified
 * afterwards.  For example, when compiled with -DFOO=bar, __stringify(FOO)
 * yields "bar" whereas __stringify_1(FOO) yields "FOO".
 *
 * Both macros are variadic so that a comma-separated list can be turned
 * into one string.
 */

#define __stringify_1(...)	#__VA_ARGS__
#define __stringify(...)	__stringify_1(__VA_ARGS__)

之所以要两层的原因,上面的注释已经解释了,下次来试试

/*
 * Each accessor expands to a comma-separated list of four items:
 * the read mnemonic, the write mnemonic, the stringified operand list
 * (with %0 / %Q0, %R0 as placeholders for the C operand) and the C type of
 * the value.  The list lines up with the (r, w, c, t) parameters of
 * __read_sysreg() / __write_sysreg().
 */
#define __ACCESS_CP15(CRn, Op1, CRm, Op2)	\
	"mrc", "mcr", __stringify(p15, Op1, %0, CRn, CRm, Op2), u32
#define __ACCESS_CP15_64(Op1, CRm)		\
	"mrrc", "mcrr", __stringify(p15, Op1, %Q0, %R0, CRm), u64

/*
 * __read_sysreg - issue the read instruction and yield the value read
 * @r: read mnemonic (string literal)
 * @w: write mnemonic (unused here; present so the argument list matches)
 * @c: operand string, pasted after the mnemonic
 * @t: C type of the temporary that receives the value
 *
 * Implemented as a statement expression whose value is the temporary.
 *
 * read_sysreg() only forwards its arguments; the extra level makes the
 * preprocessor expand an accessor macro into the four arguments before
 * __read_sysreg() is invoked.
 */
#define __read_sysreg(r, w, c, t) ({				\
	t __val;						\
	asm volatile(r " " c : "=r" (__val));			\
	__val;							\
})
#define read_sysreg(...)		__read_sysreg(__VA_ARGS__)

/*
 * __write_sysreg - issue the write instruction with @v cast to type @t
 * @v: value to write
 * @r: read mnemonic (unused here)
 * @w: write mnemonic (string literal)
 * @c: operand string, pasted after the mnemonic
 * @t: C type @v is converted to before being handed to the asm
 *
 * write_sysreg() forwards @v plus the expanded accessor arguments.
 */
#define __write_sysreg(v, r, w, c, t)	asm volatile(w " " c : : "r" ((t)(v)))
#define write_sysreg(v, ...)		__write_sysreg(v, __VA_ARGS__)

这个 Macro 写得非常漂亮,这是 4.19 的代码。在最早阅读的 3.17 中,使用的是自己定义的汇编宏命令,非常难阅读,似乎是直接利用指令的硬编码来生成的。



/* Low level accessors */
/*
 * gic_read_iar - read the register encoded by ICC_IAR1_EL1
 *
 * The register is not named in the asm text; instead the numeric encoding
 * produced by __stringify(ICC_IAR1_EL1) is passed to the mrs_s assembler
 * macro.
 *
 * Returns the 64-bit value that was read.
 */
static u64 __maybe_unused gic_read_iar(void)
{
	u64 irqstat;

	asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
	return irqstat;
}

/* Encoding of ICC_IAR1_EL1: op0=3, op1=0, CRn=12, CRm=12, op2=0. */
#define ICC_IAR1_EL1			sys_reg(3, 0, 12, 12, 0)


arm64/include/asm/sysreg.h

/*
 * sys_reg - pack the five fields of a system register name into one number
 * @op0: placed at bit 19 after subtracting 2
 * @op1: placed at bit 16
 * @crn: placed at bit 12
 * @crm: placed at bit 8
 * @op2: placed at bit 5
 *
 * The result is what the mrs_s / msr_s assembler macros OR into the
 * instruction word.
 */
#define sys_reg(op0, op1, crn, crm, op2)	\
	(((op2) << 5)  |			\
	 ((crm) << 8)  |			\
	 ((crn) << 12) |			\
	 ((op1) << 16) |			\
	 (((op0) - 2) << 19))

/*
 * mrs_s / msr_s: assembler macros that build the instruction word by hand
 * from a numeric register encoding instead of a register name.
 * NOTE(review): presumably meant for system registers the assembler does
 * not know by name -- confirm against the upstream header.
 *
 * The same directives are provided twice: verbatim for assembly sources,
 * and wrapped in a top-level asm() string (with the backslashes doubled)
 * for C sources.
 */
#ifdef __ASSEMBLY__

	/* Define __reg_num_x0 .. __reg_num_x30 as 0 .. 30, and xzr as 31. */
	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
	.equ	__reg_num_x\num, \num
	.endr
	.equ	__reg_num_xzr, 31

	/* mrs_s rt, sreg: emit 0xd5300000 | sreg | number of register rt. */
	.macro	mrs_s, rt, sreg
	.inst	0xd5300000|(\sreg)|(__reg_num_\rt)
	.endm

	/* msr_s sreg, rt: emit 0xd5100000 | sreg | number of register rt. */
	.macro	msr_s, sreg, rt
	.inst	0xd5100000|(\sreg)|(__reg_num_\rt)
	.endm

#else

/* Same definitions as above, handed to the assembler from C. */
asm(
"	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
"	.equ	__reg_num_x\\num, \\num\n"
"	.endr\n"
"	.equ	__reg_num_xzr, 31\n"
"\n"
"	.macro	mrs_s, rt, sreg\n"
"	.inst	0xd5300000|(\\sreg)|(__reg_num_\\rt)\n"
"	.endm\n"
"\n"
"	.macro	msr_s, sreg, rt\n"
"	.inst	0xd5100000|(\\sreg)|(__reg_num_\\rt)\n"
"	.endm\n"
);

#endif

对比另外一个版本,只需要

/* ICC_IAR1 through the CP15 accessor: CRn=c12, Op1=0, CRm=c12, Op2=0. */
#define ICC_IAR1			__ACCESS_CP15(c12, 0, c12, 0)

非常容易阅读,所以宏也是一门技术啊~~

CPU id

# ## __VA_ARGS__


/* Two object-like macros used as test arguments below. */
#define AA aa
#define BB bb

/* One-level stringification: the argument is NOT macro-expanded first. */
#define M_STR(A) #A
// Equivalent variadic form: #define M_STR(...) #__VA_ARGS__
/* Extra level: A is expanded before it reaches M_STR(). */
#define M_STR_1(A) M_STR(A)

/* One-level pasting: B is pasted as written, then the result is stringified. */
#define M_CAT(B) M_STR(test##B)
/* Extra level: B is expanded before it reaches M_CAT(). */
#define M_CAT_1(B) M_CAT(B)

/* Variadic front ends that forward to the two-level helpers. */
#define STR_VAR(...) M_STR_1(__VA_ARGS__)
#define CAT_VAR(...) M_CAT_1(__VA_ARGS__)

M_STR(AA) == "AA"
M_CAT(BB)  == "testBB"

STR_VAR(AA) == M_STR_1(AA) == "aa"
CAT_VAR(BB) == M_CAT_1(BB) == "testbb"

特殊的操作符 # 和 ## 不会对它们的操作数进行宏展开(__VA_ARGS__ 直接作为 # 或 ## 的操作数时也一样;而单独使用 __VA_ARGS__ 时是会正常展开的)。所以想要使用这些功能同时又要展开宏,就要再嵌套一层。

Last updated