原子变量的底层实现原理
原子变量的底层实现原理
// Minimal demo program: performs a single atomic increment on a local
// variable using GCC's legacy __sync builtin.
int main()
{
int value = 0;
// Atomic read-modify-write: adds 1 to `value`. The builtin's result is
// not used here.
__sync_fetch_and_add(&value, 1);
}
# Compiler-generated x86-64 assembly (GNU as, AT&T syntax: `op src, dst`)
# for main() of test2.cpp, as emitted by the GCC named in .ident below.
.file "test2.cpp"
.text
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
# --- prologue: set up a frame pointer; the .cfi_* lines are unwind metadata ---
pushq %rbp                  # save the caller's frame pointer
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp             # %rbp = base of this function's frame
.cfi_def_cfa_register 6
# NOTE(review): %rsp is never lowered, so the local at -4(%rbp) sits below
# %rsp -- presumably relying on the SysV red zone of a leaf function; confirm.
movl $0, -4(%rbp)           # int value = 0;
# The atomic increment itself: an ordinary memory-destination addl made
# atomic by the lock prefix.
# NOTE(review): presumably addl (not xadd) is chosen because the builtin's
# fetched result is discarded in the source -- confirm against GCC output
# when the result is used.
lock addl $1, -4(%rbp)      # __sync_fetch_and_add(&value, 1);
# --- epilogue ---
movl $0, %eax               # main's return value: 0
popq %rbp                   # restore the caller's frame pointer
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (GNU) 11.3.1 20221121 (Red Hat 11.3.1-4)"
.section .note.GNU-stack,"",@progbits
// Fetch-then-operate family (like n++): each builtin atomically applies the
// operation to *ptr and returns the value *ptr held BEFORE the operation.
// `type` is a placeholder for an integral or pointer type.
// In the trailing comments, m is the old *ptr and n is `value`; the
// expression shown is the new value stored into *ptr.
type __sync_fetch_and_add (type *ptr, type value); // m + n
type __sync_fetch_and_sub (type *ptr, type value); // m - n
type __sync_fetch_and_or (type *ptr, type value); // m | n
type __sync_fetch_and_and (type *ptr, type value); // m & n
type __sync_fetch_and_xor (type *ptr, type value); // m ^ n
type __sync_fetch_and_nand (type *ptr, type value); // ~(m & n)  (GCC >= 4.4; older GCC computed ~m & n)
// Operate-then-fetch family (like ++n): each builtin atomically applies the
// operation to *ptr and returns the NEW value stored in *ptr.
// `type` is a placeholder for an integral or pointer type.
// In the trailing comments, m is the old *ptr and n is `value`; the
// expression shown is both the value stored into *ptr and the return value.
type __sync_add_and_fetch (type *ptr, type value); // m + n
type __sync_sub_and_fetch (type *ptr, type value); // m - n
type __sync_or_and_fetch (type *ptr, type value); // m | n
type __sync_and_and_fetch (type *ptr, type value); // m & n
type __sync_xor_and_fetch (type *ptr, type value); // m ^ n
type __sync_nand_and_fetch (type *ptr, type value); // ~(m & n)  (GCC >= 4.4; older GCC computed ~m & n)
//CAS类
bool__sync_bool_compare_and_swap(type* ptr, type oldval, type newval, ...);
type __sync_val_compare_and_swap(type* ptr, type oldval, type newval, ...);
/*
对应的伪代码
{if (*ptr == oldval) { *ptr = newval; returntrue; } else { returnfalse; }}
{if (*ptr == oldval) { *ptr = newval; }returnoldval; }
*/
Lock指令
User-level locks use the processor's atomic instructions to atomically update a memory location. An instruction is made atomic by adding a lock prefix to it, and its destination operand must be a memory address. The following instructions can run atomically with a lock prefix on current Intel processors: ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, and XCHG. EnterCriticalSection (a Windows API) uses atomic instructions to attempt to acquire a user-land lock before jumping into the kernel. For most instructions the lock prefix must be written explicitly; the exception is the xchg instruction, where the lock prefix is implied whenever the instruction has a memory operand.
In the days of Intel 486 processors, the lock prefix asserted a lock on the bus, which came with a large performance hit. Starting with the Intel Pentium Pro architecture, the bus lock is transformed into a cache lock. Even on the most modern architectures, a lock is still asserted on the bus if the locked operand resides in uncacheable memory or if it extends across a cache-line boundary, i.e. is split between two cache lines.
Both of these scenarios are unlikely, so most lock prefixes will be transformed into a cache lock which is much less expensive.