akaros/kern/arch/x86/bitops.h
#pragma once

/*
 * Copyright 1992, Linus Torvalds.
 *
 * Note: inlines with more than a single statement should be marked
 * __always_inline to avoid problems with older gcc's inlining heuristics.
 */

#define BIT_64(n)                       (U64_C(1) << (n))
#define DECLARE_BITMAP(name,bits) \
        unsigned long name[(bits+sizeof(unsigned long)*8 - 1)/(sizeof(unsigned long)*8)/*BITS_TO_LONGS(bits)*/]
/*
 * These have to be done with inline assembly: that way the bit-setting
 * is guaranteed to be atomic. All bit operations return 0 if the bit
 * was cleared before the operation and != 0 if it was not.
 *
 * bit 0 is the LSB of addr; bit BITS_PER_LONG is the LSB of (addr+1).
 */
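
/*
 * Illustrative example ("map" is an arbitrary name; set_bit()/test_bit() are
 * declared later in this file): with 64-bit longs, DECLARE_BITMAP(map, 128)
 * gives two words, and the bit number alone selects both the word and the bit
 * within it.
 *
 *      DECLARE_BITMAP(map, 128);       // unsigned long map[2] on x86_64
 *
 *      set_bit(0, map);                // LSB of map[0]
 *      set_bit(64, map);               // LSB of map[1]
 *      if (test_bit(64, map))
 *              ...;
 */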

#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
#error "Get a gcc of version 4.1 or newer"
#else
#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
#endif

#define ADDR                            BITOP_ADDR(addr)

#define LOCK_PREFIX "lock "
/*
 * We do the locked ops that don't return the old value as
 * a mask operation on a byte.
 */
#define IS_IMMEDIATE(nr)                (__builtin_constant_p(nr))
#define CONST_MASK_ADDR(nr, addr)       BITOP_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr)                  (1 << ((nr) & 7))
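
/*
 * Worked example (illustrative): for a compile-time-constant nr, the locked
 * byte ops below address the byte containing the bit and use an immediate
 * mask.  For nr == 13:
 *
 *      CONST_MASK_ADDR(13, addr)  ->  the byte at (void *)addr + 1   (13 >> 3)
 *      CONST_MASK(13)             ->  1 << (13 & 7)  ==  0x20
 *
 * so set_bit(13, addr) can compile to roughly "lock orb $0x20, 1(%[addr])"
 * instead of a "lock bts" with a register bit index.
 */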

/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * This function is atomic and may not be reordered.  See __set_bit()
 * if you do not require the atomic guarantees.
 *
 * Note: there are no guarantees that this function will not be reordered
 * on non x86 architectures, so if you are writing portable code,
 * make sure not to rely on its reordering guarantees.
 *
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static __always_inline void
set_bit(unsigned int nr, volatile unsigned long *addr)
{
        if (IS_IMMEDIATE(nr)) {
                asm volatile(LOCK_PREFIX "orb %1,%0"
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" ((uint8_t)CONST_MASK(nr))
                        : "memory");
        } else {
                asm volatile(LOCK_PREFIX "bts %1,%0"
                        : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
        }
}
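
/*
 * Usage sketch (hypothetical names): marking a slot as allocated from any core
 * without extra locking.  Because set_bit() is a locked read-modify-write,
 * concurrent callers never lose each other's updates.
 *
 *      DECLARE_BITMAP(slot_used, 256);
 *
 *      static void mark_slot_used(int idx)
 *      {
 *              set_bit(idx, slot_used);
 *      }
 */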

/**
 * __set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Unlike set_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __set_bit(int nr, volatile unsigned long *addr)
{
        asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}

/**
 * clear_bit - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit() is atomic and may not be reordered.  However, it does
 * not contain a memory barrier, so if it is used for locking purposes,
 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
 * in order to ensure changes are visible on other processors.
 *
 * Note from brho: I think the use of LOCK_PREFIX (assuming it is "lock")
 * provides a memory barrier against hardware reordering accesses around the
 * LOCK ("lock" serializes).  This lacks a cmb() (called a barrier() in Linux),
 * which would prevent the compiler from reordering the instructions.  The
 * above-mentioned smp_mb__before_clear_bit appears to be this cmb(), so it's
 * not clear what the usage of "memory barrier" means exactly here and
 * elsewhere in this file. */
static __always_inline void
clear_bit(int nr, volatile unsigned long *addr)
{
        if (IS_IMMEDIATE(nr)) {
                asm volatile(LOCK_PREFIX "andb %1,%0"
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" ((uint8_t)~CONST_MASK(nr)));
        } else {
                asm volatile(LOCK_PREFIX "btr %1,%0"
                        : BITOP_ADDR(addr)
                        : "Ir" (nr));
        }
}

/*
 * clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit_unlock() is atomic and implies release semantics before the memory
 * operation. It can be used for an unlock.
 */
static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
{
        cmb();
        clear_bit(nr, addr);
}

static inline void __clear_bit(int nr, volatile unsigned long *addr)
{
        asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}

/*
 * __clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * __clear_bit_unlock() is non-atomic and implies release semantics before the memory
 * operation. It can be used for an unlock if no other CPUs can concurrently
 * modify other bits in the word.
 *
 * No memory barrier is required here, because x86 cannot reorder stores past
 * older loads. Same principle as spin_unlock.
 */
static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
{
        cmb();
        __clear_bit(nr, addr);
}

#define smp_mb__before_clear_bit()      cmb()
#define smp_mb__after_clear_bit()       cmb()

/**
 * __change_bit - Toggle a bit in memory
 * @nr: the bit to change
 * @addr: the address to start counting from
 *
 * Unlike change_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static inline void __change_bit(int nr, volatile unsigned long *addr)
{
        asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}

/**
 * change_bit - Toggle a bit in memory
 * @nr: Bit to change
 * @addr: Address to start counting from
 *
 * change_bit() is atomic and may not be reordered.
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static inline void change_bit(int nr, volatile unsigned long *addr)
{
        if (IS_IMMEDIATE(nr)) {
                asm volatile(LOCK_PREFIX "xorb %1,%0"
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" ((uint8_t)CONST_MASK(nr)));
        } else {
                asm volatile(LOCK_PREFIX "btc %1,%0"
                        : BITOP_ADDR(addr)
                        : "Ir" (nr));
        }
}

/**
 * test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
{
        int oldbit;

        asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
                     "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

        return oldbit;
}

/**
 * test_and_set_bit_lock - Set a bit and return its old value for lock
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This is the same as test_and_set_bit on x86.
 */
static __always_inline int
test_and_set_bit_lock(int nr, volatile unsigned long *addr)
{
        return test_and_set_bit(nr, addr);
}
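
/*
 * Sketch of the intended lock/unlock pairing (hypothetical wrappers; the
 * spin-wait hint cpu_relax() is assumed to be provided elsewhere in the
 * kernel): bit 0 of *word acts as a simple bit spinlock.
 *
 *      static void bit_lock(unsigned long *word)
 *      {
 *              while (test_and_set_bit_lock(0, word))
 *                      cpu_relax();
 *      }
 *
 *      static void bit_unlock(unsigned long *word)
 *      {
 *              clear_bit_unlock(0, word);      // cmb() + locked clear: release
 *      }
 */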

/**
 * __test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 */
static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
{
        int oldbit;

        asm("bts %2,%1\n\t"
            "sbb %0,%0"
            : "=r" (oldbit), ADDR
            : "Ir" (nr));
        return oldbit;
}

/**
 * test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
{
        int oldbit;

        asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
                     "sbb %0,%0"
                     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

        return oldbit;
}
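
/*
 * Usage sketch (hypothetical names): consuming a "work pending" flag exactly
 * once.  Only the caller that sees the 1 -> 0 transition does the work, even
 * if several cores race on the same bit.
 *
 *      if (test_and_clear_bit(WORK_PENDING_BIT, &flags))
 *              do_pending_work();
 */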

/**
 * __test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 *
 * Note: the operation is performed atomically with respect to
 * the local CPU, but not other CPUs. Portable code should not
 * rely on this behaviour.
 * KVM relies on this behaviour on x86 for modifying memory that is also
 * accessed from a hypervisor on the same CPU if running in a VM: don't change
 * this without also updating arch/x86/kernel/kvm.c
 */
static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
{
        int oldbit;

        asm volatile("btr %2,%1\n\t"
                     "sbb %0,%0"
                     : "=r" (oldbit), ADDR
                     : "Ir" (nr));
        return oldbit;
}

/* WARNING: non atomic and it can be reordered! */
static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
{
        int oldbit;

        asm volatile("btc %2,%1\n\t"
                     "sbb %0,%0"
                     : "=r" (oldbit), ADDR
                     : "Ir" (nr) : "memory");

        return oldbit;
}

/**
 * test_and_change_bit - Change a bit and return its old value
 * @nr: Bit to change
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
{
        int oldbit;

        asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
                     "sbb %0,%0"
                     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");

        return oldbit;
}

static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
{
        return ((1UL << (nr % BITS_PER_LONG)) &
                (addr[nr / BITS_PER_LONG])) != 0;
}

static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
{
        int oldbit;

        asm volatile("bt %2,%1\n\t"
                     "sbb %0,%0"
                     : "=r" (oldbit)
                     : "m" (*(unsigned long *)addr), "Ir" (nr));

        return oldbit;
}

#define test_bit(nr, addr)                      \
        (__builtin_constant_p((nr))             \
         ? constant_test_bit((nr), (addr))      \
         : variable_test_bit((nr), (addr)))
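
/*
 * Illustrative dispatch (names are arbitrary): with a literal bit number the
 * compiler folds the test into constant_test_bit() (a plain load and mask);
 * with a runtime value it uses variable_test_bit() ("bt" + "sbb").
 *
 *      if (test_bit(3, map))           // constant path
 *              ...;
 *      if (test_bit(n, map))           // variable path
 *              ...;
 */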
/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static inline unsigned long __ffs(unsigned long word)
{
        asm("rep; bsf %1,%0"
                : "=r" (word)
                : "rm" (word));
        return word;
}

/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
static inline unsigned long ffz(unsigned long word)
{
        asm("rep; bsf %1,%0"
                : "=r" (word)
                : "r" (~word));
        return word;
}
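
/*
 * Quick reference (illustrative values): both helpers return 0-based bit
 * positions and are undefined when no such bit exists, so callers check first.
 *
 *      __ffs(0x18UL) == 3              // lowest set bit
 *      ffz(0x07UL)   == 3              // lowest clear bit
 *
 *      if (word != ~0UL)
 *              first_free = ffz(word);
 */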

/*
 * __fls: find last set bit in word
 * @word: The word to search
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static inline unsigned long __fls(unsigned long word)
{
        asm("bsr %1,%0"
            : "=r" (word)
            : "rm" (word));
        return word;
}

#undef ADDR

/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines, therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 */
static inline int ffs(int x)
{
        int r;

        /*
         * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
         * dest reg is undefined if x==0, but Intel's CPU architects say the
         * value is actually left unchanged, except that the top 32 bits will
         * be cleared.
         *
         * We cannot do this on 32 bits because at the very least some
         * 486 CPUs did not behave this way.
         */
        asm("bsfl %1,%0"
            : "=r" (r)
            : "rm" (x), "0" (-1));
        return r + 1;
}

/**
 * fls - find last set bit in word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffs, but returns the position of the most significant set bit.
 *
 * fls(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 32.
 */
static inline int fls(int x)
{
        int r;

        /*
         * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
         * dest reg is undefined if x==0, but Intel's CPU architects say the
         * value is actually left unchanged, except that the top 32 bits will
         * be cleared.
         *
         * We cannot do this on 32 bits because at the very least some
         * 486 CPUs did not behave this way.
         */
        asm("bsrl %1,%0"
            : "=r" (r)
            : "rm" (x), "0" (-1));
        return r + 1;
}
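
/*
 * Convention check (illustrative values): unlike __ffs()/__fls() above, ffs()
 * and fls() are 1-based and define the zero case.
 *
 *      ffs(0)      == 0        fls(0)      == 0
 *      ffs(0x10)   == 5        fls(0x10)   == 5
 *      __ffs(0x10) == 4        __fls(0x10) == 4
 */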

/**
 * fls64 - find last set bit in a 64-bit word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffsll, but returns the position of the most significant set bit.
 *
 * fls64(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 64.
 */
static __always_inline int fls64(uint64_t x)
{
        int bitpos = -1;
        /*
         * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
         * dest reg is undefined if x==0, but Intel's CPU architects say the
         * value is actually left unchanged.
         */
        asm("bsrq %1,%q0"
            : "+r" (bitpos)
            : "rm" (x));
        return bitpos + 1;
}
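
/*
 * Illustrative values for the 64-bit variant:
 *
 *      fls64(0)          == 0
 *      fls64(1)          == 1
 *      fls64(1ULL << 40) == 41
 */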