lib: sbi: Replace test-and-set locks with ticket locks

Replace the test-and-set spinlock implementation with ticket locks
in order to provide fairness (in the form of FIFO ordering).

The implementation uses a 32-bit wide struct consisting of two
16-bit counters, owner and next, an approach inspired by similar
ticket lock implementations on other architectures. Keeping the
whole lock in a single 32-bit word allows the same code to work on
both RV32 and RV64 (see the layout sketch after the tags below).

Signed-off-by: Christoph Muellner <cmuellner@linux.com>
Reviewed-by: Anup Patel <anup.patel@wdc.com>
Reviewed-by: Xiang W <wxjstz@126.com>
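---
Note: this patch relies on the spinlock_t layout and the TICKET_SHIFT
constant from the matching <sbi/riscv_locks.h> change, which is not
part of the hunk below. A minimal sketch of the assumed definitions
(field names match the code; the exact header contents may differ):

	/*
	 * Ticket lock packed into a single 32-bit word so that one AMO
	 * instruction can take a ticket: 'owner' sits in the low
	 * halfword and 'next' in the high halfword (RISC-V is
	 * little-endian). The word must be 4-byte aligned for the
	 * 32-bit AMOs to be legal.
	 */
	typedef struct {
		u16 owner;	/* ticket currently being served */
		u16 next;	/* next ticket to hand out */
	} spinlock_t;

	#define TICKET_SHIFT	16	/* bit offset of 'next' in the lock word */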
diff --git a/lib/sbi/riscv_locks.c b/lib/sbi/riscv_locks.c
index 4d1d9c0..75b443b 100644
--- a/lib/sbi/riscv_locks.c
+++ b/lib/sbi/riscv_locks.c
@@ -2,44 +2,76 @@
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2019 Western Digital Corporation or its affiliates.
- *
- * Authors:
- *   Anup Patel <anup.patel@wdc.com>
+ * Copyright (c) 2021 Christoph Müllner <cmuellner@linux.com>
  */
 
 #include <sbi/riscv_barrier.h>
 #include <sbi/riscv_locks.h>
 
-int spin_lock_check(spinlock_t *lock)
+static inline int spin_lock_unlocked(spinlock_t lock)
 {
-	return (lock->lock == __RISCV_SPIN_UNLOCKED) ? 0 : 1;
+	return lock.owner == lock.next;
+}
+
+bool spin_lock_check(spinlock_t *lock)
+{
+	RISCV_FENCE(r, rw);
+	return !spin_lock_unlocked(*lock);
 }
 
 int spin_trylock(spinlock_t *lock)
 {
-	int tmp = 1, busy;
+	unsigned long inc = 1u << TICKET_SHIFT;
+	unsigned long mask = 0xffffu << TICKET_SHIFT;
+	u32 l0, tmp1, tmp2;
 
 	__asm__ __volatile__(
-		"	amoswap.w %0, %2, %1\n" RISCV_ACQUIRE_BARRIER
-		: "=r"(busy), "+A"(lock->lock)
-		: "r"(tmp)
+		/* Get the current lock counters. */
+		"1:	lr.w.aq	%0, %3\n"
+		"	slli	%2, %0, %6\n"
+		"	and	%2, %2, %5\n"
+		"	and	%1, %0, %5\n"
+		/* Is the lock free right now? */
+		"	bne	%1, %2, 2f\n"
+		"	add	%0, %0, %4\n"
+		/* Acquire the lock. */
+		"	sc.w.rl	%0, %0, %3\n"
+		"	bnez	%0, 1b\n"
+		"2:"
+		: "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
+		: "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
 		: "memory");
 
-	return !busy;
+	return !l0;
 }
 
 void spin_lock(spinlock_t *lock)
 {
-	while (1) {
-		if (spin_lock_check(lock))
-			continue;
+	unsigned long inc = 1u << TICKET_SHIFT;
+	unsigned long mask = 0xffffu;
+	u32 l0, tmp1, tmp2;
 
-		if (spin_trylock(lock))
-			break;
-	}
+	__asm__ __volatile__(
+		/* Atomically increment the next ticket. */
+		"	amoadd.w.aqrl	%0, %4, %3\n"
+
+		/* Did we get the lock? */
+		"	srli	%1, %0, %6\n"
+		"	and	%1, %1, %5\n"
+		"1:	and	%2, %0, %5\n"
+		"	beq	%1, %2, 2f\n"
+
+		/* If not, then spin on the lock. */
+		"	lw	%0, %3\n"
+		RISCV_ACQUIRE_BARRIER
+		"	j	1b\n"
+		"2:"
+		: "=&r"(l0), "=&r"(tmp1), "=&r"(tmp2), "+A"(*lock)
+		: "r"(inc), "r"(mask), "I"(TICKET_SHIFT)
+		: "memory");
 }
 
 void spin_unlock(spinlock_t *lock)
 {
-	__smp_store_release(&lock->lock, __RISCV_SPIN_UNLOCKED);
+	__smp_store_release(&lock->owner, lock->owner + 1);
 }