blob: 5da36946b1b6deb90621f4d64378586c6c8c2670 [file] [log] [blame]
Paolo Bonzini51dee5e2017-01-12 19:07:52 +01001/*
2 * QemuLockCnt implementation
3 *
4 * Copyright Red Hat, Inc. 2017
5 *
6 * Author:
7 * Paolo Bonzini <pbonzini@redhat.com>
8 */
9#include "qemu/osdep.h"
10#include "qemu/thread.h"
11#include "qemu/atomic.h"
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +010012#include "trace.h"
Paolo Bonzini51dee5e2017-01-12 19:07:52 +010013
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +010014#ifdef CONFIG_LINUX
15#include "qemu/futex.h"
16
17/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
18 * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
19 * this is not the most relaxing citation I could make...). It is similar
20 * to mutex2 in the paper.
21 */
22
23#define QEMU_LOCKCNT_STATE_MASK 3
24#define QEMU_LOCKCNT_STATE_FREE 0 /* free, uncontended */
25#define QEMU_LOCKCNT_STATE_LOCKED 1 /* locked, uncontended */
26#define QEMU_LOCKCNT_STATE_WAITING 2 /* locked, contended */
27
28#define QEMU_LOCKCNT_COUNT_STEP 4
29#define QEMU_LOCKCNT_COUNT_SHIFT 2
30
31void qemu_lockcnt_init(QemuLockCnt *lockcnt)
32{
33 lockcnt->count = 0;
34}
35
36void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
37{
38}
39
40/* *val is the current value of lockcnt->count.
41 *
42 * If the lock is free, try a cmpxchg from *val to new_if_free; return
43 * true and set *val to the old value found by the cmpxchg in
44 * lockcnt->count.
45 *
46 * If the lock is taken, wait for it to be released and return false
47 * *without trying again to take the lock*. Again, set *val to the
48 * new value of lockcnt->count.
49 *
50 * If *waited is true on return, new_if_free's bottom two bits must not
51 * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
52 * does not know if there are other waiters. Furthermore, after *waited
53 * is set the caller has effectively acquired the lock. If it returns
54 * with the lock not taken, it must wake another futex waiter.
55 */
56static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
57 int new_if_free, bool *waited)
58{
59 /* Fast path for when the lock is free. */
60 if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
61 int expected = *val;
62
63 trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
Stefan Hajnoczid73415a2020-09-23 11:56:46 +010064 *val = qatomic_cmpxchg(&lockcnt->count, expected, new_if_free);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +010065 if (*val == expected) {
66 trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
67 *val = new_if_free;
68 return true;
69 }
70 }
71
72 /* The slow path moves from locked to waiting if necessary, then
73 * does a futex wait. Both steps can be repeated ad nauseam,
74 * only getting out of the loop if we can have another shot at the
75 * fast path. Once we can, get out to compute the new destination
76 * value for the fast path.
77 */
78 while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
79 if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
80 int expected = *val;
81 int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;
82
83 trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
Stefan Hajnoczid73415a2020-09-23 11:56:46 +010084 *val = qatomic_cmpxchg(&lockcnt->count, expected, new);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +010085 if (*val == expected) {
86 *val = new;
87 }
88 continue;
89 }
90
91 if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
92 *waited = true;
93 trace_lockcnt_futex_wait(lockcnt, *val);
94 qemu_futex_wait(&lockcnt->count, *val);
Stefan Hajnoczid73415a2020-09-23 11:56:46 +010095 *val = qatomic_read(&lockcnt->count);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +010096 trace_lockcnt_futex_wait_resume(lockcnt, *val);
97 continue;
98 }
99
100 abort();
101 }
102 return false;
103}
104
105static void lockcnt_wake(QemuLockCnt *lockcnt)
106{
107 trace_lockcnt_futex_wake(lockcnt);
108 qemu_futex_wake(&lockcnt->count, 1);
109}
110
111void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
112{
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100113 int val = qatomic_read(&lockcnt->count);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100114 bool waited = false;
115
116 for (;;) {
117 if (val >= QEMU_LOCKCNT_COUNT_STEP) {
118 int expected = val;
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100119 val = qatomic_cmpxchg(&lockcnt->count, val,
120 val + QEMU_LOCKCNT_COUNT_STEP);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100121 if (val == expected) {
122 break;
123 }
124 } else {
125 /* The fast path is (0, unlocked)->(1, unlocked). */
126 if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
127 &waited)) {
128 break;
129 }
130 }
131 }
132
133 /* If we were woken by another thread, we should also wake one because
134 * we are effectively releasing the lock that was given to us. This is
135 * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
136 * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
137 * wake someone.
138 */
139 if (waited) {
140 lockcnt_wake(lockcnt);
141 }
142}
143
144void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
145{
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100146 qatomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100147}
148
149/* Decrement a counter, and return locked if it is decremented to zero.
150 * If the function returns true, it is impossible for the counter to
151 * become nonzero until the next qemu_lockcnt_unlock.
152 */
153bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
154{
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100155 int val = qatomic_read(&lockcnt->count);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100156 int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
157 bool waited = false;
158
159 for (;;) {
160 if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
161 int expected = val;
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100162 val = qatomic_cmpxchg(&lockcnt->count, val,
163 val - QEMU_LOCKCNT_COUNT_STEP);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100164 if (val == expected) {
165 break;
166 }
167 } else {
168 /* If count is going 1->0, take the lock. The fast path is
169 * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
170 */
171 if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
172 return true;
173 }
174
175 if (waited) {
176 /* At this point we do not know if there are more waiters. Assume
177 * there are.
178 */
179 locked_state = QEMU_LOCKCNT_STATE_WAITING;
180 }
181 }
182 }
183
184 /* If we were woken by another thread, but we're returning in unlocked
185 * state, we should also wake a thread because we are effectively
186 * releasing the lock that was given to us. This is the case where
187 * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
188 * bits, and qemu_lockcnt_unlock would find it and wake someone.
189 */
190 if (waited) {
191 lockcnt_wake(lockcnt);
192 }
193 return false;
194}
195
196/* If the counter is one, decrement it and return locked. Otherwise do
197 * nothing.
198 *
199 * If the function returns true, it is impossible for the counter to
200 * become nonzero until the next qemu_lockcnt_unlock.
201 */
202bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
203{
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100204 int val = qatomic_read(&lockcnt->count);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100205 int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
206 bool waited = false;
207
208 while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
209 /* If count is going 1->0, take the lock. The fast path is
210 * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
211 */
212 if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
213 return true;
214 }
215
216 if (waited) {
217 /* At this point we do not know if there are more waiters. Assume
218 * there are.
219 */
220 locked_state = QEMU_LOCKCNT_STATE_WAITING;
221 }
222 }
223
224 /* If we were woken by another thread, but we're returning in unlocked
225 * state, we should also wake a thread because we are effectively
226 * releasing the lock that was given to us. This is the case where
227 * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
228 * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
229 */
230 if (waited) {
231 lockcnt_wake(lockcnt);
232 }
233 return false;
234}
235
236void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
237{
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100238 int val = qatomic_read(&lockcnt->count);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100239 int step = QEMU_LOCKCNT_STATE_LOCKED;
240 bool waited = false;
241
242 /* The third argument is only used if the low bits of val are 0
243 * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
244 * state.
245 */
246 while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
247 if (waited) {
248 /* At this point we do not know if there are more waiters. Assume
249 * there are.
250 */
251 step = QEMU_LOCKCNT_STATE_WAITING;
252 }
253 }
254}
255
256void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
257{
258 int expected, new, val;
259
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100260 val = qatomic_read(&lockcnt->count);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100261 do {
262 expected = val;
263 new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
264 trace_lockcnt_unlock_attempt(lockcnt, val, new);
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100265 val = qatomic_cmpxchg(&lockcnt->count, val, new);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100266 } while (val != expected);
267
268 trace_lockcnt_unlock_success(lockcnt, val, new);
269 if (val & QEMU_LOCKCNT_STATE_WAITING) {
270 lockcnt_wake(lockcnt);
271 }
272}
273
274void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
275{
276 int expected, new, val;
277
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100278 val = qatomic_read(&lockcnt->count);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100279 do {
280 expected = val;
281 new = val & ~QEMU_LOCKCNT_STATE_MASK;
282 trace_lockcnt_unlock_attempt(lockcnt, val, new);
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100283 val = qatomic_cmpxchg(&lockcnt->count, val, new);
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100284 } while (val != expected);
285
286 trace_lockcnt_unlock_success(lockcnt, val, new);
287 if (val & QEMU_LOCKCNT_STATE_WAITING) {
288 lockcnt_wake(lockcnt);
289 }
290}
291
292unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
293{
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100294 return qatomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100295}
296#else
Paolo Bonzini51dee5e2017-01-12 19:07:52 +0100297void qemu_lockcnt_init(QemuLockCnt *lockcnt)
298{
299 qemu_mutex_init(&lockcnt->mutex);
300 lockcnt->count = 0;
301}
302
303void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
304{
305 qemu_mutex_destroy(&lockcnt->mutex);
306}
307
308void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
309{
310 int old;
311 for (;;) {
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100312 old = qatomic_read(&lockcnt->count);
Paolo Bonzini51dee5e2017-01-12 19:07:52 +0100313 if (old == 0) {
314 qemu_lockcnt_lock(lockcnt);
315 qemu_lockcnt_inc_and_unlock(lockcnt);
316 return;
317 } else {
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100318 if (qatomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
Paolo Bonzini51dee5e2017-01-12 19:07:52 +0100319 return;
320 }
321 }
322 }
323}
324
325void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
326{
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100327 qatomic_dec(&lockcnt->count);
Paolo Bonzini51dee5e2017-01-12 19:07:52 +0100328}
329
330/* Decrement a counter, and return locked if it is decremented to zero.
331 * It is impossible for the counter to become nonzero while the mutex
332 * is taken.
333 */
334bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
335{
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100336 int val = qatomic_read(&lockcnt->count);
Paolo Bonzini51dee5e2017-01-12 19:07:52 +0100337 while (val > 1) {
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100338 int old = qatomic_cmpxchg(&lockcnt->count, val, val - 1);
Paolo Bonzini51dee5e2017-01-12 19:07:52 +0100339 if (old != val) {
340 val = old;
341 continue;
342 }
343
344 return false;
345 }
346
347 qemu_lockcnt_lock(lockcnt);
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100348 if (qatomic_fetch_dec(&lockcnt->count) == 1) {
Paolo Bonzini51dee5e2017-01-12 19:07:52 +0100349 return true;
350 }
351
352 qemu_lockcnt_unlock(lockcnt);
353 return false;
354}
355
356/* Decrement a counter and return locked if it is decremented to zero.
357 * Otherwise do nothing.
358 *
359 * It is impossible for the counter to become nonzero while the mutex
360 * is taken.
361 */
362bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
363{
364 /* No need for acquire semantics if we return false. */
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100365 int val = qatomic_read(&lockcnt->count);
Paolo Bonzini51dee5e2017-01-12 19:07:52 +0100366 if (val > 1) {
367 return false;
368 }
369
370 qemu_lockcnt_lock(lockcnt);
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100371 if (qatomic_fetch_dec(&lockcnt->count) == 1) {
Paolo Bonzini51dee5e2017-01-12 19:07:52 +0100372 return true;
373 }
374
375 qemu_lockcnt_inc_and_unlock(lockcnt);
376 return false;
377}
378
379void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
380{
381 qemu_mutex_lock(&lockcnt->mutex);
382}
383
384void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
385{
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100386 qatomic_inc(&lockcnt->count);
Paolo Bonzini51dee5e2017-01-12 19:07:52 +0100387 qemu_mutex_unlock(&lockcnt->mutex);
388}
389
390void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
391{
392 qemu_mutex_unlock(&lockcnt->mutex);
393}
394
395unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
396{
Stefan Hajnoczid73415a2020-09-23 11:56:46 +0100397 return qatomic_read(&lockcnt->count);
Paolo Bonzini51dee5e2017-01-12 19:07:52 +0100398}
Paolo Bonzinifbcc3e52017-01-12 19:07:54 +0100399#endif