1 /*
2 * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
3 *
4 *
5 *
6 *
7 *
8 *
9 *
10 *
11 *
12 *
13 *
14 *
15 *
16 *
17 *
18 *
19 *
20 *
21 *
22 *
23 */
24
25 /*
26 *
27 *
28 *
29 *
30 *
31 * Written by Doug Lea with assistance from members of JCP JSR-166
32 * Expert Group and released to the public domain, as explained at
33 * http://creativecommons.org/publicdomain/zero/1.0/
34 */
35
36 package java.util.concurrent.atomic;
37 import java.util.function.LongBinaryOperator;
38 import java.util.function.DoubleBinaryOperator;
39 import java.util.concurrent.ThreadLocalRandom;
40
41 /**
42 * A package-local class holding common representation and mechanics
43 * for classes supporting dynamic striping on 64bit values. The class
44 * extends Number so that concrete subclasses must publicly do so.
45 */
46 @SuppressWarnings("serial")
47 abstract class Striped64 extends Number {
48 /*
49 * This class maintains a lazily-initialized table of atomically
50 * updated variables, plus an extra "base" field. The table size
51 * is a power of two. Indexing uses masked per-thread hash codes.
52 * Nearly all declarations in this class are package-private,
53 * accessed directly by subclasses.
54 *
55 * Table entries are of class Cell; a variant of AtomicLong padded
56 * (via @sun.misc.Contended) to reduce cache contention. Padding
57 * is overkill for most Atomics because they are usually
58 * irregularly scattered in memory and thus don't interfere much
59 * with each other. But Atomic objects residing in arrays will
60 * tend to be placed adjacent to each other, and so will most
61 * often share cache lines (with a huge negative performance
62 * impact) without this precaution.
63 *
64 * In part because Cells are relatively large, we avoid creating
65 * them until they are needed. When there is no contention, all
66 * updates are made to the base field. Upon first contention (a
67 * failed CAS on base update), the table is initialized to size 2.
68 * The table size is doubled upon further contention until
69 * reaching the nearest power of two greater than or equal to the
70 * number of CPUS. Table slots remain empty (null) until they are
71 * needed.
72 *
73 * A single spinlock ("cellsBusy") is used for initializing and
74 * resizing the table, as well as populating slots with new Cells.
75 * There is no need for a blocking lock; when the lock is not
76 * available, threads try other slots (or the base). During these
77 * retries, there is increased contention and reduced locality,
78 * which is still better than alternatives.
79 *
80 * The Thread probe fields maintained via ThreadLocalRandom serve
81 * as per-thread hash codes. We let them remain uninitialized as
82 * zero (if they come in this way) until they contend at slot
83 * 0. They are then initialized to values that typically do not
84 * often conflict with others. Contention and/or table collisions
85 * are indicated by failed CASes when performing an update
86 * operation. Upon a collision, if the table size is less than
87 * the capacity, it is doubled in size unless some other thread
88 * holds the lock. If a hashed slot is empty, and lock is
89 * available, a new Cell is created. Otherwise, if the slot
90 * exists, a CAS is tried. Retries proceed by "double hashing",
91 * using a secondary hash (Marsaglia XorShift) to try to find a
92 * free slot.
93 *
94 * The table size is capped because, when there are more threads
95 * than CPUs, supposing that each thread were bound to a CPU,
96 * there would exist a perfect hash function mapping threads to
97 * slots that eliminates collisions. When we reach capacity, we
98 * search for this mapping by randomly varying the hash codes of
99 * colliding threads. Because search is random, and collisions
100 * only become known via CAS failures, convergence can be slow,
101 * and because threads are typically not bound to CPUS forever,
102 * may not occur at all. However, despite these limitations,
103 * observed contention rates are typically low in these cases.
104 *
105 * It is possible for a Cell to become unused when threads that
106 * once hashed to it terminate, as well as in the case where
107 * doubling the table causes no thread to hash to it under
108 * expanded mask. We do not try to detect or remove such cells,
109 * under the assumption that for long-running instances, observed
110 * contention levels will recur, so the cells will eventually be
111 * needed again; and for short-lived ones, it does not matter.
112 */
113
114 /**
115 * Padded variant of AtomicLong supporting only raw accesses plus CAS.
116 *
117 * JVM intrinsics note: It would be possible to use a release-only
118 * form of CAS here, if it were provided.
119 */
120 @sun.misc.Contended static final class Cell {
121 volatile long value;
122 Cell(long x) { value = x; }
123 final boolean cas(long cmp, long val) {
124 return UNSAFE.compareAndSwapLong(this, valueOffset, cmp, val);
125 }
126
127 // Unsafe mechanics
128 private static final sun.misc.Unsafe UNSAFE;
129 private static final long valueOffset;
130 static {
131 try {
132 UNSAFE = sun.misc.Unsafe.getUnsafe();
133 Class<?> ak = Cell.class;
134 valueOffset = UNSAFE.objectFieldOffset
135 (ak.getDeclaredField("value"));
136 } catch (Exception e) {
137 throw new Error(e);
138 }
139 }
140 }
141
142 /** Number of CPUS, to place bound on table size */
143 static final int NCPU = Runtime.getRuntime().availableProcessors();
144
145 /**
146 * Table of cells. When non-null, size is a power of 2.
147 */
148 transient volatile Cell[] cells;
149
150 /**
151 * Base value, used mainly when there is no contention, but also as
152 * a fallback during table initialization races. Updated via CAS.
153 */
154 transient volatile long base;
155
156 /**
157 * Spinlock (locked via CAS) used when resizing and/or creating Cells.
158 */
159 transient volatile int cellsBusy;
160
161 /**
162 * Package-private default constructor
163 */
164 Striped64() {
165 }
166
167 /**
168 * CASes the base field.
169 */
170 final boolean casBase(long cmp, long val) {
171 return UNSAFE.compareAndSwapLong(this, BASE, cmp, val);
172 }
173
174 /**
175 * CASes the cellsBusy field from 0 to 1 to acquire lock.
176 */
177 final boolean casCellsBusy() {
178 return UNSAFE.compareAndSwapInt(this, CELLSBUSY, 0, 1);
179 }
180
181 /**
182 * Returns the probe value for the current thread.
183 * Duplicated from ThreadLocalRandom because of packaging restrictions.
184 */
185 static final int getProbe() {
186 return UNSAFE.getInt(Thread.currentThread(), PROBE);
187 }
188
189 /**
190 * Pseudo-randomly advances and records the given probe value for the
191 * given thread.
192 * Duplicated from ThreadLocalRandom because of packaging restrictions.
193 */
194 static final int advanceProbe(int probe) {
195 probe ^= probe << 13; // xorshift
196 probe ^= probe >>> 17;
197 probe ^= probe << 5;
198 UNSAFE.putInt(Thread.currentThread(), PROBE, probe);
199 return probe;
200 }
201
202 /**
203 * Handles cases of updates involving initialization, resizing,
204 * creating new Cells, and/or contention. See above for
205 * explanation. This method suffers the usual non-modularity
206 * problems of optimistic retry code, relying on rechecked sets of
207 * reads.
208 *
209 * @param x the value
210 * @param fn the update function, or null for add (this convention
211 * avoids the need for an extra field or function in LongAdder).
212 * @param wasUncontended false if CAS failed before call
213 */
214 final void longAccumulate(long x, LongBinaryOperator fn,
215 boolean wasUncontended) {
216 int h;
217 if ((h = getProbe()) == 0) {
218 ThreadLocalRandom.current(); // force initialization
219 h = getProbe();
220 wasUncontended = true;
221 }
222 boolean collide = false; // True if last slot nonempty
223 for (;;) {
224 Cell[] as; Cell a; int n; long v;
225 if ((as = cells) != null && (n = as.length) > 0) {
226 if ((a = as[(n - 1) & h]) == null) {
227 if (cellsBusy == 0) { // Try to attach new Cell
228 Cell r = new Cell(x); // Optimistically create
229 if (cellsBusy == 0 && casCellsBusy()) {
230 boolean created = false;
231 try { // Recheck under lock
232 Cell[] rs; int m, j;
233 if ((rs = cells) != null &&
234 (m = rs.length) > 0 &&
235 rs[j = (m - 1) & h] == null) {
236 rs[j] = r;
237 created = true;
238 }
239 } finally {
240 cellsBusy = 0;
241 }
242 if (created)
243 break;
244 continue; // Slot is now non-empty
245 }
246 }
247 collide = false;
248 }
249 else if (!wasUncontended) // CAS already known to fail
250 wasUncontended = true; // Continue after rehash
251 else if (a.cas(v = a.value, ((fn == null) ? v + x :
252 fn.applyAsLong(v, x))))
253 break;
254 else if (n >= NCPU || cells != as)
255 collide = false; // At max size or stale
256 else if (!collide)
257 collide = true;
258 else if (cellsBusy == 0 && casCellsBusy()) {
259 try {
260 if (cells == as) { // Expand table unless stale
261 Cell[] rs = new Cell[n << 1];
262 for (int i = 0; i < n; ++i)
263 rs[i] = as[i];
264 cells = rs;
265 }
266 } finally {
267 cellsBusy = 0;
268 }
269 collide = false;
270 continue; // Retry with expanded table
271 }
272 h = advanceProbe(h);
273 }
274 else if (cellsBusy == 0 && cells == as && casCellsBusy()) {
275 boolean init = false;
276 try { // Initialize table
277 if (cells == as) {
278 Cell[] rs = new Cell[2];
279 rs[h & 1] = new Cell(x);
280 cells = rs;
281 init = true;
282 }
283 } finally {
284 cellsBusy = 0;
285 }
286 if (init)
287 break;
288 }
289 else if (casBase(v = base, ((fn == null) ? v + x :
290 fn.applyAsLong(v, x))))
291 break; // Fall back on using base
292 }
293 }
294
295 /**
296 * Same as longAccumulate, but injecting long/double conversions
297 * in too many places to sensibly merge with long version, given
298 * the low-overhead requirements of this class. So must instead be
299 * maintained by copy/paste/adapt.
300 */
301 final void doubleAccumulate(double x, DoubleBinaryOperator fn,
302 boolean wasUncontended) {
303 int h;
304 if ((h = getProbe()) == 0) {
305 ThreadLocalRandom.current(); // force initialization
306 h = getProbe();
307 wasUncontended = true;
308 }
309 boolean collide = false; // True if last slot nonempty
310 for (;;) {
311 Cell[] as; Cell a; int n; long v;
312 if ((as = cells) != null && (n = as.length) > 0) {
313 if ((a = as[(n - 1) & h]) == null) {
314 if (cellsBusy == 0) { // Try to attach new Cell
315 Cell r = new Cell(Double.doubleToRawLongBits(x));
316 if (cellsBusy == 0 && casCellsBusy()) {
317 boolean created = false;
318 try { // Recheck under lock
319 Cell[] rs; int m, j;
320 if ((rs = cells) != null &&
321 (m = rs.length) > 0 &&
322 rs[j = (m - 1) & h] == null) {
323 rs[j] = r;
324 created = true;
325 }
326 } finally {
327 cellsBusy = 0;
328 }
329 if (created)
330 break;
331 continue; // Slot is now non-empty
332 }
333 }
334 collide = false;
335 }
336 else if (!wasUncontended) // CAS already known to fail
337 wasUncontended = true; // Continue after rehash
338 else if (a.cas(v = a.value,
339 ((fn == null) ?
340 Double.doubleToRawLongBits
341 (Double.longBitsToDouble(v) + x) :
342 Double.doubleToRawLongBits
343 (fn.applyAsDouble
344 (Double.longBitsToDouble(v), x)))))
345 break;
346 else if (n >= NCPU || cells != as)
347 collide = false; // At max size or stale
348 else if (!collide)
349 collide = true;
350 else if (cellsBusy == 0 && casCellsBusy()) {
351 try {
352 if (cells == as) { // Expand table unless stale
353 Cell[] rs = new Cell[n << 1];
354 for (int i = 0; i < n; ++i)
355 rs[i] = as[i];
356 cells = rs;
357 }
358 } finally {
359 cellsBusy = 0;
360 }
361 collide = false;
362 continue; // Retry with expanded table
363 }
364 h = advanceProbe(h);
365 }
366 else if (cellsBusy == 0 && cells == as && casCellsBusy()) {
367 boolean init = false;
368 try { // Initialize table
369 if (cells == as) {
370 Cell[] rs = new Cell[2];
371 rs[h & 1] = new Cell(Double.doubleToRawLongBits(x));
372 cells = rs;
373 init = true;
374 }
375 } finally {
376 cellsBusy = 0;
377 }
378 if (init)
379 break;
380 }
381 else if (casBase(v = base,
382 ((fn == null) ?
383 Double.doubleToRawLongBits
384 (Double.longBitsToDouble(v) + x) :
385 Double.doubleToRawLongBits
386 (fn.applyAsDouble
387 (Double.longBitsToDouble(v), x)))))
388 break; // Fall back on using base
389 }
390 }
391
392 // Unsafe mechanics
393 private static final sun.misc.Unsafe UNSAFE;
394 private static final long BASE;
395 private static final long CELLSBUSY;
396 private static final long PROBE;
397 static {
398 try {
399 UNSAFE = sun.misc.Unsafe.getUnsafe();
400 Class<?> sk = Striped64.class;
401 BASE = UNSAFE.objectFieldOffset
402 (sk.getDeclaredField("base"));
403 CELLSBUSY = UNSAFE.objectFieldOffset
404 (sk.getDeclaredField("cellsBusy"));
405 Class<?> tk = Thread.class;
406 PROBE = UNSAFE.objectFieldOffset
407 (tk.getDeclaredField("threadLocalRandomProbe"));
408 } catch (Exception e) {
409 throw new Error(e);
410 }
411 }
412
413 }
414