7 | * The classes in this package demonstrate simple patterns for configuring event
8 | * loops and pausers. These classes are illustrative only and should not be used
9 | * as production code.
10 | */
11 | package net.openhft.chronicle.threads.example;
12 |
--------------------------------------------------------------------------------
/system.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | #
4 |
5 | # Tracing if resources are closed/released correctly.
6 | jvm.resource.tracing=true
7 | disable.resource.warning=true
8 |
9 | disable.discard.warning=false
10 | # for profiling
11 | jvm.safepoint.enabled=false
12 | # reduce logging of the announcer
13 | chronicle.announcer.disable=true
14 | pauser.minProcessors=1
15 | # to monitor disk space every 1 second in testing
16 | chronicle.disk.monitor.period=1
17 |
18 | # check it can be changed in a test
19 | chronicle.disk.monitor.threshold.percent=5
20 |
--------------------------------------------------------------------------------
/LICENSE.adoc:
--------------------------------------------------------------------------------
1 |
2 | == Copyright 2016-2025 chronicle.software
3 |
4 | Licensed under the *Apache License, Version 2.0* (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/internal/ThreadsThreadHolderTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads.internal;
5 |
6 | import org.junit.jupiter.api.Test;
7 |
8 | import static org.junit.jupiter.api.Assertions.assertEquals;
9 |
10 | class ThreadsThreadHolderTest extends net.openhft.chronicle.threads.ThreadsTestCommon {
11 |
12 | @Test
13 | void testNanosecondsToMillisWithTenthsPrecision() {
14 | assertEquals(1.2d, ThreadsThreadHolder.nanosecondsToMillisWithTenthsPrecision(1_234_567), 0.000000001);
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/EventHandlers.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.threads.EventHandler;
7 |
8 | /**
9 | * Placeholder enum that holds simple {@link EventHandler} constants.
10 | * The only entry is {@link #NOOP}, whose {@code action()} method always
11 | * returns {@code false}.
12 | */
13 | enum EventHandlers implements EventHandler {
14 | NOOP {
15 | @Override
16 | public boolean action() {
17 | return false;
18 | }
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/TimingPauser.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import java.util.concurrent.TimeUnit;
7 | import java.util.concurrent.TimeoutException;
8 |
9 | /**
10 | * Marker interface to show we support {@link #pause(long, TimeUnit)}
11 | */
12 | public interface TimingPauser extends Pauser {
13 |
14 | /**
15 | * Pauses but keep tracks of accumulated pause time and throws if timeout exceeded
16 | *
17 | * @param timeout timeout
18 | * @param timeUnit unit
19 | * @throws TimeoutException thrown if timeout passes
20 | */
21 | @Override
22 | void pause(long timeout, TimeUnit timeUnit) throws TimeoutException;
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/internal/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | /**
5 | * This package and all of its sub-packages contain strictly internal classes for this Chronicle library.
6 | * Internal classes shall never be used directly.
7 | *
8 | * Specifically, the following actions (including, but not limited to) are not allowed
9 | * on internal classes and packages:
10 | * <ul>
11 | * <li>Casting to</li>
12 | * <li>Reflection of any kind</li>
13 | * <li>Explicit Serialize/deserialize</li>
14 | * </ul>
15 | * <p>
16 | * The classes in this package and any sub-package are subject to
17 | * changes at any time for any reason.
18 | */
19 | package net.openhft.chronicle.threads.internal;
20 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/NotifyDiskLow.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import java.nio.file.FileStore;
7 |
/**
 * Callback interface for notifications raised by the disk space monitor.
 * <p>
 * {@link #panic(FileStore)} is invoked when a file store is critically short of
 * space; implementations should react immediately because memory-mapped writes
 * may fail.
 * <p>
 * {@link #warning(double, FileStore)} is invoked when a disk is approaching its
 * limit and reports how full that disk currently is.
 */
public interface NotifyDiskLow {

    /**
     * Signals that the given file store is critically short of space.
     *
     * @param fileStore the file store that is almost full
     */
    void panic(FileStore fileStore);

    /**
     * Signals that the given file store is nearing its limit.
     *
     * @param diskSpaceFullPercent how full the disk currently is, as a percentage
     * @param fileStore            the file store that is running low
     */
    void warning(double diskSpaceFullPercent, FileStore fileStore);
}
24 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/ThreadMonitor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.threads.EventHandler;
7 | import net.openhft.chronicle.core.threads.HandlerPriority;
8 | import org.jetbrains.annotations.NotNull;
9 |
10 | /**
11 | * Event handler used by the monitor loop to detect threads that appear to be
12 | * blocked. Instances are typically produced by {@link ThreadMonitors}.
13 | */
14 | public interface ThreadMonitor extends EventHandler {
15 | /**
16 | * Returns {@link HandlerPriority#MONITOR} so monitoring does not compete
17 | * with application handlers.
18 | */
19 | @Override
20 | default @NotNull HandlerPriority priority() {
21 | return HandlerPriority.MONITOR;
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ### How to update
2 | # This is copied from OpenHFT/.gitignore
3 | # update the original and run OpenHFT/update_gitignore.sh
4 |
5 | ### Compiled class file
6 | *.class
7 |
8 | ### Package Files
9 | *.jar
10 | *.war
11 | *.ear
12 |
13 | ### Log file
14 | *.log
15 |
16 | ### IntelliJ
17 | *.iml
18 | *.ipr
19 | *.iws
20 | .idea
21 | compat_reports
22 | .attach_pid*
23 |
24 | ### Virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
25 | hs_err_pid*
26 |
27 | ### Maven template
28 | target/
29 | pom.xml.tag
30 | pom.xml.releaseBackup
31 | pom.xml.versionsBackup
32 | pom.xml.next
33 | release.properties
34 |
35 | ### Eclipse template
36 | *.pydevproject
37 | .metadata
38 | .gradle
39 | bin/
40 | tmp/
41 | *.tmp
42 | *.bak
43 | *.swp
44 | *~.nib
45 | local.properties
46 | .classpath
47 | .project
48 | .settings/
49 | .loadpath
50 |
51 | ### Queue files
52 | *.cq4t
53 | *.cq4
54 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | /**
5 | * Event loop implementations and utilities for running deterministic
6 | * single-threaded event handlers. {@link net.openhft.chronicle.core.threads.EventLoop EventLoop}
7 | * implementations are aggregated by {@link net.openhft.chronicle.threads.EventGroup EventGroup}.
8 | * Pauser strategies ({@link net.openhft.chronicle.threads.Pauser Pauser}) control the
9 | * trade off between latency and CPU use when no work is available.
10 | *
11 | * Typical usage involves building an {@code EventGroup} via
12 | * {@link net.openhft.chronicle.threads.EventGroupBuilder}, installing handlers then calling
13 | * {@code start()}. Handlers are executed on the same thread, avoiding locks in hot paths.
14 | *
15 | * Behaviour such as loop monitoring or thread counts can be configured via system properties
16 | * (see {@code systemProperties.adoc}).
17 | */
18 | package net.openhft.chronicle.threads;
19 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/EventGroupBadAffinityTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.threads.EventLoop;
7 | import org.junit.jupiter.api.Test;
8 | import org.junit.jupiter.api.Timeout;
9 |
10 | import java.util.concurrent.TimeoutException;
11 |
12 | import static org.junit.jupiter.api.Assertions.assertThrows;
13 |
14 | class EventGroupBadAffinityTest extends ThreadsTestCommon {
15 |
16 | /**
17 | * Ensures that an invalid CPU affinity string fails fast so that
18 | * misconfigured deployments do not run with unexpected processor binding.
19 | */
20 | @Timeout(5_000)
21 | @Test
22 | void testInvalidAffinity() {
23 | expectException("Cannot parse 'xxx'");
24 | ignoreException("Timed out waiting for start!");
25 | try (final EventLoop eventGroup = EventGroup.builder().withBinding("xxx").build()) {
26 | assertThrows(TimeoutException.class, eventGroup::start);
27 | }
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/NotifyDiskLowLogWarn.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 |
8 | import java.nio.file.FileStore;
9 |
10 | /**
11 | * Logs to the configured {@link Jvm} logger when disk space is low.
12 | * The {@link #panic(FileStore)} method emits an error level message
13 | * and {@link #warning(double, FileStore)} emits a warning.
14 | */
15 | public class NotifyDiskLowLogWarn implements NotifyDiskLow {
16 | @Override
17 | public void panic(FileStore fileStore) {
18 | Jvm.error().on(DiskSpaceMonitor.class, "your disk " + fileStore + " is almost full, " +
19 | "warning: the JVM may crash if it undertakes an operation with a memory-mapped file.");
20 | }
21 |
22 | @Override
23 | public void warning(double diskSpaceFullPercent, FileStore fileStore) {
24 | Jvm.warn().on(DiskSpaceMonitor.class, "your disk " + fileStore
25 | + " is " + diskSpaceFullPercent + "% full, " +
26 | "warning: the JVM may crash if it undertakes an operation with a memory-mapped file and the disk is out of space.");
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/internal/EventLoopUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads.internal;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 |
8 | /**
9 | * Configuration values for event loop behaviour.
10 | *
11 | *
The {@code ACCEPT_HANDLER_MOD_COUNT} system property specifies how often
12 | * new accept handlers are inserted to avoid starvation. A value of zero
13 | * disables this feature. If the property is absent the
14 | * {@link #DEFAULT_ACCEPT_HANDLER_MOD_COUNT default} is used. The
15 | * {@link #IS_ACCEPT_HANDLER_MOD_COUNT} flag reveals whether re-arming is
16 | * enabled.
17 | */
18 | public enum EventLoopUtil {
19 | ; // none
20 |
21 | /** Fallback when {@code eventloop.accept.mod} is not set. */
22 | private static final int DEFAULT_ACCEPT_HANDLER_MOD_COUNT = 128;
23 |
24 | /** Interval for re-adding accept handlers. */
25 | public static final int ACCEPT_HANDLER_MOD_COUNT =
26 | Jvm.getInteger("eventloop.accept.mod", DEFAULT_ACCEPT_HANDLER_MOD_COUNT);
27 |
28 | /** True when accept handler re-arming is active. */
29 | public static final boolean IS_ACCEPT_HANDLER_MOD_COUNT = ACCEPT_HANDLER_MOD_COUNT > 0;
30 | }
31 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/LongPauserBenchmark.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 |
8 | import java.util.concurrent.TimeUnit;
9 |
10 | /**
11 | * Benchmark used to gauge the overhead of waking a {@link LongPauser}.
12 | *
13 | * A helper thread loops calling {@link LongPauser#pause()} and then yields.
14 | * The main thread repeatedly invokes {@link LongPauser#unpause()} a fixed
15 | * number of times and measures the elapsed time. Dividing the total by the
16 | * iteration count reveals the average cost of a single unpark operation.
17 | */
18 | public final class LongPauserBenchmark {
19 |
20 | public static void main(String[] args) {
21 | final LongPauser pauser = new LongPauser(1, 1, 100, 1000, TimeUnit.MICROSECONDS);
22 | Thread thread = new Thread(() -> {
23 | while (!Thread.interrupted()) {
24 | pauser.pause();
25 | Thread.yield();
26 | }
27 | });
28 | thread.start();
29 |
30 | for (int t = 0; t < 3; t++) {
31 | long start = System.nanoTime();
32 | int runs = 10000000;
33 | for (int i = 0; i < runs; i++)
34 | pauser.unpause();
35 | long time = System.nanoTime() - start;
36 | System.out.printf("Average time to unpark was %,d ns%n", time / runs);
37 | Jvm.pause(20);
38 | }
39 | thread.interrupt();
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/ExecutorFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import java.util.concurrent.ExecutorService;
7 | import java.util.concurrent.ScheduledExecutorService;
8 |
/**
 * Strategy for obtaining {@link ExecutorService} instances.
 * <p>
 * Chronicle Threads goes through this abstraction so that applications can plug
 * in their own executor creation logic, whether that integrates with another
 * concurrency framework or simply wraps the standard JDK executors.
 */
public interface ExecutorFactory {

    /**
     * Creates or retrieves an {@link ExecutorService}.
     *
     * @param name    base name for the threads created by the executor
     * @param threads requested number of threads
     * @param daemon  {@code true} if the threads should be daemon threads
     * @return a service suitable for running general tasks
     */
    ExecutorService acquireExecutorService(String name, int threads, boolean daemon);

    /**
     * Creates or retrieves a {@link ScheduledExecutorService}.
     *
     * @param name   base name for the threads created by the scheduler
     * @param daemon {@code true} if the threads should be daemon threads
     * @return a single-threaded scheduler
     */
    ScheduledExecutorService acquireScheduledExecutorService(String name, boolean daemon);
}
38 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/EventLoopLifecycle.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
/**
 * The life-cycle states of an event loop.
 * <p>
 * A loop begins as {@link #NEW} and becomes {@link #STARTED} once {@code start()}
 * is invoked. Requesting {@code stop()} moves it to {@link #STOPPING}, and when all
 * handlers have completed it becomes {@link #STOPPED}.
 */
public enum EventLoopLifecycle {
    /**
     * Created but not yet started. Only {@code start()} or {@code stop()} are
     * meaningful in this state.
     */
    NEW,

    /**
     * Running. Calling {@code stop()} moves the loop to {@link #STOPPING}.
     */
    STARTED,

    /**
     * {@code stop()} has been called and handlers are finishing. Further calls to
     * {@code stop()} wait for completion.
     */
    STOPPING,

    /**
     * Stopped; the loop cannot be restarted.
     */
    STOPPED;

    /**
     * Tells whether a stop has been requested or has completed.
     *
     * @return {@code true} for {@link #STOPPING} and {@link #STOPPED},
     * {@code false} for {@link #NEW} and {@link #STARTED}
     */
    public boolean isStopped() {
        return this == STOPPING || this == STOPPED;
    }
}
55 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/YieldingPauserTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import org.junit.jupiter.api.Test;
7 |
8 | import java.util.concurrent.TimeUnit;
9 | import java.util.concurrent.TimeoutException;
10 |
11 | import static org.junit.jupiter.api.Assertions.assertEquals;
12 | import static org.junit.jupiter.api.Assertions.fail;
13 |
14 | class YieldingPauserTest extends ThreadsTestCommon {
15 |
16 | @Test
17 | void pause() {
18 | final int pauseTimeMillis = 100;
19 | final YieldingPauser tp = new YieldingPauser(pauseTimeMillis);
20 | for (int i = 0; i < 10; i++) {
21 | final long start = System.currentTimeMillis();
22 | while (true) {
23 | try {
24 | tp.pause(pauseTimeMillis, TimeUnit.MILLISECONDS);
25 | if (System.currentTimeMillis() - start > 200)
26 | fail();
27 | } catch (TimeoutException e) {
28 | final long time = System.currentTimeMillis() - start;
29 | // delta used to be 5 for Linux but occasionally we see it blow in Continuous Integration
30 | // a delta of 20 was used here, however in some situations in CI that was not sufficient:
31 | // org.opentest4j.AssertionFailedError: expected: <100.0> but was: <126.0>
32 | int delta = 30;
33 | // please don't add delta to pauseTimeMillis below - it makes this test flakier on Windows
34 | assertEquals(pauseTimeMillis, time, delta);
35 | tp.reset();
36 | break;
37 | }
38 | }
39 | }
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/VanillaExecutorFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import java.util.concurrent.ExecutorService;
7 | import java.util.concurrent.Executors;
8 | import java.util.concurrent.ScheduledExecutorService;
9 |
10 | /**
11 | * Default {@link ExecutorFactory} used by Chronicle Threads.
12 | *
13 | *
It creates standard JDK executor services backed by a
14 | * {@link NamedThreadFactory}. Single thread requests result in a
15 | * {@link java.util.concurrent.Executors#newSingleThreadExecutor single-thread}
16 | * pool, otherwise a fixed thread pool is returned. Scheduled executors are
17 | * always single-threaded.
18 | */
19 | public enum VanillaExecutorFactory implements ExecutorFactory {
20 | /** sole instance used by default */
21 | INSTANCE;
22 |
23 | /**
24 | * Provides an executor backed by a {@link NamedThreadFactory}. A single
25 | * thread executor is created when {@code threads} equals one, otherwise a
26 | * fixed thread pool is returned.
27 | */
28 | @Override
29 | public ExecutorService acquireExecutorService(String name, int threads, boolean daemon) {
30 | NamedThreadFactory threadFactory = new NamedThreadFactory(name, daemon);
31 | return threads == 1
32 | ? Executors.newSingleThreadExecutor(threadFactory)
33 | : Executors.newFixedThreadPool(threads, threadFactory);
34 | }
35 |
36 | /**
37 | * Creates a single-thread {@link ScheduledExecutorService}.
38 | */
39 | @Override
40 | public ScheduledExecutorService acquireScheduledExecutorService(String name, boolean daemon) {
41 | return Executors.newSingleThreadScheduledExecutor(
42 | new NamedThreadFactory(name, daemon));
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/PauserMonitorFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.threads.EventHandler;
7 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException;
8 |
9 | import java.util.Iterator;
10 | import java.util.ServiceLoader;
11 |
12 | /**
13 | * Factory for {@link EventHandler} instances that observe a {@link Pauser}.
14 | *
15 | *
Implementations are discovered through Java's {@link ServiceLoader}
16 | * mechanism. When no implementation is found a no-op handler is returned.
17 | */
18 | public interface PauserMonitorFactory {
19 |
20 | /**
21 | * Create an event handler that records the behaviour of a {@code pauser}.
22 | * Typical implementations will log the pause count or total time paused and
23 | * may alert if the pauser has remained idle for longer than {@code seconds}.
24 | *
25 | * @param pauser the {@link Pauser} to monitor
26 | * @param description label used in the monitor's {@code toString}
27 | * @param seconds threshold before reporting prolonged pauses
28 | * @return an event handler suitable for a monitoring loop
29 | */
30 | EventHandler pauserMonitor(Pauser pauser, String description, int seconds);
31 |
32 | static PauserMonitorFactory load() {
33 | final Iterator iterator = ServiceLoader.load(PauserMonitorFactory.class).iterator();
34 | return iterator.hasNext() ?
35 | iterator.next() :
36 | (pauser, description, seconds) -> new EventHandler() {
37 | @Override
38 | public boolean action() throws InvalidEventHandlerException {
39 | throw new InvalidEventHandlerException();
40 | }
41 | @Override
42 | public String toString() {
43 | return "NOOP_PAUSER_MONITOR";
44 | }
45 | };
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/CoreEventLoop.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.threads.EventLoop;
7 | import org.jetbrains.annotations.NotNull;
8 |
9 | import java.util.function.BooleanSupplier;
10 |
11 | /**
12 | * Contract for the fast core loop used within an {@link EventGroup}.
13 | *
14 | *
The core loop runs on a dedicated thread and executes handlers
15 | * one by one. Implementations aim to minimise latency and usually rely
16 | * on a {@link net.openhft.chronicle.threads.Pauser} during idle periods.
17 | */
18 | public interface CoreEventLoop extends EventLoop {
19 |
20 | /**
21 | * The value returned for {@link #loopStartNS()} when the event loop is not currently
22 | * executing an iteration
23 | */
24 | long NOT_IN_A_LOOP = Long.MAX_VALUE;
25 |
26 | /**
27 | * The thread currently running the loop.
28 | *
29 | * @return the loop thread, or {@code null} if the loop has not yet started
30 | * or has finished
31 | */
32 | Thread thread();
33 |
34 | /**
35 | * Time in {@link System#nanoTime()} units when the current iteration began.
36 | *
37 | * @return the start time, or {@link #NOT_IN_A_LOOP} if the loop is idle
38 | */
39 | long loopStartNS();
40 |
41 | /**
42 | * Dump the stack trace when a monitor suspects the loop is blocked.
43 | *
44 | * @param message text to include in the log
45 | * @param finalCheck invoked after taking the stack trace; the state is
46 | * logged only when this returns {@code true}
47 | */
48 | void dumpRunningState(@NotNull String message, @NotNull BooleanSupplier finalCheck);
49 |
50 | /**
51 | * Check whether the given thread is executing this loop.
52 | *
53 | *
Used by diagnostics to ignore activity from other threads.
54 | *
55 | * @param thread candidate thread
56 | * @return {@code true} if the loop is running on {@code thread}
57 | */
58 | boolean isRunningOnThread(Thread thread);
59 |
60 | void privateGroup(boolean privateGroup);
61 | }
62 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/BlockingEventLoopTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import net.openhft.chronicle.core.threads.InterruptedRuntimeException;
8 | import org.junit.jupiter.api.Test;
9 |
10 | import java.util.concurrent.BrokenBarrierException;
11 | import java.util.concurrent.CyclicBarrier;
12 | import java.util.concurrent.TimeUnit;
13 | import java.util.concurrent.TimeoutException;
14 | import java.util.concurrent.atomic.AtomicBoolean;
15 |
16 | import static org.junit.jupiter.api.Assertions.assertFalse;
17 | import static org.junit.jupiter.api.Assertions.assertTrue;
18 |
19 | /**
20 | * Verifies that a handler in a {@link BlockingEventLoop} is interrupted when
21 | * the loop is stopped while the calling thread continues unimpeded.
22 | */
23 | class BlockingEventLoopTest extends ThreadsTestCommon {
24 |
25 | @Test
26 | void handlersAreInterruptedOnStop() throws TimeoutException {
27 | try (final BlockingEventLoop el = new BlockingEventLoop("test-blocking-loop")) {
28 | el.start();
29 |
30 | AtomicBoolean wasStoppedSuccessfully = new AtomicBoolean(false);
31 | CyclicBarrier barrier = new CyclicBarrier(2);
32 |
33 | el.addHandler(() -> {
34 | waitQuietly(barrier);
35 |
36 | while (!Thread.currentThread().isInterrupted()) {
37 | Jvm.pause(10);
38 | }
39 | wasStoppedSuccessfully.set(true);
40 | return false;
41 | });
42 |
43 | waitQuietly(barrier);
44 | Jvm.pause(10);
45 | el.stop();
46 |
47 | TimingPauser pauser = Pauser.balanced();
48 | while (!wasStoppedSuccessfully.get()) {
49 | pauser.pause(1, TimeUnit.SECONDS);
50 | }
51 | assertTrue(wasStoppedSuccessfully.get());
52 | assertFalse(Thread.currentThread().isInterrupted());
53 | }
54 | }
55 |
56 | private void waitQuietly(CyclicBarrier barrier) {
57 | try {
58 | barrier.await();
59 | } catch (InterruptedException | BrokenBarrierException e) {
60 | Thread.currentThread().interrupt();
61 | throw new InterruptedRuntimeException("Interrupted waiting at barrier");
62 | }
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/ThreadHolder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException;
8 |
9 | /**
10 | * Supplies runtime details of a thread or event loop being monitored. The
11 | * associated {@link ThreadMonitor} uses this information to detect long blocks
12 | * or unexpected thread termination.
13 | */
14 | public interface ThreadHolder {
15 | int TIMING_ERROR = Jvm.getInteger("threads.timing.error", 80_000_000);
16 |
17 | /**
18 | * Indicates whether the monitored thread is still running.
19 | *
20 | * @return {@code true} if the thread has not terminated
21 | * @throws InvalidEventHandlerException if the holder can no longer be queried
22 | */
23 | boolean isAlive() throws InvalidEventHandlerException;
24 |
25 | /**
26 | * Called once the thread has ended so monitoring can be stopped or logged.
27 | */
28 | void reportFinished();
29 |
30 | /**
31 | * Clears any internal timers when a new loop iteration begins.
32 | */
33 | void resetTimers();
34 |
35 | /**
36 | * Get the {@link System#nanoTime()} at which the currently executing loop iteration started
37 | *
38 | * @return The time the current loop started, or {@link CoreEventLoop#NOT_IN_A_LOOP} if no iteration is executing
39 | */
40 | long startedNS();
41 |
42 | /**
43 | * Determines whether a block has exceeded the logging threshold.
44 | *
45 | * @param nowNS the current time in nanoseconds
46 | * @return {@code true} if logging should occur
47 | */
48 | boolean shouldLog(long nowNS);
49 |
50 | /**
51 | * Produces a diagnostic dump when a stall is detected.
52 | *
53 | * @param startedNS when the loop iteration began
54 | * @param nowNS the time the dump is triggered
55 | */
56 | void dumpThread(long startedNS, long nowNS);
57 |
58 | /**
59 | * Descriptive name used in log output.
60 | */
61 | String getName();
62 |
63 | /**
64 | * Notifies that the monitor thread itself was delayed.
65 | *
66 | * @param actionCallDelayNS time since the last monitor call in nanoseconds
67 | */
68 | void monitorThreadDelayed(long actionCallDelayNS);
69 |
70 | /**
71 | * Maximum delay between monitor calls before a warning is triggered.
72 | *
73 | * @return tolerance in nanoseconds
74 | */
75 | long timingToleranceNS();
76 | }
77 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/PauserTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import org.junit.jupiter.api.Test;
7 |
8 | import java.util.concurrent.TimeUnit;
9 | import java.util.concurrent.TimeoutException;
10 |
11 | import static org.junit.jupiter.api.Assertions.assertEquals;
12 | import static org.junit.jupiter.api.Assertions.assertTrue;
13 |
14 | /**
15 | * Tests the behaviour of the various {@link Pauser} implementations.
16 | *
15 | * When {@code action()} is invoked the handler checks whether the
16 | * current time has passed {@code nextRunNS}. If so it performs the
17 | * work and asks {@code timedAction()} how many micro-seconds to wait
18 | * before running again. A negative delay signals that the handler has
19 | * finished and should be removed.
20 | *
21 | *
22 | * class HeartbeatHandler extends TimedEventHandler {
23 | * @Override
24 | * protected long timedAction() {
25 | * sendHeartbeat();
26 | * return 500_000; // run again in half a second
27 | * }
28 | * }
29 | *
30 | */
31 | public abstract class TimedEventHandler implements EventHandler {
32 | /** next scheduled run time in {@link System#nanoTime()} units. */
33 | private long nextRunNS = 0;
34 |
35 | /**
36 | * Executes the handler when the scheduled time has arrived.
37 | *
38 | * If {@code System.nanoTime()} is greater than or equal to
39 | * {@code nextRunNS} the handler calls {@link #timedAction()} and
40 | * stores the returned delay to compute the next run time. The delay
41 | * is specified in micro-seconds and converted to nano-seconds. A
42 | * negative delay causes the method to return {@code true} so the
43 | * event loop can drop this handler.
44 | */
45 | @Override
46 | public boolean action() throws InvalidEventHandlerException {
47 | long now = System.nanoTime();
48 | if (nextRunNS <= now) {
49 | long delayUS = timedAction();
50 | if (delayUS < 0)
51 | return true;
52 | nextRunNS = now + delayUS * 1000;
53 | }
54 | return false;
55 | }
56 |
57 | /**
58 | * Performs the timed work and specifies the delay until the next call.
59 | *
60 | * @return delay in micro-seconds. A negative value means the handler has
61 | * finished and {@code action()} should return {@code true}.
62 | */
63 | protected abstract long timedAction() throws InvalidEventHandlerException;
64 |
65 | @NotNull
66 | @Override
67 | public HandlerPriority priority() {
68 | return HandlerPriority.TIMER;
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/systemProperties.adoc:
--------------------------------------------------------------------------------
1 | == System Properties
2 |
3 | Chronicle Threads reads several system properties at startup.
4 | These values tune event loops, pausing strategies, monitoring intervals and disk space checks.
5 | All properties may be supplied on the command line with `-D` flags.
6 |
7 | NOTE: All boolean properties below are read using link:https://javadoc.io/static/net.openhft/chronicle-core/2.23ea13/net/openhft/chronicle/core/Jvm.html#getBoolean-java.lang.String-[net.openhft.chronicle.core.Jvm.getBoolean(java.lang.String)], and so are enabled if either `-Dflag` or `-Dflag=true` or `-Dflag=yes`.
8 |
9 | === Disk monitoring
10 |
11 | [cols=4*,options="header"]
12 | |===
13 | | Property Key | Default | Description | Java Variable Name (Type)
14 | | chronicle.disk.monitor.disable | `false` | Disable the background disk space monitor | _DISABLED_ (boolean)
15 | | chronicle.disk.monitor.threshold.percent | 5% | Issue warnings when free space drops below this percentage | _thresholdPercentage_ (int)
16 | | disk.monitor.deleted.warning | `false` | Warn if disk space cannot be determined | _WARN_DELETED_ (boolean)
17 | |===
18 |
19 | === Event loops
20 |
21 | [cols=4*,options="header"]
22 | |===
23 | | Property Key | Default | Description | Java Variable Name (Type)
24 | | eventloop.accept.mod | 128 | Prevent starvation by inserting new handlers every modulo iteration | _ACCEPT_HANDLER_MOD_COUNT_ (int)
25 | | eventGroup.conc.threads | processors/4 | Number of concurrent event loop threads (minimum one) | _CONC_THREADS_ (int)
26 | | eventGroup.wait.to.start.ms | 2_000 ms | Delay before the core event loop begins | _WAIT_TO_START_MS_ (long)
27 | | replicationEventPauseTime | 20 ms | Pause time used for replication event pausers | _REPLICATION_EVENT_PAUSE_TIME_ (int)
28 | | REPLICATION_MONITOR_INTERVAL_MS | 500 ms | Interval for monitoring replication loops | _REPLICATION_MONITOR_INTERVAL_MS_ (long)
29 | |===
30 |
31 | === Pausers
32 |
33 | [cols=4*,options="header"]
34 | |===
35 | | Property Key | Default | Description | Java Variable Name (Type)
36 | | pauser.minProcessors | 4 | Minimum number of processors required before busy pausing is used | _MIN_PROCESSORS_ (int)
37 | |===
38 |
39 | === Monitoring
40 |
41 | [cols=4*,options="header"]
42 | |===
43 | | Property Key | Default | Description | Java Variable Name (Type)
44 | | disableLoopBlockMonitor | `false` | Disable loop block monitoring | _ENABLE_LOOP_BLOCK_MONITOR_ (boolean)
45 | | ignoreThreadMonitorEventHandler | `false` | If enabled, throw an exception when thread monitoring fails | _IGNORE_THREAD_MONITOR_EVENT_HANDLER_ (boolean)
46 | | MONITOR_INTERVAL_MS | 100 ms | Sampling interval for monitoring core threads | _MONITOR_INTERVAL_MS_ (long)
47 | | SHUTDOWN_WAIT_MS | 500 ms | Time to wait for services to stop on shutdown | _SHUTDOWN_WAIT_MILLIS_ (long)
48 | | threads.timing.error | 80_000_000 ns | Allowed timing error for loop execution | _TIMING_ERROR_ (int)
49 | |===
50 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/EventLoops.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import net.openhft.chronicle.core.threads.EventLoop;
8 |
9 | import java.util.ArrayList;
10 | import java.util.Arrays;
11 | import java.util.Collection;
12 | import java.util.List;
13 | import java.util.concurrent.Callable;
14 | import java.util.concurrent.ExecutionException;
15 | import java.util.concurrent.ForkJoinPool;
16 | import java.util.concurrent.Future;
17 |
18 | /**
19 | * Utility methods for working with {@link EventLoop EventLoops}. At present the
20 | * class only supplies a helper to stop several loops at once.
21 | */
22 | public final class EventLoops {
23 |
24 | // Suppresses default constructor, ensuring non-instantiability.
25 | private EventLoops() {
26 | }
27 |
28 | /**
29 | * Stops many {@link EventLoop}s concurrently using {@link ForkJoinPool#commonPool()}.
30 | * The call blocks until every {@code EventLoop.stop()} has finished. Null
31 | * values or collections containing nulls are ignored. Each task runs in the
32 | * common pool and any {@link ExecutionException} is logged. If interrupted
33 | * while waiting the interrupt status is restored.
34 | *
35 | * @param eventLoops a list of {@link EventLoop}s or collections of them
36 | */
37 | public static void stopAll(Object... eventLoops) {
38 | List<Callable<Void>> eventLoopStoppers = new ArrayList<>();
39 | addAllEventLoopStoppers(Arrays.asList(eventLoops), eventLoopStoppers);
40 | try {
41 | for (Future<Void> voidFuture : ForkJoinPool.commonPool().invokeAll(eventLoopStoppers)) {
42 | try {
43 | voidFuture.get();
44 | } catch (ExecutionException e) {
45 | Jvm.error().on(EventLoops.class, "Error stopping event loop", e);
46 | }
47 | }
48 | } catch (InterruptedException e) {
49 | Jvm.warn().on(EventLoops.class, "Interrupted waiting for event loops to stop");
50 | Thread.currentThread().interrupt();
51 | }
52 | }
53 |
54 | private static void addAllEventLoopStoppers(Collection<?> collection, List<Callable<Void>> stoppers) {
55 | for (Object o : collection) {
56 | if (o == null) {
57 | continue;
58 | }
59 | if (o instanceof EventLoop) {
60 | stoppers.add(() -> {
61 | ((EventLoop) o).stop();
62 | return null;
63 | });
64 | } else if (o instanceof Collection) {
65 | addAllEventLoopStoppers((Collection<?>) o, stoppers);
66 | } else {
67 | Jvm.warn().on(EventLoops.class, "Unexpected object passed to EventLoops.stop(): " + o);
68 | }
69 | }
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/PauserTimeoutTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import org.junit.jupiter.api.Test;
7 |
8 | import java.util.concurrent.TimeUnit;
9 | import java.util.concurrent.TimeoutException;
10 |
11 | import static org.junit.jupiter.api.Assertions.fail;
12 |
13 | /**
14 | * Exercises {@link Pauser#pause(long, java.util.concurrent.TimeUnit)} with a
15 | * timeout across several implementations. Pausers that support the timeout
16 | * contract are called repeatedly until half the period has passed without a
17 | * {@link TimeoutException}. After the interval expires, the next call must
18 | * throw a {@link TimeoutException}. Pausers that do not implement this
19 | * behaviour are expected to throw {@link UnsupportedOperationException} when a
20 | * timeout is supplied.
21 | */
22 | class PauserTimeoutTest extends ThreadsTestCommon {
23 | private Pauser[] pausersSupportTimeout = {
24 | Pauser.balanced(),
25 | Pauser.sleepy(),
26 | new BusyTimedPauser(),
27 | new YieldingPauser(0),
28 | new LongPauser(0, 0, 1, 10, TimeUnit.MILLISECONDS),
29 | // new MilliPauser(1)
30 | };
31 | private Pauser[] pausersDontSupportTimeout = {
32 | BusyPauser.INSTANCE};
33 |
34 | /**
35 | * Confirms that pausers honour the timeout parameter. Each pauser is
36 | * called in a loop until half the timeout has elapsed and should not throw.
37 | * Once the timeout has expired the next call must raise
38 | * {@link TimeoutException}.
39 | */
40 | @Test
41 | void pausersSupportTimeout() {
42 | int timeoutNS = 100_000_000;
43 | for (Pauser p : pausersSupportTimeout) {
44 | long start = System.nanoTime();
45 | do try {
46 | p.pause(timeoutNS, TimeUnit.NANOSECONDS);
47 | } catch (TimeoutException e) {
48 | fail(p + " timed out");
49 | } while (System.nanoTime() < start + timeoutNS / 2);
50 | while (System.nanoTime() < start + timeoutNS * 5 / 4) ;
51 | try {
52 | p.pause(timeoutNS, TimeUnit.NANOSECONDS);
53 | } catch (TimeoutException e) {
54 | continue;
55 | }
56 | fail(p + " did not timeoutNS");
57 | }
58 | }
59 |
60 | /**
61 | * Checks that pausers without timeout capability throw
62 | * {@link UnsupportedOperationException} when a timeout is supplied.
63 | */
64 | @Test
65 | void pausersDontSupportTimeout() throws TimeoutException {
66 | for (Pauser p : pausersDontSupportTimeout) {
67 | try {
68 | p.pause(100, TimeUnit.MILLISECONDS);
69 | } catch (UnsupportedOperationException e) {
70 | continue;
71 | }
72 | fail(p + " did not throw");
73 | }
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/ThreadMonitorsTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException;
7 | import org.junit.jupiter.api.Test;
8 |
9 | import java.util.ArrayList;
10 | import java.util.List;
11 | import java.util.concurrent.atomic.AtomicBoolean;
12 | import java.util.function.Consumer;
13 | import java.util.function.LongSupplier;
14 |
15 | import static org.junit.jupiter.api.Assertions.assertEquals;
16 | import static org.junit.jupiter.api.Assertions.assertFalse;
17 | import static org.junit.jupiter.api.Assertions.assertTrue;
18 |
19 | class ThreadMonitorsTest {
20 |
21 | @Test
22 | void forThreadLogsWhenEnabled() throws InvalidEventHandlerException {
23 | RecordingConsumer consumer = new RecordingConsumer();
24 | AtomicBoolean enabled = new AtomicBoolean(true);
25 | ThreadMonitor monitor = ThreadMonitors.forThread(
26 | "loop",
27 | 1_000_000L,
28 | new DeterministicLongSupplier(-5_000_000L, -5_000_000L),
29 | Thread::currentThread,
30 | enabled::get,
31 | consumer
32 | );
33 |
34 | boolean result = monitor.action();
35 |
36 | assertFalse(result);
37 | assertEquals(1, consumer.messages.size());
38 | assertTrue(consumer.messages.get(0).contains("loop"));
39 | }
40 |
41 | @Test
42 | void forThreadSkipsLoggingWhenDisabled() throws InvalidEventHandlerException {
43 | List<String> messages = new ArrayList<>();
44 | AtomicBoolean enabled = new AtomicBoolean(false);
45 | ThreadMonitor monitor = ThreadMonitors.forThread(
46 | "loop",
47 | 1_000_000L,
48 | new DeterministicLongSupplier(-5_000_000L, -5_000_000L),
49 | Thread::currentThread,
50 | enabled::get,
51 | messages::add
52 | );
53 |
54 | boolean result = monitor.action();
55 |
56 | assertFalse(result);
57 | assertTrue(messages.isEmpty());
58 | }
59 |
60 | private static final class DeterministicLongSupplier implements LongSupplier {
61 | private final long[] values;
62 | private int index;
63 |
64 | DeterministicLongSupplier(long... values) {
65 | this.values = values;
66 | }
67 |
68 | @Override
69 | public long getAsLong() {
70 | if (index >= values.length) {
71 | return values[values.length - 1];
72 | }
73 | return values[index++];
74 | }
75 | }
76 |
77 | private static final class RecordingConsumer implements Consumer<String> {
78 | private final List<String> messages = new ArrayList<>();
79 |
80 | @Override
81 | public void accept(String message) {
82 | messages.add(message);
83 | }
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/BusyPauser.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 |
8 | import java.util.concurrent.TimeUnit;
9 | import java.util.concurrent.TimeoutException;
10 |
11 | /**
12 | * Busy-spin implementation of {@link Pauser}.
13 | *
14 | * The pauser repeatedly invokes {@link Jvm#nanoPause()} and never yields or
15 | * sleeps. A thread using this pauser therefore consumes an entire CPU core
16 | * while waiting. No state is kept, so most lifecycle methods are no-ops.
17 | */
18 | public enum BusyPauser implements Pauser {
19 | INSTANCE;
20 |
21 | /**
22 | * Does nothing as {@code BusyPauser} does not maintain state that requires resetting.
23 | */
24 | @Override
25 | public void reset() {
26 | // Do nothing
27 | }
28 |
29 | /**
30 | * Performs a single busy-spin step by calling {@link Jvm#nanoPause()}.
31 | * The call neither yields nor sleeps and therefore burns CPU cycles.
32 | */
33 | @Override
34 | public void pause() {
35 | Jvm.nanoPause();
36 | }
37 |
38 | /**
39 | * Always throws {@link UnsupportedOperationException} because this pauser keeps no state to time against.
40 | * Use {@link BusyTimedPauser} when a timeout is required.
41 | *
42 | * @param timeout timeout duration (ignored)
43 | * @param timeUnit unit of the timeout (ignored)
44 | * @throws TimeoutException never thrown
45 | */
46 | @Override
47 | public void pause(long timeout, TimeUnit timeUnit) throws TimeoutException {
48 | throw new UnsupportedOperationException(this + " is not stateful, use a " + BusyTimedPauser.class.getSimpleName());
49 | }
50 |
51 | /**
52 | * Does nothing as {@code BusyPauser} has no pausing state to unpause from.
53 | */
54 | @Override
55 | public void unpause() {
56 | // nothing to unpause.
57 | }
58 |
59 | /**
60 | * Always returns {@code 0} as {@code BusyPauser} does not track paused time.
61 | *
62 | * @return {@code 0} always
63 | */
64 | @Override
65 | public long timePaused() {
66 | return 0;
67 | }
68 |
69 | /**
70 | * Always returns {@code 0} as {@code BusyPauser} does not count pauses.
71 | *
72 | * @return {@code 0} always
73 | */
74 | @Override
75 | public long countPaused() {
76 | return 0;
77 | }
78 |
79 | /**
80 | * Always returns {@code true}, indicating that this pauser keeps the thread busy rather than truly pausing it.
81 | *
82 | * @return {@code true} always
83 | */
84 | @Override
85 | public boolean isBusy() {
86 | return true;
87 | }
88 |
89 | /**
90 | * Provides a string representation of this pauser, identifying it as "PauserMode.busy".
91 | *
92 | * @return the string "PauserMode.busy"
93 | */
94 | @Override
95 | public String toString() {
96 | return "PauserMode.busy";
97 | }
98 | }
99 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/Issue251Test.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import org.junit.jupiter.api.Test;
7 |
8 | import static org.junit.jupiter.api.Assertions.assertEquals;
9 |
10 | /**
11 | * Confirms the stable {@code toString} output for each built-in pauser.
12 | * Verifies the fix for issue {@code #251} where descriptions were inconsistent.
13 | */
14 | class Issue251Test {
15 | @Test
16 | void toString_timedBusyVariants() {
17 | assertEquals("PauserMode.timedBusy", new BusyTimedPauser().toString());
18 | assertEquals("PauserMode.timedBusy", PauserMode.timedBusy.get().toString());
19 | assertEquals("PauserMode.timedBusy", Pauser.timedBusy().toString());
20 | }
21 |
22 | @Test
23 | void toString_busyVariants() {
24 | assertEquals("PauserMode.busy", BusyPauser.INSTANCE.toString());
25 | assertEquals("PauserMode.busy", PauserMode.busy.get().toString());
26 | assertEquals("PauserMode.busy", Pauser.busy().toString());
27 | }
28 |
29 | @Test
30 | void toString_balancedFromMode() {
31 | assertEquals("PauserMode.balanced", PauserMode.balanced.get().toString());
32 | }
33 |
34 | @Test
35 | void toString_balanced() {
36 | assertEquals("PauserMode.balanced", Pauser.balanced().toString());
37 | }
38 |
39 | @Test
40 | void toString_millis3ms() {
41 | assertEquals("Pauser.millis(3)", Pauser.millis(3).toString());
42 | }
43 |
44 | @Test
45 | void toString_milli1and10() {
46 | assertEquals("Pauser.milli(1, 10)", Pauser.millis(1, 10).toString());
47 | }
48 |
49 | @Test
50 | void toString_balanced2ms() {
51 | assertEquals("Pauser.balancedUpToMillis(2)", Pauser.balancedUpToMillis(2).toString());
52 | }
53 |
54 | @Test
55 | void toString_yieldingNoParams() {
56 | assertEquals("PauserMode.yielding", Pauser.yielding().toString());
57 | }
58 |
59 | @Test
60 | void toString_yieldingMinBusy3() {
61 | assertEquals("YieldingPauser{minBusy=3}", Pauser.yielding(3).toString());
62 | }
63 |
64 | @Test
65 | void toString_milliMode() {
66 | assertEquals("PauserMode.milli", PauserMode.milli.get().toString());
67 | }
68 |
69 | @Test
70 | void toString_yieldingMode() {
71 | assertEquals("PauserMode.yielding", PauserMode.yielding.get().toString());
72 | }
73 |
74 | @Test
75 | void toString_sleepyMode() {
76 | assertEquals("PauserMode.sleepy", PauserMode.sleepy.get().toString());
77 | }
78 |
79 | @Test
80 | void toString_yieldingMinBusy7() {
81 | assertEquals("YieldingPauser{minBusy=7}", new YieldingPauser(7).toString());
82 | }
83 |
84 | @Test
85 | void toString_yieldingMinBusy1() {
86 | assertEquals("YieldingPauser{minBusy=1}", new YieldingPauser(1).toString());
87 | }
88 |
89 | @Test
90 | void toString_millis7ms() {
91 | assertEquals("Pauser.millis(7)", new MilliPauser(7).toString());
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/internal/EventLoopThreadHolder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads.internal;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import net.openhft.chronicle.threads.CoreEventLoop;
8 | import net.openhft.chronicle.threads.ThreadHolder;
9 | /**
10 | * {@link ThreadHolder} implementation used to monitor a single event loop
11 | * thread. It keeps track of how long the loop has been running and requests a
12 | * dump of the loop's state when the thread appears to have blocked for longer
13 | * than the configured monitoring interval. Each subsequent dump is spaced
14 | * further apart to reduce log volume while the loop remains stuck.
15 | */
16 |
17 | public class EventLoopThreadHolder implements ThreadHolder {
18 | private final CoreEventLoop eventLoop;
19 | private final long monitorIntervalNS;
20 | // additional time added to the next logging threshold
21 | private long intervalToAddNS;
22 | // nanoseconds before the next thread dump is logged
23 | private long printBlockTimeNS;
24 |
25 | public EventLoopThreadHolder(long monitorIntervalNS, CoreEventLoop eventLoop) {
26 | this.monitorIntervalNS = intervalToAddNS = printBlockTimeNS = monitorIntervalNS;
27 | this.eventLoop = eventLoop;
28 | }
29 |
30 | @Override
31 | public boolean isAlive() {
32 | return eventLoop.isAlive();
33 | }
34 |
35 | @Override
36 | public void reportFinished() {
37 | Jvm.warn().on(getClass(), "Monitoring a task which has finished " + eventLoop);
38 | }
39 |
40 | @Override
41 | public long startedNS() {
42 | return eventLoop.loopStartNS();
43 | }
44 |
45 | @Override
46 | public void resetTimers() {
47 | intervalToAddNS =
48 | printBlockTimeNS = monitorIntervalNS;
49 | }
50 |
51 | @Override
52 | public boolean shouldLog(long nowNS) {
53 | long blockingTimeNS = nowNS - startedNS();
54 | return blockingTimeNS >= printBlockTimeNS;
55 | }
56 |
57 | @Override
58 | public void dumpThread(long startedNS, long nowNS) {
59 | long blockingTimeNS = nowNS - startedNS;
60 | double blockingTimeMS = blockingTimeNS / 100_000 / 10.0;
61 | if (blockingTimeMS <= 0.0)
62 | return;
63 | eventLoop.dumpRunningState(eventLoop.name() + " thread has blocked for "
64 | + blockingTimeMS + " ms.",
65 | // check we are still in the loop.
66 | () -> eventLoop.loopStartNS() == startedNS);
67 |
68 | printBlockTimeNS += intervalToAddNS;
69 | intervalToAddNS = (long) Math.min(1.41d * intervalToAddNS, 20d * monitorIntervalNS);
70 | }
71 |
72 | @Override
73 | public long timingToleranceNS() {
74 | return monitorIntervalNS + timingErrorNS();
75 | }
76 |
77 | protected long timingErrorNS() {
78 | return TIMING_ERROR;
79 | }
80 |
81 | @Override
82 | public String getName() {
83 | return eventLoop.name();
84 | }
85 |
86 | @Override
87 | public void monitorThreadDelayed(long actionCallDelayNS) {
88 | // report it??
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/LongPauserTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import org.junit.jupiter.api.Test;
8 |
9 | import java.util.concurrent.CountDownLatch;
10 | import java.util.concurrent.TimeUnit;
11 |
12 | import static org.junit.jupiter.api.Assertions.*;
13 |
14 | /**
15 | * Tests the pausing behaviour of {@link LongPauser}.
16 | *
17 | * <p>These tests ensure that:
18 | * <ul>
19 | * <li>{@link LongPauser#unpause()} releases a thread blocked in
20 | * {@link LongPauser#pause()} promptly.</li>
21 | * <li>{@link LongPauser#asyncPause()} waits for roughly the configured
22 | * duration before clearing.</li>
23 | * <li>{@link LongPauser#reset()} cancels any pending asynchronous
24 | * pause.</li>
25 | * </ul>
26 | */
27 | class LongPauserTest extends ThreadsTestCommon {
28 |
29 | @Test
30 | void unpauseStopsPausing() throws InterruptedException {
31 | final int pauseMillis = 1_000;
32 | final LongPauser pauser = new LongPauser(0, 0, pauseMillis, pauseMillis, TimeUnit.MILLISECONDS);
33 | final CountDownLatch started = new CountDownLatch(1);
34 | Thread thread = new Thread(() -> {
35 | started.countDown();
36 | pauser.pause();
37 | });
38 | thread.start();
39 | started.await(50, TimeUnit.MILLISECONDS);
40 | Jvm.pause(10); // give the thread some time to park
41 | pauser.unpause();
42 | final long startNs = System.nanoTime();
43 | thread.join();
44 | final long timeTakenMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNs);
45 | assertTrue(timeTakenMs < pauseMillis / 5, "Took " + timeTakenMs + " to stop");
46 | }
47 |
48 | @Test
49 | void testLongAsyncPauser() {
50 | final LongPauser pauser = new LongPauser(0, 0, 1, 1, TimeUnit.MILLISECONDS);
51 | boolean failedOnce = false;
52 | for (int i = 0; i < 100; i++) {
53 | try {
54 | pauser.asyncPause();
55 | testUntilUnpaused(pauser, 1, TimeUnit.MILLISECONDS);
56 | pauser.reset();
57 | testUntilUnpaused(pauser, 0, TimeUnit.MILLISECONDS);
58 | } catch (AssertionError e) {
59 | if (failedOnce)
60 | throw e;
61 | failedOnce = true;
62 | }
63 | }
64 | }
65 |
66 | @Test
67 | void asyncPauseIsResetOnReset() {
68 | final LongPauser longPauser = new LongPauser(0, 0, 1, 1, TimeUnit.SECONDS);
69 | longPauser.asyncPause();
70 | assertTrue(longPauser.asyncPausing());
71 | longPauser.reset();
72 | assertFalse(longPauser.asyncPausing());
73 | }
74 |
75 | private static void testUntilUnpaused(LongPauser pauser, int n, TimeUnit timeUnit) {
76 | long timeNS = timeUnit.convert(n, TimeUnit.NANOSECONDS);
77 | long start = System.nanoTime();
78 | while (pauser.asyncPausing()) {
79 | if (System.nanoTime() > start + timeNS + 100_000_000)
80 | fail();
81 | }
82 | long time = System.nanoTime() - start;
83 | final int delta = 11_000_000;
84 | assertEquals(timeNS + delta, time, delta);
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/DiskSpaceMonitorTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import net.openhft.chronicle.core.onoes.ExceptionKey;
8 | import net.openhft.chronicle.core.time.SetTimeProvider;
9 | import org.junit.jupiter.api.BeforeEach;
10 | import org.junit.jupiter.api.Test;
11 | import org.junit.jupiter.api.AfterEach;
12 |
13 | import java.io.File;
14 | import java.time.Duration;
15 | import java.util.Map;
16 |
17 | import static org.junit.jupiter.api.Assertions.assertEquals;
18 | import static org.junit.jupiter.api.Assumptions.assumeTrue;
19 |
20 | class DiskSpaceMonitorTest extends ThreadsTestCommon {
21 |
22 | @BeforeEach
23 | void beforeEach(){
24 | clearState();
25 | }
26 |
27 | @AfterEach
28 | void afterEach(){
29 | clearState();
30 | DiskSpaceMonitor.INSTANCE.setThresholdPercentage(5);
31 | }
32 |
33 | private void clearState() {
34 | DiskSpaceMonitor.INSTANCE.clear();
35 | }
36 |
37 | /**
38 | * Exercises disk monitoring when the threshold is raised from zero to 100 per cent.
39 | * Exceptions are recorded and disk space is polled repeatedly to verify that
40 | * roughly five warnings are reported. The test is skipped on Arm hardware.
41 | */
42 | @Test
43 | void pollDiskSpace() {
44 | // todo investigate why this fails on arm
45 | assumeTrue(!Jvm.isArm());
46 | Map<ExceptionKey, Integer> map = Jvm.recordExceptions();
47 | assertEquals(5, DiskSpaceMonitor.INSTANCE.getThresholdPercentage());
48 | DiskSpaceMonitor.INSTANCE.setThresholdPercentage(100);
49 | for (int i = 0; i < 51; i++) {
50 | DiskSpaceMonitor.INSTANCE.pollDiskSpace(new File("."));
51 | Jvm.pause(100);
52 | }
53 | DiskSpaceMonitor.INSTANCE.clear();
54 | map.entrySet().forEach(System.out::println);
55 | long count = map.entrySet()
56 | .stream()
57 | .filter(e -> e.getKey().clazz() == DiskSpaceMonitor.class)
58 | .mapToInt(Map.Entry::getValue)
59 | .sum();
60 | Jvm.resetExceptionHandlers();
61 | System.out.println("Disk space warnings/errors: " + count);
62 | // look for 5 disk space checks and some debug messages about slow disk checks.
63 | assertEquals(5.5, count, 1.5);
64 | }
65 |
66 | /**
67 | * This test was created to verify that the core monitoring loop actually runs more than once. It used to run once
68 | * and then never again. This test explicitly changes the threshold after the first run has happened to ensure that
69 | * a failure occurs on a subsequent run.
70 | */
71 | @Test
72 | void ensureThatDiskSpaceMonitorRunsForMoreThanOneIteration() throws InterruptedException {
73 | SetTimeProvider timeProvider = new SetTimeProvider();
74 | ignoreException("warning: the JVM may crash if it undertakes an operation with a memory-mapped file and the disk is out of space");
75 | DiskSpaceMonitor.INSTANCE.pollDiskSpace(new File("."));
76 | timeProvider.advanceMillis(1200);
77 | DiskSpaceMonitor.INSTANCE.setThresholdPercentage(100);
78 | timeProvider.advanceMillis(Duration.ofHours(24).toMillis());
79 | Thread.sleep(1000);
80 | }
81 |
82 | }
83 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/example/SingleAndMultiThreadedExample.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads.example;
5 |
6 | import net.openhft.chronicle.core.threads.EventLoop;
7 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException;
8 | import net.openhft.chronicle.threads.MediumEventLoop;
9 | import net.openhft.chronicle.threads.Pauser;
10 |
11 | import java.util.concurrent.CountDownLatch;
12 | import java.util.concurrent.ExecutionException;
13 | import java.util.concurrent.ExecutorService;
14 | import java.util.concurrent.Future;
15 | import java.util.concurrent.atomic.AtomicLong;
16 |
17 | import static java.util.concurrent.Executors.newCachedThreadPool;
18 |
19 | /**
20 | * An example that was used in a DZone article
21 | */
22 | public class SingleAndMultiThreadedExample {
23 |
24 | private AtomicLong multiThreadedValue = new AtomicLong();
25 | private long singleThreadedValue;
26 |
27 | /**
28 | * The two examples in this code do the same thing: they both increment a shared counter from 0 to 500.
29 | * One is written using Java threads and the other uses the Chronicle Event Loop.
30 | */
31 | public static void main(String[] args) throws ExecutionException, InterruptedException {
32 | SingleAndMultiThreadedExample example = new SingleAndMultiThreadedExample();
33 |
34 | // runs using java Executor - outputs 500
35 | example.multiThreadedExample();
36 |
37 | // using the chronicle event loop
38 | example.eventLoopExample();
39 |
40 | }
41 |
42 | private Void addOneHundred() {
43 | for (int i = 0; i < 100; i++) {
44 | multiThreadedValue.incrementAndGet();
45 | }
46 | return null;
47 | }
48 |
49 | private void multiThreadedExample() throws ExecutionException, InterruptedException {
50 |
51 | // example using Java Threads
52 | final ExecutorService executorService = newCachedThreadPool();
53 | Future<?> f1 = executorService.submit(this::addOneHundred);
54 | Future<?> f2 = executorService.submit(this::addOneHundred);
55 | Future<?> f3 = executorService.submit(this::addOneHundred);
56 | Future<?> f4 = executorService.submit(this::addOneHundred);
57 | Future<?> f5 = executorService.submit(this::addOneHundred);
58 |
59 | f1.get();
60 | f2.get();
61 | f3.get();
62 | f4.get();
63 | f5.get();
64 | System.out.println("multiThreadedValue=" + multiThreadedValue);
65 | }
66 |
67 | private void eventLoopExample() throws InterruptedException {
68 | final EventLoop eventLoop = new MediumEventLoop(null, "test", Pauser.balanced(), false, "none");
69 | eventLoop.start();
70 | CountDownLatch finished = new CountDownLatch(1);
71 | eventLoop.addHandler(() -> {
72 |
73 | singleThreadedValue++;
74 | // we throw this to un-register the event handler from the event loop
75 |
76 | if (singleThreadedValue == 500) {
77 | finished.countDown();
78 | throw new InvalidEventHandlerException("finished");
79 | }
80 |
81 | // return false if you don't want to be called back for a while
82 | return true;
83 | });
84 |
85 | finished.await();
86 | System.out.println("eventLoopExample=" + singleThreadedValue);
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/BusyTimedPauser.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 |
8 | import java.util.concurrent.TimeUnit;
9 | import java.util.concurrent.TimeoutException;
10 |
11 | /**
12 | * Busy-spin pauser that also implements {@link TimingPauser}.
13 | *
14 | * Like {@link BusyPauser} it never yields or sleeps, so it occupies a CPU core
15 | * while waiting. In addition it tracks elapsed busy-spin time and can throw a
16 | * {@link TimeoutException} when a configured timeout is exceeded.
17 | */
18 | public class BusyTimedPauser implements Pauser, TimingPauser {
19 |     private static final long NOT_STARTED = Long.MAX_VALUE; // marks "no timed pause in progress"
20 |     private long time = NOT_STARTED;
21 |     private long countPaused = 0;
22 |
23 |     /**
24 |      * Reports that this pauser keeps its thread busy while waiting.
25 |      *
26 |      * @return {@code true} in every case
27 |      */
28 |     @Override
29 |     public boolean isBusy() {
30 |         return true;
31 |     }
32 |
33 |     /**
34 |      * Forgets the start of any timed pause, so the next {@link #pause(long, TimeUnit)} restarts the clock.
35 |      */
36 |     @Override
37 |     public void reset() {
38 |         time = NOT_STARTED;
39 |     }
40 |
41 |     /**
42 |      * Counts the call and then invokes {@link Jvm#nanoPause()} once.
43 |      * This method contains no explicit yield or sleep.
44 |      */
45 |     @Override
46 |     public void pause() {
47 |         ++countPaused;
48 |         Jvm.nanoPause();
49 |     }
50 |
51 |     /**
52 |      * Pauses once, unless more than {@code timeout} has elapsed since the first
53 |      * timed call made after construction or after the latest {@link #reset()}.
54 |      * @param timeout  longest period the caller is prepared to keep spinning
55 |      * @param timeUnit unit in which {@code timeout} is expressed
56 |      * @throws TimeoutException when the elapsed time is greater than the timeout
57 |      */
58 |     @Override
59 |     public void pause(long timeout, TimeUnit timeUnit) throws TimeoutException {
60 |         if (time == NOT_STARTED)
61 |             time = System.nanoTime(); // first timed call: start the clock
62 |         final long elapsedNanos = System.nanoTime() - time;
63 |         if (elapsedNanos > timeUnit.toNanos(timeout))
64 |             throw new TimeoutException("Pause timed out after " + timeout + " " + timeUnit);
65 |         pause();
66 |     }
67 |
68 |     /**
69 |      * No-op; this implementation has nothing to unpause.
70 |      */
71 |     @Override
72 |     public void unpause() {
73 |         // deliberately empty
74 |     }
75 |
76 |     /**
77 |      * Reports no pause time, because this class does not measure how long it has paused.
78 |      *
79 |      * @return {@code 0} in every case
80 |      */
81 |     @Override
82 |     public long timePaused() {
83 |         return 0L;
84 |     }
85 |
86 |     /**
87 |      * Reports how often {@link #pause()} has run, including calls made through {@link #pause(long, TimeUnit)}.
88 |      *
89 |      * @return the running total of pauses
90 |      */
91 |     @Override
92 |     public long countPaused() {
93 |         return countPaused;
94 |     }
95 |
96 |     /**
97 |      * Identifies this pauser by a fixed label.
98 |      *
99 |      * @return the literal {@code "PauserMode.timedBusy"}
100 |      */
101 |     @Override
102 |     public String toString() {
103 |         return "PauserMode.timedBusy";
104 |     }
105 | }
106 |
107 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/internal/ThreadMonitorHarness.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads.internal;
5 |
6 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException;
7 | import net.openhft.chronicle.threads.ThreadHolder;
8 | import net.openhft.chronicle.threads.ThreadMonitor;
9 |
10 | import java.util.function.LongSupplier;
11 |
12 | import static net.openhft.chronicle.threads.CoreEventLoop.NOT_IN_A_LOOP;
13 |
14 | /**
15 | * Monitoring harness that drives a {@link ThreadHolder} via the
16 | * {@link ThreadMonitor} interface. The harness delegates all
17 | * monitoring actions to the wrapped holder and records the last time
18 | * an action was run.
19 | */
20 | public class ThreadMonitorHarness implements ThreadMonitor {
21 |     private final ThreadHolder thread;
22 |     private final LongSupplier timeSupplier;
23 |     private long lastActionCall = Long.MAX_VALUE; // sentinel: action() has not recorded a time yet
24 |     private long lastStartedNS = NOT_IN_A_LOOP;
25 |
26 |     /**
27 |      * Creates a harness that reports on the supplied holder using the given
28 |      * time supplier.
29 |      *
30 |      * @param thread       holder describing the monitored thread
31 |      * @param timeSupplier provider of the current time in nanoseconds
32 |      */
33 |     public ThreadMonitorHarness(ThreadHolder thread, LongSupplier timeSupplier) {
34 |         this.thread = thread;
35 |         this.timeSupplier = timeSupplier;
36 |     }
37 |
38 |     /**
39 |      * Creates a harness using {@link System#nanoTime()} as the time provider.
40 |      *
41 |      * @param thread holder describing the monitored thread
42 |      */
43 |     public ThreadMonitorHarness(ThreadHolder thread) {
44 |         this(thread, System::nanoTime);
45 |     }
46 |
47 |     /**
48 |      * Called periodically to check the state of the wrapped thread.
49 |      * Throws {@link InvalidEventHandlerException} if the thread has finished.
50 |      * While the holder reports a start time, a gap between calls longer than the holder's
51 |      * tolerance is passed to the holder (if still alive); the first call never counts as late.
52 |      *
53 |      * @return {@code true} only when the gap between calls exceeded the tolerance
54 |      * @throws InvalidEventHandlerException if the thread is no longer alive
55 |      */
56 |     @Override
57 |     public boolean action() throws InvalidEventHandlerException {
58 |         if (!thread.isAlive()) {
59 |             thread.reportFinished();
60 |             throw new InvalidEventHandlerException();
61 |         }
62 |         long startedNS = thread.startedNS();
63 |         long nowNS = timeSupplier.getAsLong();
64 |         // First call: there is no previous call to measure against, so the delay is zero. Subtracting the
65 |         // Long.MAX_VALUE sentinel would overflow to a huge positive "delay" when the time source is negative.
66 |         long actionCallDelay = lastActionCall == Long.MAX_VALUE ? 0 : nowNS - lastActionCall;
67 |         this.lastActionCall = nowNS; // recorded on every call to prevent false-positive "monitorThreadDelayed" reports
68 |
69 |         if (startedNS == 0 || startedNS == NOT_IN_A_LOOP) {
70 |             return false;
71 |         }
72 |         if (startedNS != lastStartedNS) {
73 |             thread.resetTimers();
74 |             lastStartedNS = startedNS;
75 |         }
76 |         if (actionCallDelay > thread.timingToleranceNS()) {
77 |             if (thread.isAlive())
78 |                 thread.monitorThreadDelayed(actionCallDelay);
79 |             return true;
80 |         }
81 |         if (!thread.shouldLog(nowNS))
82 |             return false;
83 |         thread.dumpThread(startedNS, nowNS);
84 |         return false; // true assumes we are about to need to check again.
85 |     }
86 |     /** Identifies the harness by the name of the holder it monitors. */
87 |     @Override
88 |     public String toString() {
89 |         return "ThreadMonitorHarness<" + thread.getName() + ">";
90 |     }
91 | }
92 |
--------------------------------------------------------------------------------
/src/main/docs/thread-thread-safety-guide.adoc:
--------------------------------------------------------------------------------
1 | = Chronicle Threads Thread-Safety Guide
2 | :toc:
3 | :sectnums:
4 | :lang: en-GB
5 |
6 | == Scope
7 |
8 | This guide explains how Chronicle Threads enforces single-threaded handler execution and how developers should structure code that interacts with event loops.
9 | It expands on requirements THR-FN-006 through THR-NF-O-009 and aligns with Chronicle Core's `SingleThreadedChecked` utilities.
10 |
11 | == Event Loop Ownership Model
12 |
13 | * Each `EventLoop` runs on a dedicated Java platform thread; handlers registered on that loop must not share mutable state with other threads without explicit synchronisation (THR-FN-006).
14 | * When a handler needs to hand over work to another thread (e.g., a blocking worker), use thread-safe queues or Chronicle Queue to transfer data without violating loop confinement.
15 | * The boolean result of `action()` should reflect whether more immediate work is available; returning `true` repeatedly for idle handlers forces tight scheduling and increases contention for other handlers (THR-FN-007).
16 |
17 | == Safe Hand-off Patterns
18 |
19 | Initialise :: Construct handlers and supporting resources on the main thread, then call `singleThreadedCheckReset()` before registering them with the target loop.
20 |
21 | Operate :: Once registered, treat handler state as confined to the loop's thread.
22 | All mutations should occur inside `action()` or helper methods invoked from that loop.
23 |
24 | Dispose :: Use `InvalidEventHandlerException.reusable()` to self-deregister when the handler has completed its lifecycle (THR-FN-008).
25 | Ensure downstream resources honour Chronicle Core's `Closeable` and `ReferenceCounted` contracts.
26 |
27 | == Interaction with Shared Services
28 |
29 | * Shared caches or maps must expose lock-free APIs that are safe for single-writer, multi-reader scenarios, or provide appropriate synchronisation.
30 | * When invoking Chronicle Queue appenders or tailers from handlers, rely on their single-threaded guarantees and avoid sharing instances across loops without resetting ownership.
31 | * If a handler must update shared analytics or metrics collectors, prefer non-blocking data structures (e.g., `LongAdder`) to minimise stall risk.
32 |
33 | == Error Handling Discipline
34 |
35 | Unchecked exceptions ::
36 | * The loop removes the offending handler and logs via `Jvm.warn()`; implement catch-and-report patterns where recovery is possible (THR-NF-O-009).
37 |
38 | Timeouts ::
39 | * Use monitor-loop thresholds to detect blocked handlers early (THR-NF-O-018).
40 | Handlers can emit domain-specific heartbeats to aid diagnosis.
41 |
42 | Defensive coding ::
43 | * Validate external inputs before entering tight loops to avoid unbounded CPU usage.
44 | * Leverage Chronicle Core's `SingleThreadedChecked` exceptions during testing to catch accidental cross-thread access.
45 |
46 | == Testing Strategies
47 |
48 | * Run unit tests with assertions enabled to surface `SingleThreadedChecked` violations.
49 | * Use deterministic executors in integration tests to simulate loop progression and ensure handlers remain idempotent.
50 | * Incorporate concurrency stress tests that replay boundary scenarios (e.g., handler self-deregistration while monitor loop samples metrics).
51 |
52 | == Documentation and Traceability
53 |
54 | * Annotate handler classes with the relevant requirement IDs (e.g., `THR-FN-006`) in code comments or design docs to aid reviews.
55 | * Update operational run-books to describe ownership expectations and hand-off procedures.
56 | * Ensure new handlers ship with accompanying tests that prove thread-safety assumptions, referencing requirement IDs in test names where practical.
57 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/NamedThreadFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import net.openhft.chronicle.core.StackTrace;
8 | import net.openhft.chronicle.core.threads.CleaningThread;
9 | import net.openhft.chronicle.core.threads.ThreadDump;
10 | import org.jetbrains.annotations.NotNull;
11 |
12 | import java.util.concurrent.ThreadFactory;
13 | import java.util.concurrent.atomic.AtomicInteger;
14 |
15 | /**
16 | * Creates named threads within a dedicated group. The first thread is named
17 | * {@code groupName} and each subsequent one is suffixed with {@code -n} where
18 | * {@code n} increments from one. Every thread is a {@link CleaningThread} so
19 | * that thread-local resources are cleared when it terminates.
20 | */
21 | public class NamedThreadFactory extends ThreadGroup implements ThreadFactory {
22 |     private final AtomicInteger id = new AtomicInteger();
23 |     private final String nameShadow;
24 |     private final Boolean daemonShadow;
25 |     private final Integer priority;
26 |     private final StackTrace createdHere;
27 |     private final boolean inEventLoop;
28 |
29 |     public NamedThreadFactory(String name) {
30 |         this(name, null, null); // daemon flag and priority left unset
31 |     }
32 |
33 |     public NamedThreadFactory(String name, Boolean daemon) {
34 |         this(name, daemon, null); // priority left unset
35 |     }
36 |
37 |     public NamedThreadFactory(String name, Boolean daemon, Integer priority) {
38 |         this(name, daemon, priority, false); // inEventLoop defaults to false
39 |     }
40 |
41 |     /**
42 |      * Builds a factory from the full set of options.
43 |      *
44 |      * @param name        name of the thread group, also used as the base of every thread name
45 |      * @param daemon      daemon flag to set on created threads, or {@code null} to leave it untouched
46 |      * @param priority    priority to set on created threads, or {@code null} to leave it untouched
47 |      * @param inEventLoop value handed to every {@link CleaningThread} this factory creates
48 |      */
49 |     public NamedThreadFactory(String name, Boolean daemon, Integer priority, boolean inEventLoop) {
50 |         super(name);
51 |         this.inEventLoop = inEventLoop;
52 |         this.priority = priority;
53 |         this.daemonShadow = daemon;
54 |         this.nameShadow = name;
55 |         createdHere = Jvm.isResourceTracing() ? new StackTrace("NamedThreadFactory created here") : null;
56 |     }
57 |
58 |     /**
59 |      * Creates a {@link CleaningThread} for the task and registers it with
60 |      * {@link ThreadDump}. The first thread is named {@link Threads#threadGroupPrefix()}
61 |      * plus the factory name; every later thread also gets a {@code -n} suffix,
62 |      * with {@code n} counting up from one.
63 |      */
64 |     @Override
65 |     @NotNull
66 |     public Thread newThread(@NotNull Runnable r) {
67 |         final int sequence = id.getAndIncrement();
68 |         final String threadName = Threads.threadGroupPrefix() + nameShadow + (sequence == 0 ? "" : "-" + sequence);
69 |         final Thread created = new CleaningThread(r, threadName, inEventLoop);
70 |         ThreadDump.add(created, createdHere);
71 |         if (daemonShadow != null)
72 |             created.setDaemon(daemonShadow);
73 |         if (priority != null)
74 |             created.setPriority(priority);
75 |         return created;
76 |     }
77 |
78 |     /**
79 |      * Interrupts each thread that {@link ThreadGroup#enumerate(Thread[])}
80 |      * currently reports for this group.
81 |      */
82 |     public void interruptAll() {
83 |         // one spare slot, since activeCount() is only an estimate
84 |         final Thread[] threads = new Thread[activeCount() + 1];
85 |         final int found = super.enumerate(threads);
86 |         for (int i = 0; i < found; i++) {
87 |             threads[i].interrupt();
88 |         }
89 |     }
90 | }
91 |
--------------------------------------------------------------------------------
/docs/images/source/image1.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
45 |
--------------------------------------------------------------------------------
/src/main/docs/thread-performance-targets.adoc:
--------------------------------------------------------------------------------
1 | = Chronicle Threads Performance Targets
2 | :toc:
3 | :sectnums:
4 | :lang: en-GB
5 |
6 | == Scope
7 |
8 | This document enumerates the latency, jitter, throughput, and allocation targets for Chronicle Threads and describes how teams must measure and report them.
9 | It elaborates the non-functional requirements captured in `project-requirements.adoc` (THR-NF-P-014 through THR-NF-P-031).
10 |
11 | == Reference Hardware Profile
12 |
13 | Baseline ::
14 | * Dual-socket x86_64 server, 3.2 GHz or faster, Turbo disabled.
15 | * 64 GiB RAM, uniform memory access within a socket.
16 | * Linux kernel 5.15 or newer with `isolcpus`, `nohz_full`, and `rcu_nocbs` tuned for fast cores.
17 | * OpenJDK 21 LTS, G1 GC, `-XX:+UseNUMA`, `-XX:+AlwaysPreTouch`.
18 |
19 | Variations ::
20 | * ARM64 hosts must document deviations from the x86 baseline and retune thresholds accordingly.
21 | * Virtualised environments require an additional jitter budget that is recorded alongside benchmark artefacts.
22 |
23 | == Target Matrix
24 |
25 | [cols="2,3,3",options="header"]
26 | |===
27 | |Requirement |Target |Measurement Notes
28 | |THR-NF-P-027 (Latency) |<= 10 microseconds at 99.99 percentile for single-hop handler runs |Profiling harness schedules 10 million iterations with a busy pauser and isolated core.
29 | |THR-NF-P-028 (Jitter) |<= 2 microseconds peak-to-peak jitter under steady load |Continuous histogram per handler, sampled via monitor loop over 15 minute windows.
30 | |THR-NF-P-029 (Throughput) |>= 5 million 64-byte events per second on a fast loop |Benchmark harness dispatches fixed-size payloads, recording sustained processing rate.
31 | |THR-NF-P-030 (Heap Allocation) |<= 0.1 bytes per event averaged across handlers |Java Flight Recorder or allocation profiler attached during workload replay.
32 | |THR-NF-P-014 (Pauser Hot Path) |0 allocations in `Pauser.pause()` / `reset()` |Unit tests instrumented with allocation counters; CI gate fails on non-zero heap activity.
33 | |THR-NF-P-031 (CPU Utilisation) |Loop CPU utilisation tracks input rate; idle loops drop below 5 percent |Derived from pauser metrics (`timePaused`, `countPaused`); reported via telemetry dashboards.
34 | |===
35 |
36 | == Measurement Methodology
37 |
38 | Workload Selection ::
39 | * Use representative handlers (queue tailer, order matching micro-benchmark, timed maintenance task).
40 | * Include at least one blocking handler routed to the `BLOCKING` priority to validate segregation.
41 |
42 | Warm-up ::
43 | * Discard initial 30 seconds to allow JIT compilation and cache priming.
44 | * Verify monitor-loop metrics stabilise before collecting results.
45 |
46 | Sampling ::
47 | * Persist HDR histograms for latency and jitter, recorded in microseconds to two decimal places.
48 | * Capture CPU affinity maps and pauser states alongside results to prove configuration fidelity.
49 |
50 | Repeatability ::
51 | * Run each scenario three times; publish mean and worst-case metrics.
52 | * Store benchmark artefacts in build pipelines so regressions can be bisected.
53 |
54 | == Instrumentation Guidelines
55 |
56 | * Enable loop-block monitor logging at WARN to capture threshold breaches (THR-NF-O-019).
57 | * Attach `PauserMonitorFactory` exporters to push pause counts and durations into time-series storage.
58 | * Tag benchmark runs with Git commit, JVM build, and operating system version for traceability.
59 |
60 | == Regression Gates
61 |
62 | * CI pipelines must reject changes that exceed any target by more than 5 percent unless accompanied by an approved waiver referencing the relevant requirement ID.
63 | * Nightly builds execute an extended soak (minimum 8 hours) to surface low-frequency jitter outliers; findings feed into operational run-books.
64 |
65 | == Reporting
66 |
67 | * Summarise performance results in release notes with explicit references to the requirements satisfied (e.g., "Maintains THR-NF-P-027 latency target").
68 | * Archive raw benchmark logs and histograms for audit and future tuning.
69 | * When targets cannot be met on non-reference hardware, document compensating controls and adjustments to operational thresholds.
70 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/ThreadsTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import org.junit.jupiter.api.Test;
8 |
9 | import java.util.concurrent.ExecutorService;
10 | import java.util.concurrent.Executors;
11 | import java.util.concurrent.atomic.AtomicBoolean;
12 |
13 | import static org.junit.jupiter.api.Assertions.assertEquals;
14 |
15 | class ThreadsTest extends ThreadsTestCommon { // covers Threads.shutdown/shutdownDaemon on executors whose task is still running, plus stack-trace rendering
16 |
17 | @Test
18 | void shouldDumpStackTracesForStuckDelegatedExecutors() {
19 | final AtomicBoolean running = new AtomicBoolean(true);
20 | final ExecutorService service = Executors.newSingleThreadExecutor(new NamedThreadFactory("non-daemon-test")); // name-only constructor: no daemon flag is passed
21 | service.submit(() -> {
22 | while (running.get()) { // keeps the single worker occupied until the test clears the flag
23 | Jvm.pause(10L);
24 | }
25 | });
26 |
27 | Threads.shutdown(service); // running is still true here, so the submitted task has not been told to stop
28 | running.set(false); // lets the task leave its loop once the shutdown attempt has returned
29 | expectException("*** FAILED TO TERMINATE java.util.concurrent.Executors$"); // presumably registers a message the shutdown is expected to have logged - see ThreadsTestCommon
30 | expectException("**** THE main/non-daemon-test THREAD DID NOT SHUTDOWN ***");
31 | assertExceptionThrown("**** THE main/non-daemon-test THREAD DID NOT SHUTDOWN ***");
32 | }
33 |
34 | @Test
35 | void shouldDumpStackTracesForStuckDaemonDelegatedExecutors() {
36 | final AtomicBoolean running = new AtomicBoolean(true);
37 | final ExecutorService service = Executors.newSingleThreadExecutor(new NamedThreadFactory("daemon-test")); // NOTE(review): despite the test name, no daemon flag is passed to the factory - confirm this is intended
38 | service.submit(() -> {
39 | while (running.get()) { // keeps the single worker occupied until the test clears the flag
40 | Jvm.pause(10L);
41 | }
42 | });
43 |
44 | Threads.shutdownDaemon(service); // running is still true here, so the submitted task has not been told to stop
45 | running.set(false); // lets the task leave its loop once the shutdown attempt has returned
46 | expectException("*** FAILED TO TERMINATE java.util.concurrent.Executors$");
47 | expectException("**** THE main/daemon-test THREAD DID NOT SHUTDOWN ***");
48 | assertExceptionThrown("**** THE main/daemon-test THREAD DID NOT SHUTDOWN ***");
49 | }
50 |
51 | @Test
52 | void shouldDumpStackTracesForStuckNestedDelegatedExecutors() {
53 | final AtomicBoolean running = new AtomicBoolean(true);
54 | final ExecutorService service = Executors.unconfigurableExecutorService( // three wrapper layers around the real single-thread executor
55 | Executors.unconfigurableExecutorService(
56 | Executors.unconfigurableExecutorService(
57 | Executors.newSingleThreadExecutor(new NamedThreadFactory("non-daemon-test"))
58 | )
59 | )
60 | );
61 | service.submit(() -> {
62 | while (running.get()) { // keeps the single worker occupied until the test clears the flag
63 | Jvm.pause(10L);
64 | }
65 | });
66 |
67 | Threads.shutdown(service); // running is still true here, so the submitted task has not been told to stop
68 | running.set(false); // lets the task leave its loop once the shutdown attempt has returned
69 | expectException("*** FAILED TO TERMINATE java.util.concurrent.Executors$");
70 | expectException("**** THE main/non-daemon-test THREAD DID NOT SHUTDOWN ***");
71 | assertExceptionThrown("**** THE main/non-daemon-test THREAD DID NOT SHUTDOWN ***");
72 | }
73 |
74 | @Test
75 | void testRenderStackTrace() {
76 | StackTraceElement[] stackTrace = new StackTraceElement[]{ // three synthetic frames: class, method, file, line
77 | new StackTraceElement("com.test.Something", "doSomething", "Something.java", 123),
78 | new StackTraceElement("com.test.SomethingElse", "doSomethingElse", "SomethingElse.java", 456),
79 | new StackTraceElement("com.test.SomethingElseAgain", "doSomethingElseAgain", "SomethingElseAgain.java", 789),
80 | };
81 | StringBuilder stringBuilder = new StringBuilder();
82 | Threads.renderStackTrace(stringBuilder, stackTrace);
83 | assertEquals( // one rendered line per frame, each ending in a newline
84 | " com.test.Something.doSomething(Something.java:123)\n" +
85 | " com.test.SomethingElse.doSomethingElse(SomethingElse.java:456)\n" +
86 | " com.test.SomethingElseAgain.doSomethingElseAgain(SomethingElseAgain.java:789)\n",
87 | stringBuilder.toString());
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/PauserMode.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import java.util.function.Supplier;
7 |
8 | /**
9 | * Enumerates the built-in pausing strategies provided by {@link Pauser}.
10 | *
11 | * <p>{@code Pauser} implementations are not {@code enum}s and cannot easily be
12 | * referred to from configuration files. {@code PauserMode} gives each common
13 | * strategy a serialisable name so that YAML and similar configuration formats
14 | * can specify the desired pauser.
15 | *
16 | * <p>The README contains a table under the "PauserMode" section that summarises
17 | * the latency and CPU characteristics for each mode.
18 | */
19 | public enum PauserMode implements Supplier<Pauser> {
20 |
21 |     /**
22 |      * Busy waits for a short time before yielding and eventually sleeping.
23 |      * Latency is moderate but CPU use is reduced compared to {@link #busy}.
24 |      * Typical choice for event loops dealing with bursty traffic.
25 |      * Can be monitored and does not need CPU isolation.
26 |      */
27 |     balanced {
28 |         @Override
29 |         public Pauser get() {
30 |             return Pauser.balanced();
31 |         }
32 |     },
33 |
34 |     /**
35 |      * Continuously busy spins to minimise jitter and give the lowest latency.
36 |      * Best used when a dedicated core is available.
37 |      * Not monitorable and prefers CPU isolation.
38 |      */
39 |     busy {
40 |         @Override
41 |         public Pauser get() {
42 |             return Pauser.busy();
43 |         }
44 |
45 |         @Override
46 |         public boolean isolcpus() {
47 |             return true;
48 |         }
49 |
50 |         @Override
51 |         public boolean monitor() {
52 |             return false;
53 |         }
54 |     },
55 |
56 |     /**
57 |      * Always sleeps for roughly one millisecond and never busy waits.
58 |      * Latency can be around one millisecond but CPU usage is very low.
59 |      * Useful for low priority polling where jitter is acceptable.
60 |      */
61 |     milli {
62 |         @Override
63 |         public Pauser get() {
64 |             return Pauser.millis(1);
65 |         }
66 |     },
67 |
68 |     /**
69 |      * Less aggressive than {@link #balanced}; mainly sleeps to conserve CPU.
70 |      * Offers high jitter and therefore suits background or diagnostic work.
71 |      */
72 |     sleepy {
73 |         @Override
74 |         public Pauser get() {
75 |             return Pauser.sleepy();
76 |         }
77 |     },
78 |
79 |     /**
80 |      * Behaves like {@link #busy} but also supports timeout based pauses.
81 |      * Maintains minimal jitter while allowing a time limit to be enforced.
82 |      * Not monitorable and prefers CPU isolation.
83 |      */
84 |     timedBusy {
85 |         @Override
86 |         public Pauser get() {
87 |             return Pauser.timedBusy();
88 |         }
89 |
90 |         @Override
91 |         public boolean isolcpus() {
92 |             return true;
93 |         }
94 |
95 |         @Override
96 |         public boolean monitor() {
97 |             return false;
98 |         }
99 |     },
100 |     /**
101 |      * Briefly busy spins then yields the CPU.
102 |      * Latency is low and the pauser can be shared between threads.
103 |      * Suitable when threads share CPUs but responsiveness is still important.
104 |      */
105 |     yielding {
106 |         @Override
107 |         public Pauser get() {
108 |             return Pauser.yielding();
109 |         }
110 |     };
111 |
112 |     /**
113 |      * Indicates whether this mode is suited to running on an isolated CPU; {@code false} unless a constant overrides it.
114 |      *
115 |      * @return {@code true} if CPU isolation is suitable, otherwise {@code false}
116 |      */
117 |     public boolean isolcpus() {
118 |         return false;
119 |     }
120 |
121 |     /**
122 |      * Indicates whether pausers of this mode can be monitored; {@code true} unless a constant overrides it.
123 |      *
124 |      * @return {@code true} if the pauser can be monitored, otherwise {@code false}
125 |      */
126 |     public boolean monitor() {
127 |         return true;
128 |     }
129 | }
130 |
--------------------------------------------------------------------------------
/src/main/docs/thread-operational-controls.adoc:
--------------------------------------------------------------------------------
1 | = Chronicle Threads Operational Controls
2 | :toc:
3 | :sectnums:
4 | :lang: en-GB
5 |
6 | == CPU Isolation and Affinity Governance
7 |
8 | Why ::
9 | Latency-sensitive handlers rely on predictable scheduling and cache residency.
10 |
11 | Core controls ::
12 | * Reserve dedicated CPU cores for loops using busy pausers, aligning with documented recommendations (THR-DOC-016).
13 | * Validate runtime affinity strings against estate topology before deployment (THR-FN-015, THR-FN-017).
14 | * Record the chosen affinity mapping in run-books so support engineers can confirm compliance during incident response.
15 |
16 | Review hot-spots ::
17 | * Kubernetes (K8s) or other container orchestrators that may reassign cores.
18 | * BIOS or hypervisor changes that alter NUMA layout.
19 | * Third-party tooling that repins threads (profilers, debuggers).
20 |
21 | == Loop-Block Monitoring and Alerting
22 |
23 | Why ::
24 | A stalled handler compromises all work on its loop and introduces systemic jitter.
25 |
26 | Core controls ::
27 | * Keep the monitor loop enabled in production to enforce execution thresholds (THR-NF-O-018, THR-NF-O-019).
28 | * Tune `loop.block.threshold.ns` and `MONITOR_INTERVAL_MS` via system properties to reflect acceptable tail latency (THR-OPS-023, THR-OPS-024).
29 | * Integrate `PauserMonitorFactory` outputs with telemetry collectors so SLO breaches surface quickly (THR-NF-O-021).
30 |
31 | Review hot-spots ::
32 | * Handlers that call out to external services.
33 | * Contended locks inside business logic.
34 | * JVM safepoint pauses observable as correlated spikes across all loops.
35 |
36 | == Startup, Shutdown, and Recovery
37 |
38 | Why ::
39 | Predictable lifecycle management prevents resource leaks and eases maintenance.
40 |
41 | Core controls ::
42 | * Configure shutdown hooks or explicit close ordering so loops stop gracefully and relinquish resources (THR-FN-002, THR-OPS-025).
43 | * Use builder precedence rules to override unsuitable host-wide defaults (THR-OPS-024).
44 | * Include loop topology and handler binding in operational documentation to guide failover drills.
45 |
46 | Review hot-spots ::
47 | * Mutable static state shared across handlers that survives restart.
48 | * Incomplete handler deregistration causing repeated warnings during shutdown.
49 | * JVM exit sequences where native resources must release before process termination.
50 |
51 | == Configuration Hygiene
52 |
53 | Why ::
54 | Misconfiguration can disable safety features or erode performance targets.
55 |
56 | Core controls ::
57 | * Maintain an allow-listed set of Chronicle Threads system properties and validate them in CI pipelines.
58 | * Version control default builder profiles for each environment (development, certification, production) and peer review changes.
59 | * Capture pauser and monitor settings in infrastructure-as-code artefacts to avoid snowflake deployments.
60 |
61 | Review hot-spots ::
62 | * Ad-hoc overrides applied via command-line flags.
63 | * Legacy scripts that pre-date the Nine-Box taxonomy and omit traceability IDs.
64 | * Environment variable templating that truncates affinity strings.
65 |
66 | == Telemetry and Observability
67 |
68 | Why ::
69 | Workload visibility enables tuning and rapid diagnosis.
70 |
71 | Core controls ::
72 | * Export pauser and loop-block metrics to the organisation-wide metrics pipeline (e.g., Prometheus, Graphite).
73 | * Correlate Chronicle Threads metrics with downstream components (Queues, Maps) to contextualise latency spikes.
74 | * Ensure monitor-loop warnings are promoted to actionable alerts rather than suppressed in logs.
75 |
76 | Review hot-spots ::
77 | * Handlers that bypass standard logging frameworks.
78 | * Log rotation policies that discard stack traces before investigation.
79 | * Telemetry exporters that share threads with latency-sensitive loops.
80 |
81 | == Change Management
82 |
83 | Why ::
84 | Threading behaviour influences end-to-end latency; uncontrolled change increases risk.
85 |
86 | Core controls ::
87 | * Pair configuration modifications with updated documentation and automated tests (THR-OPS-023).
88 | * Track requirement IDs (e.g., THR-NF-P-027) in change tickets so reviewers can verify continued compliance.
89 | * Simulate workload impact in a staging environment whenever pausers, affinities, or monitor thresholds change.
90 |
91 | Review hot-spots ::
92 | * Hot fixes applied directly to production nodes.
93 | * Divergent configuration between active-active sites.
94 | * Missing rollback plans for affinity or pauser adjustments.
95 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/StopVCloseTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import net.openhft.chronicle.core.StackTrace;
8 | import net.openhft.chronicle.core.threads.EventHandler;
9 | import net.openhft.chronicle.core.threads.EventLoop;
10 | import net.openhft.chronicle.core.threads.HandlerPriority;
11 | import org.jetbrains.annotations.NotNull;
12 | import org.junit.jupiter.api.BeforeEach;
13 | import org.junit.jupiter.api.Test;
14 |
15 | import java.util.Collections;
16 | import java.util.EnumSet;
17 | import java.util.Set;
18 | import java.util.concurrent.BlockingQueue;
19 | import java.util.concurrent.LinkedBlockingQueue;
20 | import java.util.concurrent.TimeUnit;
21 | import java.util.concurrent.atomic.AtomicBoolean;
22 | import java.util.concurrent.atomic.AtomicReference;
23 | import java.util.concurrent.locks.LockSupport;
24 |
25 | import static org.junit.jupiter.api.Assertions.assertEquals;
26 | import static org.junit.jupiter.api.Assertions.assertTrue;
27 |
28 | /**
29 | * Demonstrates how stopping and closing an {@link EventLoop} differ.
30 | *
31 | *
Stopping allows existing handlers to finish so the loop may be started
32 | * again. Closing interrupts blocking work and frees the loop's resources.
33 | */
34 | public class StopVCloseTest extends ThreadsTestCommon {
35 |
36 | @BeforeEach
37 | void handlersInit() {
38 | ignoreException("Monitoring a task which has finished ");
39 | MonitorEventLoop.MONITOR_INITIAL_DELAY_MS = 1;
40 | }
41 |
42 | @Override
43 | public void preAfter() {
44 | MonitorEventLoop.MONITOR_INITIAL_DELAY_MS = 10_000;
45 | }
46 |
47 | @Test
48 | void eventGroupStop() {
49 | final EnumSet allPriorities = EnumSet.allOf(HandlerPriority.class);
50 | try (final EventLoop eventGroup = EventGroup.builder()
51 | .withConcurrentThreadsNum(1)
52 | .withPriorities(allPriorities)
53 | .build()) {
54 | eventGroup.start();
55 |
56 | Set started = Collections.synchronizedSet(EnumSet.noneOf(HandlerPriority.class));
57 | Set stopped = Collections.synchronizedSet(EnumSet.noneOf(HandlerPriority.class));
58 | for (HandlerPriority hp : allPriorities)
59 | eventGroup.addHandler(new EventHandler() {
60 | @Override
61 | public boolean action() {
62 | return true;
63 | }
64 |
65 | @Override
66 | public void loopStarted() {
67 | started.add(hp);
68 | }
69 |
70 | @Override
71 | public void loopFinished() {
72 | stopped.add(hp);
73 | }
74 |
75 | @Override
76 | public @NotNull HandlerPriority priority() {
77 | return hp;
78 | }
79 | });
80 |
81 | for (int i = 0; i < 100; i++)
82 | if (!started.equals(allPriorities))
83 | Jvm.pause(1);
84 | eventGroup.stop();
85 | assertTrue(eventGroup.isStopped());
86 | assertEquals(allPriorities, started);
87 | assertEquals(allPriorities, stopped);
88 | }
89 | }
90 |
91 | @Test
92 | void blockingStopped() throws InterruptedException {
93 | BlockingEventLoop bel = new BlockingEventLoop("blocking");
94 | bel.start();
95 | BlockingQueue q = new LinkedBlockingQueue<>();
96 | AtomicBoolean stopped = new AtomicBoolean();
97 | AtomicReference thread = new AtomicReference<>();
98 | bel.addHandler(() -> {
99 | try {
100 | thread.set(Thread.currentThread());
101 | q.add("token");
102 | LockSupport.parkNanos(2_000_000_000L);
103 | return false;
104 | } finally {
105 | stopped.set(true);
106 | }
107 | });
108 | q.poll(1, TimeUnit.SECONDS);
109 | bel.close();
110 | if (thread.get().isAlive())
111 | StackTrace.forThread(thread.get()).printStackTrace();
112 | assertTrue(stopped.get());
113 | }
114 | }
115 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/internal/EventLoopStateRendererTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads.internal;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import net.openhft.chronicle.core.threads.EventLoop;
8 | import net.openhft.chronicle.threads.*;
9 | import org.junit.jupiter.api.Test;
10 |
11 | import static org.junit.jupiter.api.Assertions.assertEquals;
12 | import static org.junit.jupiter.api.Assertions.assertTrue;
13 |
14 | class EventLoopStateRendererTest extends ThreadsTestCommon {
15 |
16 |     /** Blocks until the loop reports that it is alive, calling {@code Jvm.pause(10)} between checks. */
17 |     private static void awaitAlive(EventLoop loop) {
18 |         while (!loop.isAlive()) {
19 |             Jvm.pause(10);
20 |         }
21 |     }
22 |
23 |     /** Renders the loop under the given name, logs the dump on the startup channel and returns it. */
24 |     private static String renderAndLog(String name, EventLoop loop) {
25 |         final String rendered = EventLoopStateRenderer.INSTANCE.render(name, loop);
26 |         Jvm.startup().on(EventLoopStateRendererTest.class, rendered);
27 |         return rendered;
28 |     }
29 |
30 |     /** Asserts the title line, that the loop is neither closed nor closing, and the expected lifecycle line. */
31 |     private static void assertOpenLoop(String rendered, String title, String lifecycleLine) {
32 |         assertTrue(rendered.contains(title));
33 |         assertTrue(rendered.contains("Closed: false"));
34 |         assertTrue(rendered.contains("Closing: false"));
35 |         assertTrue(rendered.contains(lifecycleLine));
36 |     }
37 |
38 |     @Test
39 |     void isNullSafe() {
40 |         final String rendered = EventLoopStateRenderer.INSTANCE.render("Foo", null);
41 |         assertEquals("Foo event loop is null", rendered);
42 |     }
43 |
44 |     @Test
45 |     void testCanRenderMediumEventLoop() {
46 |         try (final MediumEventLoop loop = new MediumEventLoop(null, "foobar", Pauser.sleepy(), true, "any")) {
47 |             loop.start();
48 |             awaitAlive(loop);
49 |             final String rendered = renderAndLog("Medium", loop);
50 |             assertOpenLoop(rendered, "Medium event loop state", "Lifecycle: STARTED");
51 |             assertTrue(rendered.contains("Thread state: "));
52 |         }
53 |     }
54 |
55 |     @Test
56 |     void testCanRenderStoppedMediumEventLoop() {
57 |         try (final MediumEventLoop loop = new MediumEventLoop(null, "foobar", Pauser.sleepy(), true, "any")) {
58 |             loop.start();
59 |             awaitAlive(loop);
60 |             loop.stop();
61 |             while (!loop.isStopped()) {
62 |                 Jvm.pause(10);
63 |             }
64 |             final String rendered = renderAndLog("Medium", loop);
65 |             assertOpenLoop(rendered, "Medium event loop state", "Lifecycle: STOPPED");
66 |             assertTrue(rendered.contains("Thread state: "));
67 |         }
68 |     }
69 |
70 |     @Test
71 |     void testCanRenderUnstartedMediumEventLoop() {
72 |         try (final MediumEventLoop loop = new MediumEventLoop(null, "foobar", Pauser.sleepy(), true, "any")) {
73 |             // Rendered without ever calling start(), so the lifecycle is still NEW.
74 |             final String rendered = renderAndLog("Medium", loop);
75 |             assertOpenLoop(rendered, "Medium event loop state", "Lifecycle: NEW");
76 |         }
77 |     }
78 |
79 |     @Test
80 |     void testCanRenderMonitorEventLoop() {
81 |         try (final MonitorEventLoop loop = new MonitorEventLoop(null, Pauser.sleepy())) {
82 |             loop.start();
83 |             awaitAlive(loop);
84 |             final String rendered = renderAndLog("Monitor", loop);
85 |             assertOpenLoop(rendered, "Monitor event loop state", "Lifecycle: STARTED");
86 |         }
87 |     }
88 |
89 |     @Test
90 |     void testCanRenderEventGroup() {
91 |         try (final EventLoop group = EventGroup.builder().build()) {
92 |             group.start();
93 |             awaitAlive(group);
94 |             final String rendered = renderAndLog("EG", group);
95 |             assertOpenLoop(rendered, "EG event loop state", "Lifecycle: STARTED");
96 |         }
97 |     }
98 | }
99 |
--------------------------------------------------------------------------------
/src/main/docs/thread-security-review.adoc:
--------------------------------------------------------------------------------
1 | = Chronicle Threads Security Review
2 | :toc:
3 | :sectnums:
4 | :lang: en-GB
5 |
6 | == Handler Admission and Privilege Escalation
7 |
8 | Why ::
9 | Handlers execute with the full privileges of the hosting JVM; untrusted code can compromise sensitive data paths.
10 |
11 | Core risks ::
12 | * Malicious handler registration at runtime via exposed management endpoints (THR-FN-004).
13 | * Unsandboxed handlers accessing shared mutable state or credentials.
14 | * Reflection-based injection of handlers that bypass intended builder configuration (THR-FN-001).
15 |
16 | Mitigations ::
17 | * Restrict handler installation to trusted bootstrap code paths; gate dynamic registration behind authentication and authorisation.
18 | * Use code reviews and static analysis to enforce least-privilege principles within handlers.
19 | * Log handler class names and source artefacts during registration for audit trails.
20 |
21 | Review hot-spots ::
22 | * Deployment scripts that allow arbitrary classpath extensions.
23 | * OSGi or plugin frameworks injecting handlers dynamically.
24 |
25 | == Affinity and Resource Isolation
26 |
27 | Why ::
28 | Incorrect core binding can leak workload information across tenants or undermine performance isolation.
29 |
30 | Core risks ::
31 | * Shared core usage permits timing side channels between sensitive workloads.
32 | * NUMA misalignment causes cross-node memory access patterns exposing high-resolution timing data (THR-FN-017).
33 | * System properties overridden by untrusted inputs altering affinity strings (THR-OPS-023).
34 |
35 | Mitigations ::
36 | * Validate affinity strings against an approved list before instantiating `EventGroup` builders (THR-FN-015).
37 | * Store affinity selections in configuration repositories with change control.
38 | * Monitor actual thread-to-core bindings via OS tooling (e.g., `taskset`, `ps -Lo pid,psr`) and alert on drift.
39 |
40 | Review hot-spots ::
41 | * Container orchestrators with relaxed CPU quotas.
42 | * Multi-tenant hosts lacking hardware partitioning.
43 |
44 | == Monitoring and Telemetry Integrity
45 |
46 | Why ::
47 | Accurate telemetry is essential for detecting anomalous behaviour and limit breaches.
48 |
49 | Core risks ::
50 | * Attackers disable loop-block monitoring through system properties (THR-OPS-020).
51 | * Log tampering obscures stack traces that evidence suspicious handler execution times (THR-NF-O-019).
52 | * Telemetry collectors overloaded by attacker-generated events, leading to blind spots (THR-NF-O-021).
53 |
54 | Mitigations ::
55 | * Lock down JVM arguments in production; apply checksum or signature validation to launch scripts.
56 | * Forward critical monitor events to secure log aggregation platforms with tamper detection.
57 | * Rate-limit telemetry ingestion and validate payload sizes from handlers publishing metrics.
58 |
59 | Review hot-spots ::
60 | * Support run-books that recommend disabling monitors during troubleshooting.
61 | * Nodes operating with reduced logging due to storage constraints.
62 |
63 | == Shutdown and Resource Hygiene
64 |
65 | Why ::
66 | Handlers often manage off-heap or file-backed resources via Chronicle Core abstractions; improper shutdown can leak descriptors or expose data.
67 |
68 | Core risks ::
69 | * `EventGroup` instances left open, keeping sensitive files mapped (THR-FN-002).
70 | * Shutdown hooks overridden by untrusted code, preventing orderly release (THR-OPS-025).
71 | * Race conditions during shutdown causing inconsistent state for dependent services.
72 |
73 | Mitigations ::
74 | * Apply Chronicle Core's `ReferenceCounted` policies, ensuring handlers close dependent resources during loop shutdown.
75 | * Harden shutdown hook registration; disallow multiple components from mutating the same hook.
76 | * Capture and audit shutdown logs for every production cycle.
77 |
78 | Review hot-spots ::
79 | * Handlers that interact with Chronicle Queue or Chronicle Map without corresponding close semantics.
80 | * Scripted restarts that do not wait for `EventGroup.close()` completion.
81 |
82 | == Supply Chain and Dependency Considerations
83 |
84 | Why ::
85 | Chronicle Threads relies on Chronicle Core and Affinity; vulnerabilities propagate through these dependencies.
86 |
87 | Core risks ::
88 | * Outdated dependencies lacking recent security patches or mitigations.
89 | * Misaligned versions introducing behavioural regressions in pauser or affinity handling.
90 |
91 | Mitigations ::
92 | * Track dependency versions in BOM files; enforce minimum patch levels aligned with security advisories.
93 | * Execute dependency-update dry runs in staging to validate core functionality and performance targets.
94 | * Subscribe to Chronicle Software security bulletins and integrate alerts into incident response procedures.
95 |
96 | Review hot-spots ::
97 | * Custom forks of Chronicle libraries.
98 | * Environments that block outbound network access, delaying vulnerability scanning updates.
99 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/YieldingPauser.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import org.jetbrains.annotations.NotNull;
8 |
9 | import java.util.concurrent.TimeUnit;
10 | import java.util.concurrent.TimeoutException;
11 |
12 | /**
13 | * Pauser that spins for a fixed number of calls and then yields.
14 | *
15 | * <p>It consumes less CPU than {@link BusyPauser} yet avoids the sleeping
16 | * stage used by {@link LongPauser}. Use it when short bursts of activity are
17 | * expected but the thread must remain responsive.
18 | */
19 | public class YieldingPauser implements TimingPauser {
20 | final int minBusy; // spin threshold: a pause call yields once count has reached this value
21 | int count = 0; // pause calls (timed or not) since construction or the last reset()
22 | private long timePaused = 0; // accumulated yielding time in nanoseconds
23 | private long countPaused = 0; // number of recorded pauses, see countPaused()
24 | private long yieldStart = 0; // System.nanoTime() at the start of the open yielding phase; 0 when none is open
25 | private long timeOutStart = Long.MAX_VALUE; // start of the current timed pause; Long.MAX_VALUE means none has started
26 |
27 | /**
28 | * @param minBusy threshold for the pause call counter. The counter is incremented before it is
29 | * compared, so the call that raises it to {@code minBusy} already yields; {@code 0} and {@code 1} both yield immediately.
30 | */
31 | public YieldingPauser(int minBusy) {
32 | this.minBusy = minBusy;
33 | }
34 |
35 | @Override
36 | public void reset() { // records any open yielding phase, then clears the call counter and the timeout start
37 | checkYieldTime();
38 | count = 0;
39 | timeOutStart = Long.MAX_VALUE;
40 | }
41 |
42 | /**
43 | * Increments an internal counter and either spins or yields.
44 | *
45 | * <p>While the count is below {@code minBusy} the pause is counted, a safepoint is executed and
46 | * the method returns. Once the threshold is reached the thread yields and the
47 | * time spent yielding is measured.
48 | */
49 | @Override
50 | public void pause() {
51 | ++count;
52 | if (count < minBusy) {
53 | ++countPaused; // spin-phase calls are counted as pauses too
54 | Jvm.safepoint();
55 | return;
56 | }
57 | yield0();
58 | checkYieldTime(); // closes the phase opened by yield0() and adds its duration to timePaused
59 | }
60 |
61 | /**
62 | * Variant of {@link #pause()} that fails after the given timeout.
63 | *
64 | * <p>The first call records the start time. Once yielding begins the elapsed
65 | * time is checked and a {@link TimeoutException} is thrown when the limit is
66 | * exceeded. Calls made during the spin phase return without checking the timeout.
67 | *
68 | * @param timeout maximum time to wait
69 | * @param timeUnit unit of the timeout
70 | * @throws TimeoutException if the elapsed time passes the timeout
71 | */
72 | @Override
73 | public void pause(long timeout, @NotNull TimeUnit timeUnit) throws TimeoutException {
74 | if (timeOutStart == Long.MAX_VALUE) // first timed call since construction or reset()
75 | timeOutStart = System.nanoTime();
76 |
77 | ++count;
78 | if (count < minBusy) // NOTE(review): unlike pause(), this spin path neither calls Jvm.safepoint() nor increments countPaused - confirm this is intended
79 | return;
80 | yield0();
81 |
82 | if (System.nanoTime() - timeOutStart > timeUnit.toNanos(timeout))
83 | throw new TimeoutException(); // checkYieldTime() is skipped on this path, so the open phase is only recorded by a later pause or reset()
84 | checkYieldTime();
85 | }
86 |
87 | /**
88 | * Records and accumulates the duration of yielding if any, and resets the start time of yielding.
89 | */
90 | void checkYieldTime() {
91 | if (yieldStart > 0) { // a yielding phase is open
92 | long time = System.nanoTime() - yieldStart;
93 | timePaused += time;
94 | countPaused++;
95 | yieldStart = 0;
96 | }
97 | }
98 |
99 | /**
100 | * Initiates or continues a yielding phase for this pauser.
101 | */
102 | void yield0() {
103 | if (yieldStart == 0) // only the first yield of a phase records the start time
104 | yieldStart = System.nanoTime();
105 | Thread.yield();
106 | }
107 |
108 | @Override
109 | public void unpause() {
110 | // Do nothing
111 | }
112 |
113 | /**
114 | * Returns the total time this pauser has spent yielding, measured in milliseconds.
115 | * The time is accumulated in nanoseconds and truncated to whole milliseconds here.
116 | * @return total yielding time in milliseconds
117 | */
118 | @Override
119 | public long timePaused() {
120 | return timePaused / 1_000_000;
121 | }
122 |
123 | /**
124 | * Returns the number of times this pauser has been activated, including both busy-wait and yield iterations of {@link #pause()}.
125 | * Busy-wait iterations of {@link #pause(long, TimeUnit)} are not counted.
126 | * @return the total number of pause activations
127 | */
128 | @Override
129 | public long countPaused() {
130 | return countPaused;
131 | }
132 |
133 | /**
134 | * Provides a string representation of this pauser, which varies based on the {@code minBusy} configuration.
135 | *
136 | * @return a string representation identifying the mode and settings of this pauser
137 | */
138 | @Override
139 | public String toString() {
140 | if (minBusy == 2) // NOTE(review): presumably the minBusy that PauserMode.yielding is built with - confirm
141 | return "PauserMode.yielding";
142 | return "YieldingPauser{" +
143 | "minBusy=" + minBusy +
144 | '}';
145 | }
146 | }
147 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/ThreadMonitors.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import net.openhft.chronicle.threads.internal.ThreadMonitorHarness;
8 | import net.openhft.chronicle.threads.internal.ThreadsThreadHolder;
9 | import org.jetbrains.annotations.NotNull;
10 |
11 | import java.util.function.BooleanSupplier;
12 | import java.util.function.Consumer;
13 | import java.util.function.LongSupplier;
14 | import java.util.function.Supplier;
15 |
16 | public enum ThreadMonitors {
17 | ; // none
18 |
19 | /**
20 | * Create a monitor for a single thread.
21 | *
22 | * @param description text used in log messages
23 | * @param timeLimit threshold in nanoseconds before a stack trace is logged
24 | * @param timeSupplier supplies the current time, usually {@link System#nanoTime}
25 | * @param threadSupplier returns the thread to observe
26 | * @return a monitor handler for installation on a monitor loop
27 | */
28 | public static ThreadMonitor forThread(String description, long timeLimit,
29 | LongSupplier timeSupplier,
30 | Supplier threadSupplier) {
31 | return new ThreadMonitorHarness(new ThreadsThreadHolder(description,
32 | timeLimit, timeSupplier, threadSupplier, () -> true, perfOn()));
33 | }
34 |
35 | @NotNull
36 | private static Consumer perfOn() {
37 | return msg -> Jvm.perf().on(ThreadMonitor.class, msg);
38 | }
39 |
40 | /**
41 | * Variant of {@link #forThread(String, long, LongSupplier, Supplier)} that
42 | * allows the caller to control logging.
43 | *
44 | * @param description text used in log messages
45 | * @param timeLimit threshold in nanoseconds before a stack trace is logged
46 | * @param timeSupplier supplies the current time
47 | * @param threadSupplier returns the thread to observe
48 | * @param logEnabled predicate controlling whether logging occurs
49 | * @param logConsumer receives the formatted log message
50 | * @return a monitor handler for installation on a monitor loop
51 | */
52 | public static ThreadMonitor forThread(String description, long timeLimit,
53 | LongSupplier timeSupplier,
54 | Supplier threadSupplier,
55 | BooleanSupplier logEnabled,
56 | Consumer logConsumer) {
57 | return new ThreadMonitorHarness(new ThreadsThreadHolder(description,
58 | timeLimit, timeSupplier, threadSupplier, logEnabled, logConsumer));
59 | }
60 |
61 | /**
62 | * Create a monitor aimed at a service thread.
63 | *
64 | * @param description text used in log messages
65 | * @param timeLimit threshold in nanoseconds before a stack trace is logged
66 | * @param timeSupplier supplies the current time
67 | * @param threadSupplier returns the thread to observe
68 | * @return a monitor handler for installation on a monitor loop
69 | */
70 | public static ThreadMonitor forServices(String description, long timeLimit,
71 | LongSupplier timeSupplier,
72 | Supplier threadSupplier) {
73 | return new ThreadMonitorHarness(new ThreadsThreadHolder(description,
74 | timeLimit, timeSupplier, threadSupplier, () -> true, perfOn()));
75 | }
76 |
77 | /**
78 | * Variant of {@link #forServices(String, long, LongSupplier, Supplier)} with
79 | * caller controlled logging.
80 | *
81 | * @param description text used in log messages
82 | * @param timeLimit threshold in nanoseconds before a stack trace is logged
83 | * @param timeSupplier supplies the current time
84 | * @param threadSupplier returns the thread to observe
85 | * @param logEnabled predicate controlling whether logging occurs
86 | * @param logConsumer receives the formatted log message
87 | * @return a monitor handler for installation on a monitor loop
88 | */
89 | public static ThreadMonitor forServices(String description, long timeLimit,
90 | LongSupplier timeSupplier,
91 | Supplier threadSupplier,
92 | BooleanSupplier logEnabled,
93 | Consumer logConsumer) {
94 | return new ThreadMonitorHarness(new ThreadsThreadHolder(description,
95 | timeLimit, timeSupplier, threadSupplier, logEnabled, logConsumer));
96 | }
97 | }
98 |
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/EventGroupStressTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import net.openhft.chronicle.core.OS;
8 | import net.openhft.chronicle.core.io.Closeable;
9 | import net.openhft.chronicle.core.threads.EventHandler;
10 | import net.openhft.chronicle.core.threads.HandlerPriority;
11 | import net.openhft.chronicle.testframework.process.JavaProcessBuilder;
12 | import org.jetbrains.annotations.NotNull;
13 | import org.junit.jupiter.api.Disabled;
14 | import org.junit.jupiter.api.Test;
15 | import org.junit.jupiter.api.Timeout;
16 |
17 | import java.util.ArrayList;
18 | import java.util.List;
19 | import java.util.concurrent.ThreadLocalRandom;
20 | import java.util.concurrent.TimeUnit;
21 | import java.util.stream.IntStream;
22 |
23 | import static org.junit.jupiter.api.Assumptions.assumeFalse;
24 |
25 | /**
26 | * Spawns many event groups across several processes to check that a large
27 | * number of event loops can be created and closed without exhausting the CPU.
28 | */
29 | class EventGroupStressTest extends ThreadsTestCommon {
30 |
31 |     private static final int NUM_PROCESSES = 10;
32 |     private static final int NUM_GROUPS_PER_PROCESS = 20;
33 |
34 |     /**
35 |      * Starts {@link #NUM_PROCESSES} child JVMs, each running {@link EventGroupStarterProcess} with
36 |      * {@link #NUM_GROUPS_PER_PROCESS} event groups, and then waits up to 10 seconds for each to exit.
37 |      *
38 |      * <p>All processes are started before any of them is waited on, so they run concurrently.
39 |      * A process that times out or exits with a non-zero code is reported via {@code Jvm.error()}.
40 |      * Any process still alive at the end is destroyed.
41 |      */
42 |     @Disabled("https://github.com/OpenHFT/Chronicle-Threads/issues/186")
43 |     @Test
44 |     @Timeout(30)
45 |     void canOverloadTheCPUWithEventGroupsSafely() {
46 |         assumeFalse(OS.isWindows());
47 |         // toArray() is a terminal operation: it forces every process to be started up front.
48 |         // Chaining forEach() directly onto the lazy stream would start and await them one at a time.
49 |         final Process[] processes = IntStream.range(0, NUM_PROCESSES)
50 |                 .mapToObj(i -> JavaProcessBuilder.create(EventGroupStarterProcess.class)
51 |                         .withProgramArguments(String.valueOf(NUM_GROUPS_PER_PROCESS))
52 |                         .start())
53 |                 .toArray(Process[]::new);
54 |         try {
55 |             for (Process process : processes)
56 |                 awaitProcess(process);
57 |         } finally {
58 |             // Do not leave child JVMs running after the test.
59 |             for (Process process : processes)
60 |                 if (process.isAlive())
61 |                     process.destroyForcibly();
62 |         }
63 |     }
64 |
65 |     /**
66 |      * Waits up to 10 seconds for the process and reports a timeout or a non-zero exit code.
67 |      * If interrupted, the error is logged and the interrupt flag is restored.
68 |      */
69 |     private static void awaitProcess(Process process) {
70 |         try {
71 |             if (!process.waitFor(10, TimeUnit.SECONDS) || process.exitValue() != 0) {
72 |                 Jvm.error().on(EventGroupStressTest.class, "Process didn't end or ended in error");
73 |                 JavaProcessBuilder.printProcessOutput("event group getter", process);
74 |             }
75 |         } catch (InterruptedException e) {
76 |             Jvm.error().on(EventGroupStressTest.class, "Interrupted waiting for process to end");
77 |             Thread.currentThread().interrupt();
78 |         }
79 |     }
80 |
81 |     /**
82 |      * Entry point of each child JVM: starts the requested number of event groups, waits until
83 |      * every handler has been told its loop started, then closes all groups.
84 |      */
85 |     static class EventGroupStarterProcess {
86 |
87 |         /**
88 |          * @param args {@code args[0]} is the number of event groups to start
89 |          */
90 |         public static void main(String[] args) {
91 |             final int groupsToStart = Integer.parseInt(args[0]);
92 |             final List<EventGroup> eventGroups = new ArrayList<>();
93 |             final List<TestEventHandler> handlers = new ArrayList<>();
94 |             try {
95 |                 for (int j = 0; j < groupsToStart; j++) {
96 |                     final EventGroup eventGroup = EventGroup.builder().withBinding("any").build();
97 |                     // Track the group immediately so it is closed even if a later call throws.
98 |                     eventGroups.add(eventGroup);
99 |                     // One handler is added before start() and one after, to cover both paths.
100 |                     final TestEventHandler beforeStartHandler = new TestEventHandler();
101 |                     eventGroup.addHandler(beforeStartHandler);
102 |                     eventGroup.start();
103 |                     final TestEventHandler afterStartHandler = new TestEventHandler();
104 |                     eventGroup.addHandler(afterStartHandler);
105 |                     handlers.add(beforeStartHandler);
106 |                     handlers.add(afterStartHandler);
107 |                 }
108 |                 while (!handlers.stream().allMatch(handler -> handler.loopStarted)) {
109 |                     Jvm.pause(100);
110 |                 }
111 |             } finally {
112 |                 eventGroups.forEach(Closeable::closeQuietly);
113 |             }
114 |         }
115 |     }
116 |
117 |     /**
118 |      * No-op handler with a randomly chosen priority that records when {@link #loopStarted()} is called.
119 |      */
120 |     static class TestEventHandler implements EventHandler {
121 |
122 |         private static final HandlerPriority[] PRIORITIES = new HandlerPriority[]{
123 |                 HandlerPriority.HIGH, HandlerPriority.MEDIUM, HandlerPriority.REPLICATION, HandlerPriority.TIMER,
124 |                 HandlerPriority.BLOCKING, HandlerPriority.DAEMON
125 |         };
126 |
127 |         private final HandlerPriority priority;
128 |         // volatile: written by the event loop thread and read by the polling main thread.
129 |         private volatile boolean loopStarted = false;
130 |
131 |         TestEventHandler() {
132 |             this.priority = PRIORITIES[ThreadLocalRandom.current().nextInt(PRIORITIES.length)];
133 |         }
134 |
135 |         @Override
136 |         public void loopStarted() {
137 |             loopStarted = true;
138 |         }
139 |
140 |         @Override
141 |         public boolean action() {
142 |             // Does nothing
143 |             return false;
144 |         }
145 |
146 |         @Override
147 |         public @NotNull HandlerPriority priority() {
148 |             return priority;
149 |         }
150 |     }
151 | }
152 |
--------------------------------------------------------------------------------
/src/main/docs/thread-architecture-overview.adoc:
--------------------------------------------------------------------------------
1 | = Chronicle Threads Architecture Overview
2 | :toc:
3 | :sectnums:
4 | :lang: en-GB
5 |
6 | == Purpose
7 |
8 | This guide explains how Chronicle Threads composes event loops, handlers, pausers, and monitoring into a cohesive runtime so that engineers can reason about placement, affinity, and operational behaviour.
9 | It complements the functional catalogue in `project-requirements.adoc` and provides concrete design cues for solution architects.
10 |
11 | == Event Loop Topologies
12 |
13 | Chronicle Threads organises work into named `EventLoop` instances that the `EventGroup` manages (THR-FN-001).
14 | Each loop is single-threaded for handler execution (THR-FN-006) and is categorised by handler priority (THR-FN-005).
15 |
16 | ....
17 | EventGroup
18 | |
19 | +-- CoreLoop[HIGH|MEDIUM] ---> fast path handlers (trading logic, matching)
20 | |
21 | +-- BlockingPool[BLOCKING] --> dedicated threads for I/O or storage waits
22 | |
23 | +-- TimerLoop[TIMER] ------> scheduled maintenance and time-based work
24 | |
25 | +-- MonitorLoop[MONITOR] -> observes loop-block latency and pauser metrics
26 | ....
27 |
28 | Handlers attach to the loop whose priority matches their declared `HandlerPriority`.
29 | An `EventGroup` materialises blocking and monitor loops only when required.
30 | Applications can deploy multiple `EventGroup` instances in the same JVM to isolate subsystems whilst sharing pauser implementations.
31 |
32 | == Handler Lifecycle and Serial Execution
33 |
34 | Handlers are added at runtime via `EventGroup.addHandler()` (THR-FN-004).
35 | The loop invokes each handler serially, ensuring stateful logic can remain lock-free (THR-FN-006).
36 | The handler signals its progress via the boolean return value of `action()` (THR-FN-007).
37 | Self-removal uses `InvalidEventHandlerException` (THR-FN-008); the loop removes the handler, logs through the standard `Jvm` channel, and continues running (THR-NF-O-009).
38 |
39 | Handlers should bound their execution time so that monitor loops can flag outliers reliably (THR-NF-O-018).
40 | Long-running work belongs on the `BLOCKING` priority where independent threads handle it.
41 | When reconfiguring a live loop, call `EventLoop.addHandler()` on the owning loop thread or rely on the concurrency-safe wrappers provided by `EventGroup`.
42 |
43 | == Pauser Strategy and Scheduler Interaction
44 |
45 | Pausers implement the idle strategy for each loop and are configured via builders or per-loop overrides (THR-FN-010, THR-FN-011).
46 | Adaptive pausers offer tuning parameters that balance busy-spin and sleeping phases (THR-FN-012) and expose metrics for observability (THR-NF-O-013, THR-NF-O-021).
47 |
48 | * `BUSY` / `TIMED_BUSY`: Bind to isolated cores, targeting nanosecond wake-up latency (THR-DOC-016).
49 | * `BALANCED` / `SLEEPY`: Combine spin, yield, and park for mixed workloads.
50 | * Custom: Provide a bespoke `Pauser` for domain-specific throttling.
51 |
52 | Hot paths avoid allocations (THR-NF-P-014) so a pauser change cannot introduce garbage.
53 | Each loop records the time spent paused, supporting utilisation diagnostics.
54 |
55 | == Affinity and NUMA Alignment
56 |
57 | Affinity strings supplied via builders control how loops bind to hardware threads (THR-FN-015).
58 | They accept the Chronicle Affinity syntax, including NUMA-aware layouts (THR-FN-017).
59 | Example:
60 |
61 | ----
62 | EventGroup eg = EventGroup.builder()
63 | .withName("risk-eg")
64 | .withBinding("0,2-3")
65 | .build();
66 | ----
67 |
68 | * `0` binds the primary high-priority loop to core 0.
69 | * `2-3` pins additional loops (e.g., MONITOR or BLOCKING) across cores 2 and 3.
70 |
71 | When multiple `EventGroup` instances coexist, coordinate bindings to avoid core contention.
72 | Document selected affinities alongside deployment manifests so operators can validate CPU isolation.
73 |
74 | == Monitoring Plane
75 |
76 | Unless monitoring is disabled, each `EventGroup` provisions a monitor loop that samples execution times and resets pausers at configurable intervals (THR-NF-O-018, THR-NF-O-019, THR-OPS-020).
77 | The monitor loop:
78 |
79 | * Measures handler invocation duration, logging stack traces for breaches.
80 | * Publishes pauser metrics through configured `PauserMonitorFactory` hooks.
81 | * Responds to system properties that disable or tune monitoring (THR-OPS-023).
82 |
83 | The monitoring loop is not latency-critical but must keep pace with the core loops to avoid stale diagnostics.
84 | Ensure JVM logging levels capture WARN messages from monitor handlers in production.
85 |
86 | == Integration Touchpoints
87 |
88 | Chronicle Threads commonly underpins Chronicle Queue tailers, Chronicle Map maintenance tasks, and application-specific pipelines.
89 | When integrating:
90 |
91 | * Use `net.openhft.chronicle.core.io.Closeable` semantics to align handler lifecycle with queue appenders or tailers.
92 | * Combine telemetry exports with the monitor loop to funnel utilisation metrics to the estate-wide monitoring system.
93 | * Align handler priorities with data criticality so that core loops handle order flow while auxiliary loops manage persistence, replay, or housekeeping.
94 |
95 | Refer to `README.adoc` for code-level examples and to the operational controls document for deployment-time safeguards.
96 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/internal/ThreadsThreadHolder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads.internal;
5 |
6 | import net.openhft.affinity.Affinity;
7 | import net.openhft.chronicle.core.Jvm;
8 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException;
9 | import net.openhft.chronicle.threads.ThreadHolder;
10 |
11 | import java.util.function.BooleanSupplier;
12 | import java.util.function.Consumer;
13 | import java.util.function.LongSupplier;
14 | import java.util.function.Supplier;
15 |
16 | /**
17 | * Helper used by {@link ThreadMonitorHarness} to monitor a service thread.
18 | *
19 | * The harness polls the thread and the supplied time source. When the
20 | * thread appears to be blocked for longer than the configured limit the
21 | * stack trace is logged via {@link #logConsumer} if {@link #logEnabled} is
22 | * true.
23 | *
It extends {@link AbstractCloseable}, and integrates with the
20 | * closeable hierarchy.
21 | *
22 | * <p>The life-cycle follows {@link EventLoopLifecycle}:
23 | * <ul>
24 | * <li>{@code NEW} - constructed but not running.</li>
25 | * <li>{@code STARTED} - handlers are executing.</li>
26 | * <li>{@code STOPPING} - {@link #stop()} has been requested.</li>
27 | * <li>{@code STOPPED} - all work is finished.</li>
28 | * </ul>
29 | * Transitions are linear in that order. Invoking {@code stop()} while in
30 | * {@code NEW} skips {@code STARTED} entirely. Both {@code start()} and
31 | * {@code stop()} are idempotent and {@code stop()} blocks until the loop is
32 | * {@code STOPPED}.
33 | */
34 | @SuppressWarnings("this-escape")
35 | public abstract class AbstractLifecycleEventLoop extends AbstractCloseable implements EventLoop {
36 |
37 | /**
38 | * After this time, awaitTermination will log an error and return, this is really only so
39 | * tests don't block forever. This time should be kept as "effectively forever".
40 | */
41 | private static final long AWAIT_TERMINATION_TIMEOUT_MS = TimeUnit.MINUTES.toMillis(5);
42 | private final AtomicReference lifecycle = new AtomicReference<>(EventLoopLifecycle.NEW);
43 | protected final String name;
44 | boolean privateGroup;
45 |
46 | /**
47 | * Create an instance with the supplied name.
48 | *
49 | * The {@link AbstractCloseable} thread ownership check is disabled so the
50 | * loop may be started or stopped from threads other than the creating
51 | * thread.
52 | *
53 | * @param name descriptive name for the loop
54 | */
55 | protected AbstractLifecycleEventLoop(@NotNull String name) {
56 | this.name = name.replaceAll("/$", "");
57 |
58 | // event loops operate on dedicated threads but may be closed elsewhere
59 | singleThreadedCheckDisabled(true);
60 | }
61 |
62 | protected String nameWithSlash() {
63 | return withSlash(name);
64 | }
65 |
66 | @Override
67 | public final void start() {
68 | throwExceptionIfClosed();
69 |
70 | if (lifecycle.compareAndSet(EventLoopLifecycle.NEW, EventLoopLifecycle.STARTED)) {
71 | performStart();
72 | }
73 | }
74 |
75 | @Override
76 | public final String name() {
77 | return name;
78 | }
79 |
80 | /**
81 | * Perform the concrete start up work.
82 | * Invoked exactly once when the life-cycle moves from
83 | * {@link EventLoopLifecycle#NEW} to {@link EventLoopLifecycle#STARTED}.
84 | */
85 | protected abstract void performStart();
86 |
87 | @Override
88 | public final void stop() {
89 | if (lifecycle.compareAndSet(EventLoopLifecycle.NEW, EventLoopLifecycle.STOPPING)) {
90 | performStopFromNew();
91 | lifecycle.set(EventLoopLifecycle.STOPPED);
92 | } else if (lifecycle.compareAndSet(EventLoopLifecycle.STARTED, EventLoopLifecycle.STOPPING)) {
93 | performStopFromStarted();
94 | lifecycle.set(EventLoopLifecycle.STOPPED);
95 | } else {
96 | awaitTermination();
97 | }
98 | }
99 |
100 | /**
101 | * Stop the loop when {@link #stop()} is invoked before it has started.
102 | * Implementations should block until every handler has received
103 | * {@link EventHandler#loopFinished()}.
104 | */
105 | protected abstract void performStopFromNew();
106 |
107 | /**
108 | * Stop the loop once it has begun processing.
109 | * Implementations should wait for the current iteration to finish and then
110 | * invoke {@link EventHandler#loopFinished()} on every handler.
111 | */
112 | protected abstract void performStopFromStarted();
113 |
114 | /**
115 | * Wait for the loop to reach {@link EventLoopLifecycle#STOPPED}.
116 | *
117 | *
If the state does not change within
118 | * {@link #AWAIT_TERMINATION_TIMEOUT_MS} milliseconds an error is logged and
119 | * the method returns. The timeout is primarily to avoid tests hanging
120 | * indefinitely.
The {@link Pauser} supplied at construction is used to create a fresh
27 | * instance for every handler thread. Idle handlers therefore pause
28 | * independently of one another.
29 | *
30 | *
Calling {@link #start()} launches a thread for each added handler.
31 | * When {@link #stop()} is invoked those threads are interrupted and the
32 | * executor service is shut down.
33 | *
34 | *
Handlers with priorities other than
35 | * {@link net.openhft.chronicle.core.threads.HandlerPriority#BLOCKING}
36 | * are accepted but treated the same as blocking handlers.
37 | */
38 | public class BlockingEventLoop extends AbstractLifecycleEventLoop implements EventLoop {
39 |
40 | @NotNull
41 | private transient final EventLoop parent;
42 | @NotNull
43 | private transient final ExecutorService service;
44 | private final List handlers = new CopyOnWriteArrayList<>();
45 | private final List runners = new CopyOnWriteArrayList<>();
46 | private final NamedThreadFactory threadFactory;
47 | private final Supplier pauserSupplier;
48 |
49 | public BlockingEventLoop(@NotNull final EventLoop parent,
50 | @NotNull final String name,
51 | @NotNull final Supplier pauser) {
52 | super(name);
53 | this.parent = parent;
54 | this.threadFactory = new NamedThreadFactory(name, null, null, true);
55 | this.service = Executors.newCachedThreadPool(threadFactory);
56 | this.pauserSupplier = pauser;
57 | }
58 |
59 | public BlockingEventLoop(@NotNull final String name) {
60 | super(name);
61 | this.parent = this;
62 | this.threadFactory = new NamedThreadFactory(name, null, null, true);
63 | this.service = Executors.newCachedThreadPool(threadFactory);
64 | this.pauserSupplier = Pauser::balanced;
65 | }
66 |
67 | /**
68 | * Registers a new handler. Every call spawns another thread for the
69 | * handler.
70 | *
Priorities other than
71 | * {@link net.openhft.chronicle.core.threads.HandlerPriority#BLOCKING}
72 | * are permitted but are not treated specially.
Paths are registered via {@link #pollDiskSpace(File)}. The first call
27 | * obtains the {@link FileStore} for the supplied file and adds it to the
28 | * internal maps. Each subsequent call reuses the cached entry. This
29 | * method is typically invoked when opening a queue or a memory-mapped file. A
30 | * scheduled executor named {@value #DISK_SPACE_CHECKER_NAME} then runs
31 | * the monitor every {@code chronicle.disk.monitor.period} seconds (10 by default).
32 | *
33 | *
The monitor may be disabled with the system property
34 | * {@code chronicle.disk.monitor.disable}. The threshold that triggers a
35 | * warning is controlled by {@code chronicle.disk.monitor.threshold.percent}.
36 | *
37 | *
When the available space falls below these limits the monitor invokes a
38 | * {@link NotifyDiskLow} service. Implementations are discovered with
39 | * {@link java.util.ServiceLoader} and the default simply logs a warning.
40 | *
41 | *
The {@link #run()} loop iterates over the tracked {@link DiskAttributes}
42 | * entries. Each record stores a {@link FileStore}, the time for the next check
43 | * and the total size. When the free space is less than two hundred megabytes a
44 | * panic notification is sent. Otherwise the next check is delayed based on the
45 | * amount of free space.
46 | */
47 | public enum DiskSpaceMonitor implements Runnable, Closeable {
48 |     INSTANCE;
49 |
50 |     /** Name of the scheduled executor that runs the monitor. */
51 |     public static final String DISK_SPACE_CHECKER_NAME = "disk~space~checker";
52 |     /** When set, a warning is logged before dropping a file store that can no longer be queried. */
53 |     static final boolean WARN_DELETED = Jvm.getBoolean("disk.monitor.deleted.warning");
54 |     private static final boolean DISABLED = Jvm.getBoolean("chronicle.disk.monitor.disable");
55 |     /** {@link #pollDiskSpace(File)} calls taking longer than this many micro-seconds are reported via {@code Jvm.perf()}. */
56 |     public static final int TIME_TAKEN_WARN_THRESHOLD_US = Jvm.getInteger("chronicle.disk.monitor.warn.threshold.us", 250);
57 |     private final NotifyDiskLow notifyDiskLow;
58 |     /** File stores already resolved, keyed by absolute path. */
59 |     final Map<String, FileStore> fileStoreCacheMap = new ConcurrentHashMap<>();
60 |     /** One record per monitored file store. */
61 |     final Map<FileStore, DiskAttributes> diskAttributesMap = new ConcurrentHashMap<>();
62 |     /** {@code null} when monitoring is disabled. */
63 |     final ScheduledExecutorService executor;
64 |     private volatile int thresholdPercentage = Jvm.getInteger("chronicle.disk.monitor.threshold.percent", 5);
65 |     private TimeProvider timeProvider = SystemTimeProvider.INSTANCE;
66 |
67 |     /**
68 |      * Discovers the {@link NotifyDiskLow} services, falling back to a logging
69 |      * implementation when none is registered, and, unless monitoring is disabled,
70 |      * schedules this monitor at a fixed rate. The period is read in seconds from
71 |      * {@code chronicle.disk.monitor.period} and defaults to 10.
72 |      */
73 |     DiskSpaceMonitor() {
74 |         final ServiceLoader<NotifyDiskLow> services = ServiceLoader.load(NotifyDiskLow.class);
75 |         if (services.iterator().hasNext()) {
76 |             final List<NotifyDiskLow> warners = new ArrayList<>();
77 |             services.iterator().forEachRemaining(warners::add);
78 |             this.notifyDiskLow = new NotifyDiskLowIterator(warners);
79 |         } else {
80 |             this.notifyDiskLow = new NotifyDiskLowLogWarn();
81 |         }
82 |         // An enum constructor may not refer to the non-constant static DISABLED,
83 |         // so the property is read again here.
84 |         final boolean disabled = Jvm.getBoolean("chronicle.disk.monitor.disable");
85 |         if (disabled) {
86 |             executor = null;
87 |         } else {
88 |             executor = Threads.acquireScheduledExecutorService(DISK_SPACE_CHECKER_NAME, true);
89 |             final long period = Jvm.getLong("chronicle.disk.monitor.period", 10L);
90 |             executor.scheduleAtFixedRate(this, period, period, TimeUnit.SECONDS);
91 |         }
92 |     }
93 |
94 |     /** Forgets every cached file store and tracked record; used for testing purposes. */
95 |     public void clear() {
96 |         fileStoreCacheMap.clear();
97 |         diskAttributesMap.clear();
98 |     }
99 |
100 |     /**
101 |      * Registers the file store that holds {@code file} for monitoring.
102 |      * Does nothing when monitoring is disabled or the file does not exist. A failure
103 |      * to resolve the file store is logged as a warning rather than thrown.
104 |      *
105 |      * @param file a path on the file store to monitor
106 |      */
107 |     public void pollDiskSpace(File file) {
108 |         if (DISABLED)
109 |             return;
110 |         long start = timeProvider.currentTimeNanos();
111 |
112 |         final String absolutePath = file.getAbsolutePath();
113 |         FileStore fs = fileStoreCacheMap.get(absolutePath);
114 |         if (fs == null) {
115 |             if (!file.exists()) {
116 |                 // nothing to monitor if it doesn't exist.
117 |                 return;
118 |             }
119 |             Path path = Paths.get(absolutePath);
120 |             try {
121 |                 fs = Files.getFileStore(path);
122 |                 fileStoreCacheMap.put(absolutePath, fs);
123 |             } catch (IOException e) {
124 |                 Jvm.warn().on(getClass(), "Error trying to obtain the FileStore for " + path, e);
125 |                 return;
126 |             }
127 |         }
128 |         diskAttributesMap.computeIfAbsent(fs, DiskAttributes::new);
129 |
130 |         final long tookUs = (timeProvider.currentTimeNanos() - start) / 1_000;
131 |         if (tookUs > TIME_TAKEN_WARN_THRESHOLD_US)
132 |             Jvm.perf().on(getClass(), "Took " + tookUs / 1000.0 + " ms to pollDiskSpace for " + absolutePath);
133 |     }
134 |
135 |     /**
136 |      * Checks every tracked file store once.
137 |      * A record whose check fails with an {@link IOException} is removed. An unchecked
138 |      * exception, for example from a {@link NotifyDiskLow} service, is logged and the
139 |      * record kept: letting it escape would make {@code scheduleAtFixedRate} suppress
140 |      * every later run.
141 |      */
142 |     @Override
143 |     public void run() {
144 |         for (Iterator<DiskAttributes> iterator = diskAttributesMap.values().iterator(); iterator.hasNext(); ) {
145 |             DiskAttributes da = iterator.next();
146 |             try {
147 |                 da.run();
148 |             } catch (IOException e) {
149 |                 if (WARN_DELETED)
150 |                     Jvm.warn().on(getClass(), "Unable to get disk space for " + da.fileStore, e);
151 |                 iterator.remove();
152 |             } catch (RuntimeException e) {
153 |                 Jvm.warn().on(getClass(), "Unexpected error checking disk space for " + da.fileStore, e);
154 |             }
155 |         }
156 |     }
157 |
158 |     /** @return the share of total space, in percent, below which unallocated space triggers a warning */
159 |     public int getThresholdPercentage() {
160 |         return thresholdPercentage;
161 |     }
162 |
163 |     /** @param thresholdPercentage share of total space, in percent, below which unallocated space triggers a warning */
164 |     public void setThresholdPercentage(int thresholdPercentage) {
165 |         this.thresholdPercentage = thresholdPercentage;
166 |     }
167 |
168 |     @VisibleForTesting
169 |     protected void setTimeProvider(TimeProvider timeProvider) {
170 |         this.timeProvider = timeProvider;
171 |     }
172 |
173 |     /** Shuts down the scheduled executor; a no-op when monitoring is disabled. */
174 |     @Override
175 |     public void close() {
176 |         if (executor != null)
177 |             Threads.shutdown(executor);
178 |     }
179 |
180 |     /** Monitoring state for one {@link FileStore}. */
181 |     final class DiskAttributes {
182 |
183 |         private final FileStore fileStore;
184 |
185 |         /** Earliest time, in milliseconds from the time provider, of the next check. */
186 |         long timeNextCheckedMS;
187 |         /** Total size in bytes, fetched on the first check. */
188 |         long totalSpace;
189 |
190 |         DiskAttributes(FileStore fileStore) {
191 |             this.fileStore = fileStore;
192 |         }
193 |
194 |         /**
195 |          * Checks the free space unless the next check is not yet due.
196 |          *
197 |          * @throws IOException if the file store cannot be queried
198 |          */
199 |         void run() throws IOException {
200 |             long now = timeProvider.currentTimeMillis();
201 |             if (timeNextCheckedMS > now)
202 |                 return;
203 |
204 |             long start = System.nanoTime();
205 |             if (totalSpace <= 0)
206 |                 totalSpace = fileStore.getTotalSpace();
207 |
208 |             long unallocatedBytes = fileStore.getUnallocatedSpace();
209 |             if (unallocatedBytes < (200 << 20)) {
210 |                 // if less than 200 Megabytes
211 |                 notifyDiskLow.panic(fileStore);
212 |
213 |             } else if (unallocatedBytes < totalSpace * DiskSpaceMonitor.INSTANCE.thresholdPercentage / 100) {
214 |                 // percentage used, rounded up to one decimal place
215 |                 final double diskSpaceFull = ((long) (1000d * (totalSpace - unallocatedBytes) / totalSpace + 0.999)) / 10.0;
216 |                 notifyDiskLow.warning(diskSpaceFull, fileStore);
217 |
218 |             } else {
219 |                 // wait 1 ms per MB or approx 1 sec per GB free.
220 |                 timeNextCheckedMS = now + (unallocatedBytes >> 20);
221 |             }
222 |             long time = System.nanoTime() - start;
223 |             if (time > 1_000_000)
224 |                 Jvm.perf().on(getClass(), "Took " + time / 10_000 / 100.0 + " ms to check the disk space of " + fileStore);
225 |         }
226 |     }
227 |
228 |     /** Fans each notification out to every discovered {@link NotifyDiskLow} service. */
229 |     private static class NotifyDiskLowIterator implements NotifyDiskLow {
230 |         private final List<NotifyDiskLow> list;
231 |
232 |         public NotifyDiskLowIterator(List<NotifyDiskLow> list) {
233 |             this.list = list;
234 |         }
235 |
236 |         @Override
237 |         public void panic(FileStore fileStore) {
238 |             for (NotifyDiskLow mfy : list)
239 |                 mfy.panic(fileStore);
240 |         }
241 |
242 |         @Override
243 |         public void warning(double diskSpaceFullPercent, FileStore fileStore) {
244 |             for (NotifyDiskLow mfy : list)
245 |                 mfy.warning(diskSpaceFullPercent, fileStore);
246 |         }
247 |     }
248 | }
249 |
--------------------------------------------------------------------------------
/AGENTS.md:
--------------------------------------------------------------------------------
1 | # Guidance for AI agents, bots, and humans contributing to Chronicle Software's OpenHFT projects.
2 |
3 | LLM-based agents can accelerate development only if they respect our house rules. This file tells you:
4 |
5 | * how to run and verify the build;
6 | * what *not* to comment;
7 | * when to open pull requests.
8 |
9 | ## Language & character-set policy
10 |
11 | | Requirement | Rationale |
12 | |--------------|-----------|
13 | | **British English** spelling (`organisation`, `licence`, *not* `organization`, `license`) except technical US spellings like `synchronized` | Keeps wording consistent with Chronicle's London HQ and existing docs. See the University of Oxford style guide for reference. |
14 | | **ASCII-7 only** (code-points 0-127). Avoid smart quotes, non-breaking spaces and accented characters. | ASCII-7 survives every toolchain Chronicle uses, incl. low-latency binary wire formats that expect the 8th bit to be 0. |
15 | | If a symbol is not available in ASCII-7, use a textual form such as `micro-second`, `>=`, `:alpha:`, `:yes:`. This is the preferred approach and Unicode must not be inserted. | Extended or '8-bit ASCII' variants are *not* portable and are therefore disallowed. |
16 |
17 | ## Javadoc guidelines
18 |
19 | **Goal:** Every Javadoc block should add information you cannot glean from the method signature alone. Anything else is
20 | noise and slows readers down.
21 |
22 | | Do | Don't |
23 | |----|-------|
24 | | State *behavioural contracts*, edge-cases, thread-safety guarantees, units, performance characteristics and checked exceptions. | Restate the obvious ("Gets the value", "Sets the name"). |
25 | | Keep the first sentence short; it becomes the summary line in aggregated docs. | Duplicate parameter names/ types unless more explanation is needed. |
26 | | Prefer `@param` for *constraints* and `@throws` for *conditions*, following Oracle's style guide. | Pad comments to reach a line-length target. |
27 | | Remove or rewrite autogenerated Javadoc for trivial getters/setters. | Leave stale comments that now contradict the code. |
28 |
29 | The principle that Javadoc should only explain what is *not* manifest from the signature is well-established in the
30 | wider Java community.
31 |
32 | ## Build & test commands
33 |
34 | Agents must verify that the project still compiles and all unit tests pass before opening a PR:
35 |
36 | ```bash
37 | # From repo root
38 | mvn -q verify
39 | ```
40 |
41 | ## Commit-message & PR etiquette
42 |
43 | 1. **Subject line <= 72 chars**, imperative mood: "Fix roll-cycle offset in `ExcerptAppender`".
44 | 2. Reference the JIRA/GitHub issue if it exists.
45 | 3. In *body*: *root cause -> fix -> measurable impact* (latency, allocation, etc.). Use ASCII bullet points.
46 | 4. **Run `mvn verify`** again after rebasing.
47 |
48 | ## What to ask the reviewers
49 |
50 | * *Is this AsciiDoc documentation precise enough for a clean-room re-implementation?*
51 | * Does the Javadoc explain the code's *why* and *how* that a junior developer would not be expected to work out?
52 | * Are the documentation, tests and code updated together so the change is clear?
53 | * Does the commit point back to the relevant requirement or decision tag?
54 | * Would an example or small diagram help future maintainers?
55 |
56 | ## Project requirements
57 |
58 | See the [Decision Log](src/main/adoc/decision-log.adoc) for the latest project decisions.
59 | See the [Project Requirements](src/main/adoc/project-requirements.adoc) for details on project requirements.
60 |
61 | ## Elevating the Workflow with Real-Time Documentation
62 |
63 | Building upon our existing Iterative Workflow, the newest recommendation is to emphasise *real-time updates* to documentation.
64 | Ensure the relevant `.adoc` files are updated when features, requirements, implementation details, or tests change.
65 | This tight loop informs the AI accurately and creates immediate clarity for all team members.
66 |
67 | ### Benefits of Real-Time Documentation
68 |
69 | * **Confidence in documentation**: Accurate docs prevent miscommunications that derail real-world outcomes.
70 | * **Reduced drift**: Real-time updates keep requirements, tests and code aligned.
71 | * **Faster feedback**: AI can quickly highlight inconsistencies when everything is in sync.
72 | * **Better quality**: Frequent checks align the implementation with the specified behaviour.
73 | * **Smoother onboarding**: Up-to-date AsciiDoc clarifies the system for new developers.
74 | * **Incremental changes**: AIDE flags newly updated files so you can keep the documentation synchronised.
75 |
76 | ### Best Practices
77 |
78 | * **Maintain Sync**: Keep documentation (AsciiDoc), tests, and code synchronised in version control. Changes in one area should prompt reviews and potential updates in the others.
79 | * **Doc-First for New Work**: For *new* features or requirements, aim to update documentation first, then use AI to help produce or refine corresponding code and tests. For refactoring or initial bootstrapping, updates might flow from code/tests back to documentation, which should then be reviewed and finalised.
80 | * **Small Commits**: Each commit should ideally relate to a single requirement or coherent change, making reviews easier for humans and AI analysis tools.
81 | * **Team Buy-In**: Encourage everyone to review AI outputs critically and contribute to maintaining the synchronicity of all artefacts.
82 |
83 | ## AI Agent Guidelines
84 |
85 | When using AI agents to assist with development, please adhere to the following guidelines:
86 |
87 | * **Respect the Language & Character-set Policy**: Ensure all AI-generated content follows the British English and ASCII-7 guidelines outlined above.
88 | * **Focus on Clarity**: AI-generated documentation should be clear and concise and add value beyond what is already present in the code or existing documentation.
89 | * **Avoid Redundancy**: Do not generate content that duplicates existing documentation or code comments unless it provides additional context or clarification.
90 | * **Review AI Outputs**: Always review AI-generated content for accuracy, relevance, and adherence to the project's documentation standards before committing it to the repository.
91 |
92 | ## Company-Wide Tagging
93 |
94 | This section records **company-wide** decisions that apply to *all* Chronicle projects. All identifiers use the `<Scope>-<Tag>-xxx` form, for example `THR-OPS-023`. The `xxx` numbers are unique within the same Scope even if the tags are different. Component-specific decisions live in their xxx-decision-log.adoc files.
95 |
96 | ### Tag Taxonomy (Nine-Box Framework)
97 |
98 | To improve traceability, we adopt the Nine-Box taxonomy for requirement and decision identifiers. These tags are used in addition to the existing ALL prefix, which remains reserved for global decisions across every project.
99 |
100 | .Adopt a Nine-Box Requirement Taxonomy
101 |
102 | |Tag | Scope | Typical examples |
103 | |----|-------|------------------|
104 | |FN |Functional user-visible behaviour | Message routing, business rules |
105 | |NF-P |Non-functional - Performance | Latency budgets, throughput targets |
106 | |NF-S |Non-functional - Security | Authentication method, TLS version |
107 | |NF-O |Non-functional - Operability | Logging, monitoring, health checks |
108 | |TEST |Test / QA obligations | Chaos scenarios, benchmarking rigs |
109 | |DOC |Documentation obligations | Sequence diagrams, user guides |
110 | |OPS |Operational / DevOps concerns | Helm values, deployment checklist |
111 | |UX |Operator or end-user experience | CLI ergonomics, dashboard layouts |
112 | |RISK |Compliance / risk controls | GDPR retention, audit trail |
113 |
114 | `ALL-*` stays global, case-exact tags. Pick one primary tag if multiple apply.
115 |
116 | ### Decision Record Template
117 |
118 | ```asciidoc
119 | === [Identifier] Title of Decision
120 |
121 | Date:: YYYY-MM-DD
122 | Context::
123 | * What is the issue that this decision addresses?
124 | * What are the driving forces, constraints, and requirements?
125 | Decision Statement::
126 | * What is the change that is being proposed or was decided?
127 | Alternatives Considered::
128 | * [Alternative 1 Name/Type]:
129 | ** *Description:* Brief description of the alternative.
130 | ** *Pros:* ...
131 | ** *Cons:* ...
132 | * [Alternative 2 Name/Type]:
133 | ** *Description:* Brief description of the alternative.
134 | ** *Pros:* ...
135 | ** *Cons:* ...
136 | Rationale for Decision::
137 | * Why was the chosen decision selected?
138 | * How does it address the context and outweigh the cons of alternatives?
139 | Impact & Consequences::
140 | * What are the positive and negative consequences of this decision?
141 | * How does this decision affect the system, developers, users, or operations?
142 | * What are the trade-offs made?
143 | Notes/Links::
144 | * (Optional: Links to relevant issues, discussions, documentation, proof-of-concepts)
145 | ```
146 |
147 | ## Asciidoc formatting guidelines
148 |
149 | ### List Indentation
150 |
151 | Do not rely on indentation for list items in AsciiDoc documents. Use the following pattern instead:
152 |
153 | ```asciidoc
154 | section:: Top Level Section
155 | * first level
156 | ** nested level
157 | ```
158 |
159 | ### Emphasis and Bold Text
160 |
161 | In AsciiDoc, an underscore `_` is _emphasis_; `*text*` is *bold*.
162 |
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/MonitorEventLoop.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
3 | */
4 | package net.openhft.chronicle.threads;
5 |
6 | import net.openhft.chronicle.core.Jvm;
7 | import net.openhft.chronicle.core.annotation.HotMethod;
8 | import net.openhft.chronicle.core.io.Closeable;
9 | import net.openhft.chronicle.core.io.SimpleCloseable;
10 | import net.openhft.chronicle.core.threads.EventHandler;
11 | import net.openhft.chronicle.core.threads.EventLoop;
12 | import net.openhft.chronicle.core.threads.HandlerPriority;
13 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException;
14 | import org.jetbrains.annotations.NotNull;
15 |
16 | import java.util.List;
17 | import java.util.concurrent.CopyOnWriteArrayList;
18 | import java.util.concurrent.ExecutorService;
19 | import java.util.concurrent.Executors;
20 |
21 | import static net.openhft.chronicle.threads.Threads.*;
22 |
23 | /**
24 | * Event loop dedicated to low-frequency monitoring tasks. Handlers added to this loop are
25 | * expected to use {@link HandlerPriority#MONITOR} so they do not interfere with application
26 | * work. The provided {@link Pauser} determines how often the handlers are polled and is reset
27 | * whenever a handler reports activity.
28 | *
29 | *
The loop waits for {@link #MONITOR_INITIAL_DELAY_MS} milliseconds after startup before
30 | * invoking any handlers.