├── docs └── images │ ├── image1.png │ ├── EventLoop.png │ ├── eventloop.jpg │ ├── pauserModes.png │ ├── Thread Affinity_line.png │ └── source │ └── image1.svg ├── src ├── main │ ├── java │ │ └── net │ │ │ └── openhft │ │ │ └── chronicle │ │ │ └── threads │ │ │ ├── example │ │ │ └── package-info.java │ │ │ ├── EventHandlers.java │ │ │ ├── TimingPauser.java │ │ │ ├── internal │ │ │ ├── package-info.java │ │ │ ├── EventLoopUtil.java │ │ │ ├── EventLoopStateRenderer.java │ │ │ ├── EventLoopThreadHolder.java │ │ │ ├── ThreadMonitorHarness.java │ │ │ └── ThreadsThreadHolder.java │ │ │ ├── NotifyDiskLow.java │ │ │ ├── ThreadMonitor.java │ │ │ ├── package-info.java │ │ │ ├── NotifyDiskLowLogWarn.java │ │ │ ├── ExecutorFactory.java │ │ │ ├── EventLoopLifecycle.java │ │ │ ├── VanillaExecutorFactory.java │ │ │ ├── PauserMonitorFactory.java │ │ │ ├── CoreEventLoop.java │ │ │ ├── ThreadHolder.java │ │ │ ├── TimedEventHandler.java │ │ │ ├── EventLoops.java │ │ │ ├── BusyPauser.java │ │ │ ├── BusyTimedPauser.java │ │ │ ├── NamedThreadFactory.java │ │ │ ├── PauserMode.java │ │ │ ├── YieldingPauser.java │ │ │ ├── ThreadMonitors.java │ │ │ ├── MilliPauser.java │ │ │ ├── AbstractLifecycleEventLoop.java │ │ │ ├── BlockingEventLoop.java │ │ │ ├── DiskSpaceMonitor.java │ │ │ └── MonitorEventLoop.java │ └── docs │ │ ├── thread-thread-safety-guide.adoc │ │ ├── thread-performance-targets.adoc │ │ ├── thread-operational-controls.adoc │ │ ├── thread-security-review.adoc │ │ └── thread-architecture-overview.adoc └── test │ └── java │ └── net │ └── openhft │ └── chronicle │ └── threads │ ├── internal │ ├── ThreadsThreadHolderTest.java │ ├── EventLoopStateRendererTest.java │ └── ThreadMonitorHarnessTest.java │ ├── EventGroupBadAffinityTest.java │ ├── LongPauserBenchmark.java │ ├── YieldingPauserTest.java │ ├── BlockingEventLoopTest.java │ ├── PauserTest.java │ ├── PauserTimeoutTest.java │ ├── ThreadMonitorsTest.java │ ├── Issue251Test.java │ ├── LongPauserTest.java │ ├── 
DiskSpaceMonitorTest.java │ ├── example │ └── SingleAndMultiThreadedExample.java │ ├── ThreadsTest.java │ ├── StopVCloseTest.java │ ├── EventGroupStressTest.java │ ├── ThreadsTestCommon.java │ ├── TestEventHandlers.java │ ├── EventLoopsTest.java │ └── LoopIntrospectionTest.java ├── system.properties ├── LICENSE.adoc ├── .gitignore ├── systemProperties.adoc └── AGENTS.md /docs/images/image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenHFT/Chronicle-Threads/HEAD/docs/images/image1.png -------------------------------------------------------------------------------- /docs/images/EventLoop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenHFT/Chronicle-Threads/HEAD/docs/images/EventLoop.png -------------------------------------------------------------------------------- /docs/images/eventloop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenHFT/Chronicle-Threads/HEAD/docs/images/eventloop.jpg -------------------------------------------------------------------------------- /docs/images/pauserModes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenHFT/Chronicle-Threads/HEAD/docs/images/pauserModes.png -------------------------------------------------------------------------------- /docs/images/Thread Affinity_line.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenHFT/Chronicle-Threads/HEAD/docs/images/Thread Affinity_line.png -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/example/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 
chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | /** 5 | * Example code showing basic Chronicle Threads usage. 6 | *

7 | * The classes in this package demonstrate simple patterns for configuring event 8 | * loops and pausers. These classes are illustrative only and should not be used 9 | * as production code. 10 | */ 11 | package net.openhft.chronicle.threads.example; 12 | -------------------------------------------------------------------------------- /system.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | # 4 | 5 | # Tracing if resources are closed/released correctly. 6 | jvm.resource.tracing=true 7 | disable.resource.warning=true 8 | 9 | disable.discard.warning=false 10 | # for profiling 11 | jvm.safepoint.enabled=false 12 | # reduce logging of the announcer 13 | chronicle.announcer.disable=true 14 | pauser.minProcessors=1 15 | # to monitor disk space every 1 second in testing 16 | chronicle.disk.monitor.period=1 17 | 18 | # check it can be changed in a test 19 | chronicle.disk.monitor.threshold.percent=5 20 | -------------------------------------------------------------------------------- /LICENSE.adoc: -------------------------------------------------------------------------------- 1 | 2 | == Copyright 2016-2025 chronicle.software 3 | 4 | Licensed under the *Apache License, Version 2.0* (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/internal/ThreadsThreadHolderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads.internal; 5 | 6 | import org.junit.jupiter.api.Test; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertEquals; 9 | 10 | class ThreadsThreadHolderTest extends net.openhft.chronicle.threads.ThreadsTestCommon { 11 | 12 | @Test 13 | void testNanosecondsToMillisWithTenthsPrecision() { 14 | assertEquals(1.2d, ThreadsThreadHolder.nanosecondsToMillisWithTenthsPrecision(1_234_567), 0.000000001); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/EventHandlers.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.threads.EventHandler; 7 | 8 | /** 9 | * Placeholder enum that holds simple {@link EventHandler} constants. 10 | * The only entry is {@link #NOOP}, whose {@code action()} method always 11 | * returns {@code false}. 
12 | */ 13 | enum EventHandlers implements EventHandler { 14 | NOOP { 15 | @Override 16 | public boolean action() { 17 | return false; 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/TimingPauser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import java.util.concurrent.TimeUnit; 7 | import java.util.concurrent.TimeoutException; 8 | 9 | /** 10 | * Marker interface to show we support {@link #pause(long, TimeUnit)} 11 | */ 12 | public interface TimingPauser extends Pauser { 13 | 14 | /** 15 | * Pauses, keeping track of the accumulated pause time, and throws if the timeout is exceeded. 16 | * 17 | * @param timeout the timeout 18 | * @param timeUnit the unit of {@code timeout} 19 | * @throws TimeoutException if the timeout is exceeded 20 | */ 21 | @Override 22 | void pause(long timeout, TimeUnit timeUnit) throws TimeoutException; 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/internal/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | /** 5 | * This package and any and all sub-packages contain strictly internal classes for this Chronicle library. 6 | * Internal classes shall never be used directly. 7 | *

8 | * Specifically, the following actions (including, but not limited to) are not allowed 9 | * on internal classes and packages: 10 | *

15 | *

16 | * The classes in this package and any sub-package are subject to 17 | * changes at any time for any reason. 18 | */ 19 | package net.openhft.chronicle.threads.internal; 20 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/NotifyDiskLow.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import java.nio.file.FileStore; 7 | 8 | /** 9 | * Receives notifications from the disk space monitor. 10 | * 11 | *

The {@link #panic(FileStore)} method is called when a file store 12 | * is critically short of space. Implementations should act immediately 13 | * as memory-mapped writes may fail.

14 | * 15 | *

The {@link #warning(double, FileStore)} method signals that a disk 16 | * is nearing its limit. The percentage parameter denotes how full the disk 17 | * currently is.

18 | */ 19 | public interface NotifyDiskLow { 20 | void panic(FileStore fileStore); 21 | 22 | void warning(double diskSpaceFullPercent, FileStore fileStore); 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/ThreadMonitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.threads.EventHandler; 7 | import net.openhft.chronicle.core.threads.HandlerPriority; 8 | import org.jetbrains.annotations.NotNull; 9 | 10 | /** 11 | * Event handler used by the monitor loop to detect threads that appear to be 12 | * blocked. Instances are typically produced by {@link ThreadMonitors}. 13 | */ 14 | public interface ThreadMonitor extends EventHandler { 15 | /** 16 | * Returns {@link HandlerPriority#MONITOR} so monitoring does not compete 17 | * with application handlers. 
18 | */ 19 | @Override 20 | default @NotNull HandlerPriority priority() { 21 | return HandlerPriority.MONITOR; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### How to update 2 | # This is copied from OpenHFT/.gitignore 3 | # update the original and run OpenHFT/update_gitignore.sh 4 | 5 | ### Compiled class file 6 | *.class 7 | 8 | ### Package Files 9 | *.jar 10 | *.war 11 | *.ear 12 | 13 | ### Log file 14 | *.log 15 | 16 | ### IntelliJ 17 | *.iml 18 | *.ipr 19 | *.iws 20 | .idea 21 | compat_reports 22 | .attach_pid* 23 | 24 | ### Virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 25 | hs_err_pid* 26 | 27 | ### Maven template 28 | target/ 29 | pom.xml.tag 30 | pom.xml.releaseBackup 31 | pom.xml.versionsBackup 32 | pom.xml.next 33 | release.properties 34 | 35 | ### Eclipse template 36 | *.pydevproject 37 | .metadata 38 | .gradle 39 | bin/ 40 | tmp/ 41 | *.tmp 42 | *.bak 43 | *.swp 44 | *~.nib 45 | local.properties 46 | .classpath 47 | .project 48 | .settings/ 49 | .loadpath 50 | 51 | ### Queue files 52 | *.cq4t 53 | *.cq4 54 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | /** 5 | * Event loop implementations and utilities for running deterministic 6 | * single-threaded event handlers. {@link net.openhft.chronicle.core.threads.EventLoop EventLoop} 7 | * implementations are aggregated by {@link net.openhft.chronicle.threads.EventGroup EventGroup}. 8 | * Pauser strategies ({@link net.openhft.chronicle.threads.Pauser Pauser}) control the 9 | * trade off between latency and CPU use when no work is available. 
10 | *

11 | * Typical usage involves building an {@code EventGroup} via 12 | * {@link net.openhft.chronicle.threads.EventGroupBuilder}, installing handlers then calling 13 | * {@code start()}. Handlers are executed on the same thread, avoiding locks in hot paths. 14 | *

15 | * Behaviour such as loop monitoring or thread counts can be configured via system properties 16 | * (see {@code systemProperties.adoc}). 17 | */ 18 | package net.openhft.chronicle.threads; 19 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/EventGroupBadAffinityTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.threads.EventLoop; 7 | import org.junit.jupiter.api.Test; 8 | import org.junit.jupiter.api.Timeout; 9 | 10 | import java.util.concurrent.TimeoutException; 11 | 12 | import static org.junit.jupiter.api.Assertions.assertThrows; 13 | 14 | class EventGroupBadAffinityTest extends ThreadsTestCommon { 15 | 16 | /** 17 | * Ensures that an invalid CPU affinity string fails fast so that 18 | * misconfigured deployments do not run with unexpected processor binding. 19 | */ 20 | @Timeout(5) // JUnit 5 @Timeout defaults to seconds; 5_000 here meant ~83 minutes, not 5 s 21 | @Test 22 | void testInvalidAffinity() { 23 | expectException("Cannot parse 'xxx'"); 24 | ignoreException("Timed out waiting for start!"); 25 | try (final EventLoop eventGroup = EventGroup.builder().withBinding("xxx").build()) { 26 | assertThrows(TimeoutException.class, eventGroup::start); 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/NotifyDiskLowLogWarn.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | 8 | import java.nio.file.FileStore; 9 | 10 | /** 11 | * Logs to the configured {@link Jvm} logger when disk space is low. 
12 | * The {@link #panic(FileStore)} method emits an error level message 13 | * and {@link #warning(double, FileStore)} emits a warning. 14 | */ 15 | public class NotifyDiskLowLogWarn implements NotifyDiskLow { 16 | @Override 17 | public void panic(FileStore fileStore) { 18 | Jvm.error().on(DiskSpaceMonitor.class, "your disk " + fileStore + " is almost full, " + 19 | "warning: the JVM may crash if it undertakes an operation with a memory-mapped file."); 20 | } 21 | 22 | @Override 23 | public void warning(double diskSpaceFullPercent, FileStore fileStore) { 24 | Jvm.warn().on(DiskSpaceMonitor.class, "your disk " + fileStore 25 | + " is " + diskSpaceFullPercent + "% full, " + 26 | "warning: the JVM may crash if it undertakes an operation with a memory-mapped file and the disk is out of space."); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/internal/EventLoopUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads.internal; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | 8 | /** 9 | * Configuration values for event loop behaviour. 10 | * 11 | *

The {@code eventloop.accept.mod} system property, read into 12 | * {@link #ACCEPT_HANDLER_MOD_COUNT}, specifies how often new accept handlers are inserted to avoid starvation. A value of zero 13 | * or less disables this feature. If the property is absent the 14 | * {@link #DEFAULT_ACCEPT_HANDLER_MOD_COUNT default} of 128 is used. The 15 | * {@link #IS_ACCEPT_HANDLER_MOD_COUNT} flag reveals whether re-arming is 16 | * enabled.

17 | */ 18 | public enum EventLoopUtil { 19 | ; // none 20 | 21 | /** Fallback when {@code eventloop.accept.mod} is not set. */ 22 | private static final int DEFAULT_ACCEPT_HANDLER_MOD_COUNT = 128; 23 | 24 | /** Interval for re-adding accept handlers. */ 25 | public static final int ACCEPT_HANDLER_MOD_COUNT = 26 | Jvm.getInteger("eventloop.accept.mod", DEFAULT_ACCEPT_HANDLER_MOD_COUNT); 27 | 28 | /** True when accept handler re-arming is active. */ 29 | public static final boolean IS_ACCEPT_HANDLER_MOD_COUNT = ACCEPT_HANDLER_MOD_COUNT > 0; 30 | } 31 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/LongPauserBenchmark.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | 8 | import java.util.concurrent.TimeUnit; 9 | 10 | /** 11 | * Benchmark used to gauge the overhead of waking a {@link LongPauser}. 12 | * 13 | * A helper thread loops calling {@link LongPauser#pause()} and then yields. 14 | * The main thread repeatedly invokes {@link LongPauser#unpause()} a fixed 15 | * number of times and measures the elapsed time. Dividing the total by the 16 | * iteration count reveals the average cost of a single unpark operation. 
17 | */ 18 | public final class LongPauserBenchmark { 19 | 20 | public static void main(String[] args) { 21 | final LongPauser pauser = new LongPauser(1, 1, 100, 1000, TimeUnit.MICROSECONDS); 22 | Thread thread = new Thread(() -> { 23 | while (!Thread.interrupted()) { 24 | pauser.pause(); 25 | Thread.yield(); 26 | } 27 | }); 28 | thread.start(); 29 | 30 | for (int t = 0; t < 3; t++) { 31 | long start = System.nanoTime(); 32 | int runs = 10000000; 33 | for (int i = 0; i < runs; i++) 34 | pauser.unpause(); 35 | long time = System.nanoTime() - start; 36 | System.out.printf("Average time to unpark was %,d ns%n", time / runs); 37 | Jvm.pause(20); 38 | } 39 | thread.interrupt(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/ExecutorFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import java.util.concurrent.ExecutorService; 7 | import java.util.concurrent.ScheduledExecutorService; 8 | 9 | /** 10 | * Strategy interface for obtaining {@link ExecutorService} instances. 11 | * 12 | *

The Chronicle Threads utility relies on this abstraction so that 13 | * applications may plug in their own executor creation logic. The 14 | * supplied implementation can integrate with alternative concurrency 15 | * frameworks or simply wrap the standard JDK executors.

16 | */ 17 | public interface ExecutorFactory { 18 | 19 | /** 20 | * Creates or retrieves an {@link ExecutorService}. 21 | * 22 | * @param name base name for the threads created by the executor 23 | * @param threads requested thread count 24 | * @param daemon {@code true} if the threads should be daemon threads 25 | * @return a service suitable for running general tasks 26 | */ 27 | ExecutorService acquireExecutorService(String name, int threads, boolean daemon); 28 | 29 | /** 30 | * Creates or retrieves a {@link ScheduledExecutorService}. 31 | * 32 | * @param name base name for the threads created by the scheduler 33 | * @param daemon {@code true} if the threads should be daemon threads 34 | * @return a single-threaded scheduler 35 | */ 36 | ScheduledExecutorService acquireScheduledExecutorService(String name, boolean daemon); 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/EventLoopLifecycle.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | /** 7 | * The life-cycle of an event loop. The state moves from {@link #NEW} to 8 | * {@link #STARTED} when {@code start()} is invoked. A request to {@code stop()} 9 | * moves the loop to {@link #STOPPING} and once all handlers have completed it 10 | * becomes {@link #STOPPED}. 11 | *

12 | * Possible transitions include: 13 | *

14 |  *      +-------------------------------------------------+
15 |  *      |                                                 v
16 |  * +---------+    +-----------+    +-----------+    +-----------+
17 |  * |   NEW   |--->|  STARTED  |--->|  STOPPING |--->|  STOPPED  |
18 |  * +---------+    +-----------+    +-----------+    +-----------+
19 |  * 
20 | */ 21 | public enum EventLoopLifecycle { 22 | /** 23 | * The event loop has been created but not yet started. Only 24 | * {@code start()} or {@code stop()} are meaningful in this state. 25 | */ 26 | NEW(false), 27 | 28 | /** 29 | * The event loop is running. Calling {@code stop()} moves it to 30 | * {@link #STOPPING}. 31 | */ 32 | STARTED(false), 33 | 34 | /** 35 | * {@code stop()} has been called and handlers are finishing. Further calls 36 | * to {@code stop()} wait for completion. 37 | */ 38 | STOPPING(true), 39 | 40 | /** 41 | * The event loop has been stopped and cannot be restarted. 42 | */ 43 | STOPPED(true); 44 | /** Value reported by {@link #isStopped()}; {@code true} only for {@link #STOPPING} and {@link #STOPPED}. */ 45 | private final boolean stopped; 46 | /** @param stopped whether {@link #isStopped()} should report {@code true} for this state */ 47 | EventLoopLifecycle(boolean stopped) { 48 | this.stopped = stopped; 49 | } 50 | /** Returns {@code true} once a stop has been requested, i.e. for both {@link #STOPPING} and {@link #STOPPED}; {@code false} for {@link #NEW} and {@link #STARTED}. */ 51 | public boolean isStopped() { 52 | return stopped; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/YieldingPauserTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import org.junit.jupiter.api.Test; 7 | 8 | import java.util.concurrent.TimeUnit; 9 | import java.util.concurrent.TimeoutException; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertEquals; 12 | import static org.junit.jupiter.api.Assertions.fail; 13 | 14 | class YieldingPauserTest extends ThreadsTestCommon { 15 | 16 | @Test 17 | void pause() { 18 | final int pauseTimeMillis = 100; 19 | final YieldingPauser tp = new YieldingPauser(pauseTimeMillis); 20 | for (int i = 0; i < 10; i++) { 21 | final long start = System.currentTimeMillis(); 22 | while (true) { 23 | try { 24 | tp.pause(pauseTimeMillis, TimeUnit.MILLISECONDS); 25 | if (System.currentTimeMillis() - start > 200) 26 | fail(); 27 | } catch (TimeoutException e) { 28 | final long time = 
System.currentTimeMillis() - start; 29 | // delta used to be 5 for Linux but occasionally we see it blow in Continuous Integration 30 | // a delta of 20 was used here, however in some situations in CI that was not sufficient: 31 | // org.opentest4j.AssertionFailedError: expected: <100.0> but was: <126.0> 32 | int delta = 30; 33 | // please don't add delta to pauseTimeMillis below - it makes this test flakier on Windows 34 | assertEquals(pauseTimeMillis, time, delta); 35 | tp.reset(); 36 | break; 37 | } 38 | } 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/VanillaExecutorFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import java.util.concurrent.ExecutorService; 7 | import java.util.concurrent.Executors; 8 | import java.util.concurrent.ScheduledExecutorService; 9 | 10 | /** 11 | * Default {@link ExecutorFactory} used by Chronicle Threads. 12 | * 13 | *

It creates standard JDK executor services backed by a 14 | * {@link NamedThreadFactory}. Single thread requests result in a 15 | * {@link java.util.concurrent.Executors#newSingleThreadExecutor single-thread} 16 | * pool, otherwise a fixed thread pool is returned. Scheduled executors are 17 | * always single-threaded.

18 | */ 19 | public enum VanillaExecutorFactory implements ExecutorFactory { 20 | /** sole instance used by default */ 21 | INSTANCE; 22 | 23 | /** 24 | * Provides an executor backed by a {@link NamedThreadFactory}. A single 25 | * thread executor is created when {@code threads} equals one, otherwise a 26 | * fixed thread pool is returned. 27 | */ 28 | @Override 29 | public ExecutorService acquireExecutorService(String name, int threads, boolean daemon) { 30 | NamedThreadFactory threadFactory = new NamedThreadFactory(name, daemon); 31 | return threads == 1 32 | ? Executors.newSingleThreadExecutor(threadFactory) 33 | : Executors.newFixedThreadPool(threads, threadFactory); 34 | } 35 | 36 | /** 37 | * Creates a single-thread {@link ScheduledExecutorService}. 38 | */ 39 | @Override 40 | public ScheduledExecutorService acquireScheduledExecutorService(String name, boolean daemon) { 41 | return Executors.newSingleThreadScheduledExecutor( 42 | new NamedThreadFactory(name, daemon)); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/PauserMonitorFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.threads.EventHandler; 7 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException; 8 | 9 | import java.util.Iterator; 10 | import java.util.ServiceLoader; 11 | 12 | /** 13 | * Factory for {@link EventHandler} instances that observe a {@link Pauser}. 14 | * 15 | *

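Implementations are discovered through Java's {@link ServiceLoader} 16 | * mechanism. When no implementation is found, {@link #load()} returns a fallback factory whose handlers do no monitoring: their {@code action()} throws {@link InvalidEventHandlerException} whenever it is called.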

17 | */ 18 | public interface PauserMonitorFactory { 19 | 20 | /** 21 | * Create an event handler that records the behaviour of a {@code pauser}. 22 | * Typical implementations will log the pause count or total time paused and 23 | * may alert if the pauser has remained idle for longer than {@code seconds}. 24 | * 25 | * @param pauser the {@link Pauser} to monitor 26 | * @param description label used in the monitor's {@code toString} 27 | * @param seconds threshold before reporting prolonged pauses 28 | * @return an event handler suitable for a monitoring loop 29 | */ 30 | EventHandler pauserMonitor(Pauser pauser, String description, int seconds); 31 | /** Returns the first {@link PauserMonitorFactory} found by {@link ServiceLoader}; if none is registered, returns a factory whose handlers throw {@link InvalidEventHandlerException} from {@code action()} and whose {@code toString()} is {@code NOOP_PAUSER_MONITOR}. */ 32 | static PauserMonitorFactory load() { 33 | final Iterator<PauserMonitorFactory> iterator = ServiceLoader.load(PauserMonitorFactory.class).iterator(); 34 | return iterator.hasNext() ? 35 | iterator.next() : 36 | (pauser, description, seconds) -> new EventHandler() { 37 | @Override 38 | public boolean action() throws InvalidEventHandlerException { 39 | throw new InvalidEventHandlerException(); 40 | } 41 | @Override 42 | public String toString() { 43 | return "NOOP_PAUSER_MONITOR"; 44 | } 45 | }; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/CoreEventLoop.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.threads.EventLoop; 7 | import org.jetbrains.annotations.NotNull; 8 | 9 | import java.util.function.BooleanSupplier; 10 | 11 | /** 12 | * Contract for the fast core loop used within an {@link EventGroup}. 13 | * 14 | *

The core loop runs on a dedicated thread and executes handlers 15 | * one by one. Implementations aim to minimise latency and usually rely 16 | * on a {@link net.openhft.chronicle.threads.Pauser} during idle periods. 17 | */ 18 | public interface CoreEventLoop extends EventLoop { 19 | 20 | /** 21 | * The value returned for {@link #loopStartNS()} when the event loop is not currently 22 | * executing an iteration 23 | */ 24 | long NOT_IN_A_LOOP = Long.MAX_VALUE; 25 | 26 | /** 27 | * The thread currently running the loop. 28 | * 29 | * @return the loop thread, or {@code null} if the loop has not yet started 30 | * or has finished 31 | */ 32 | Thread thread(); 33 | 34 | /** 35 | * Time in {@link System#nanoTime()} units when the current iteration began. 36 | * 37 | * @return the start time, or {@link #NOT_IN_A_LOOP} if the loop is idle 38 | */ 39 | long loopStartNS(); 40 | 41 | /** 42 | * Dump the stack trace when a monitor suspects the loop is blocked. 43 | * 44 | * @param message text to include in the log 45 | * @param finalCheck invoked after taking the stack trace; the state is 46 | * logged only when this returns {@code true} 47 | */ 48 | void dumpRunningState(@NotNull String message, @NotNull BooleanSupplier finalCheck); 49 | 50 | /** 51 | * Check whether the given thread is executing this loop. 52 | * 53 | *

Used by diagnostics to ignore activity from other threads.

54 | * 55 | * @param thread candidate thread 56 | * @return {@code true} if the loop is running on {@code thread} 57 | */ 58 | boolean isRunningOnThread(Thread thread); 59 | 60 | void privateGroup(boolean privateGroup); 61 | } 62 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/BlockingEventLoopTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.threads.InterruptedRuntimeException; 8 | import org.junit.jupiter.api.Test; 9 | 10 | import java.util.concurrent.BrokenBarrierException; 11 | import java.util.concurrent.CyclicBarrier; 12 | import java.util.concurrent.TimeUnit; 13 | import java.util.concurrent.TimeoutException; 14 | import java.util.concurrent.atomic.AtomicBoolean; 15 | 16 | import static org.junit.jupiter.api.Assertions.assertFalse; 17 | import static org.junit.jupiter.api.Assertions.assertTrue; 18 | 19 | /** 20 | * Verifies that a handler in a {@link BlockingEventLoop} is interrupted when 21 | * the loop is stopped while the calling thread continues unimpeded. 
22 | */ 23 | class BlockingEventLoopTest extends ThreadsTestCommon { 24 | 25 | @Test 26 | void handlersAreInterruptedOnStop() throws TimeoutException { 27 | try (final BlockingEventLoop el = new BlockingEventLoop("test-blocking-loop")) { 28 | el.start(); 29 | 30 | AtomicBoolean wasStoppedSuccessfully = new AtomicBoolean(false); 31 | CyclicBarrier barrier = new CyclicBarrier(2); 32 | 33 | el.addHandler(() -> { 34 | waitQuietly(barrier); 35 | 36 | while (!Thread.currentThread().isInterrupted()) { 37 | Jvm.pause(10); 38 | } 39 | wasStoppedSuccessfully.set(true); 40 | return false; 41 | }); 42 | 43 | waitQuietly(barrier); 44 | Jvm.pause(10); 45 | el.stop(); 46 | 47 | TimingPauser pauser = Pauser.balanced(); 48 | while (!wasStoppedSuccessfully.get()) { 49 | pauser.pause(1, TimeUnit.SECONDS); 50 | } 51 | assertTrue(wasStoppedSuccessfully.get()); 52 | assertFalse(Thread.currentThread().isInterrupted()); 53 | } 54 | } 55 | 56 | private void waitQuietly(CyclicBarrier barrier) { 57 | try { 58 | barrier.await(); 59 | } catch (InterruptedException | BrokenBarrierException e) { 60 | Thread.currentThread().interrupt(); 61 | throw new InterruptedRuntimeException("Interrupted waiting at barrier"); 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/ThreadHolder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException; 8 | 9 | /** 10 | * Supplies runtime details of a thread or event loop being monitored. The 11 | * associated {@link ThreadMonitor} uses this information to detect long blocks 12 | * or unexpected thread termination. 
13 | */ 14 | public interface ThreadHolder { 15 | int TIMING_ERROR = Jvm.getInteger("threads.timing.error", 80_000_000); 16 | 17 | /** 18 | * Indicates whether the monitored thread is still running. 19 | * 20 | * @return {@code true} if the thread has not terminated 21 | * @throws InvalidEventHandlerException if the holder can no longer be queried 22 | */ 23 | boolean isAlive() throws InvalidEventHandlerException; 24 | 25 | /** 26 | * Called once the thread has ended so monitoring can be stopped or logged. 27 | */ 28 | void reportFinished(); 29 | 30 | /** 31 | * Clears any internal timers when a new loop iteration begins. 32 | */ 33 | void resetTimers(); 34 | 35 | /** 36 | * Get the {@link System#nanoTime()} at which the currently executing loop iteration started 37 | * 38 | * @return The time the current loop started, or {@link CoreEventLoop#NOT_IN_A_LOOP} if no iteration is executing 39 | */ 40 | long startedNS(); 41 | 42 | /** 43 | * Determines whether a block has exceeded the logging threshold. 44 | * 45 | * @param nowNS the current time in nanoseconds 46 | * @return {@code true} if logging should occur 47 | */ 48 | boolean shouldLog(long nowNS); 49 | 50 | /** 51 | * Produces a diagnostic dump when a stall is detected. 52 | * 53 | * @param startedNS when the loop iteration began 54 | * @param nowNS the time the dump is triggered 55 | */ 56 | void dumpThread(long startedNS, long nowNS); 57 | 58 | /** 59 | * Descriptive name used in log output. 60 | */ 61 | String getName(); 62 | 63 | /** 64 | * Notifies that the monitor thread itself was delayed. 65 | * 66 | * @param actionCallDelayNS time since the last monitor call in nanoseconds 67 | */ 68 | void monitorThreadDelayed(long actionCallDelayNS); 69 | 70 | /** 71 | * Maximum delay between monitor calls before a warning is triggered. 
72 | * 73 | * @return tolerance in nanoseconds 74 | */ 75 | long timingToleranceNS(); 76 | } 77 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/PauserTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import org.junit.jupiter.api.Test; 7 | 8 | import java.util.concurrent.TimeUnit; 9 | import java.util.concurrent.TimeoutException; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertEquals; 12 | import static org.junit.jupiter.api.Assertions.assertTrue; 13 | 14 | /** 15 | * Tests the behaviour of the various {@link Pauser} implementations. 16 | *

17 | * The suite verifies that pause counters start at zero and increment with each 18 | * call to {@link Pauser#pause()}. After {@link Pauser#unpause()} the test 19 | * asserts whether {@link Pauser#isBusy()} matches the pauser type. The 20 | * {@link BusyPauser} is additionally checked to confirm it does not record 21 | * pause counts and rejects timed pauses. 22 | */ 23 | 24 | class PauserTest extends ThreadsTestCommon { 25 | 26 | @Test 27 | void balanced() { 28 | doTest(Pauser.balanced()); 29 | } 30 | 31 | @Test 32 | void balancedUpToMillis1() { 33 | doTest(Pauser.balancedUpToMillis(1)); 34 | } 35 | 36 | @Test 37 | void busy() throws TimeoutException { 38 | Pauser pauser = BusyPauser.INSTANCE; 39 | assertEquals(0, pauser.countPaused()); 40 | assertEquals(0, pauser.timePaused()); 41 | pauser.pause(); 42 | try { 43 | pauser.pause(1, TimeUnit.MILLISECONDS); 44 | } catch (UnsupportedOperationException ignored) { 45 | } 46 | assertEquals(0, pauser.countPaused()); 47 | pauser.unpause(); 48 | assertTrue(pauser.isBusy()); 49 | } 50 | 51 | @Test 52 | void millis1() { 53 | doTest(Pauser.millis(1), 200); 54 | } 55 | 56 | @Test 57 | void sleepy() { 58 | doTest(Pauser.sleepy(), 200); 59 | } 60 | 61 | @Test 62 | void timedBusy() { 63 | doTest(Pauser.timedBusy()); 64 | } 65 | 66 | @Test 67 | void yielding() { 68 | doTest(Pauser.yielding()); 69 | } 70 | 71 | private void doTest(Pauser pauser) { 72 | doTest(pauser, 2000); 73 | } 74 | 75 | private void doTest(Pauser pauser, int count) { 76 | assertEquals(0, pauser.countPaused()); 77 | assertEquals(0, pauser.timePaused()); 78 | for (int i = 1; i < count; i++) { 79 | pauser.pause(); 80 | assertEquals(i, pauser.countPaused()); 81 | } 82 | pauser.unpause(); 83 | assertEquals(pauser.getClass().getSimpleName().contains("Busy"), 84 | pauser.isBusy()); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/internal/EventLoopStateRenderer.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads.internal; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.threads.EventLoop; 8 | import net.openhft.chronicle.threads.AbstractLifecycleEventLoop; 9 | import net.openhft.chronicle.threads.CoreEventLoop; 10 | import org.jetbrains.annotations.Nullable; 11 | 12 | import java.lang.reflect.Field; 13 | 14 | /** 15 | * This is a utility to render a verbose summary of the state of an {@link EventLoop}. Useful for debugging. 16 | */ 17 | public enum EventLoopStateRenderer { 18 | INSTANCE; 19 | 20 | public String render(String name, @Nullable EventLoop eventLoop) { 21 | if (eventLoop == null) { 22 | return name + " event loop is null"; 23 | } 24 | StringBuilder builder = new StringBuilder(); 25 | builder.append(name).append(" event loop state\n"); 26 | builder.append("#toString(): ").append(eventLoop).append('\n'); 27 | builder.append("Closed: ").append(eventLoop.isClosed()).append('\n'); 28 | builder.append("Closing: ").append(eventLoop.isClosing()).append('\n'); 29 | addLifecycleDetails(builder, eventLoop); 30 | addCoreEventLoopDetails(builder, eventLoop); 31 | return builder.toString(); 32 | } 33 | 34 | private void addCoreEventLoopDetails(StringBuilder builder, EventLoop eventLoop) { 35 | if (eventLoop instanceof CoreEventLoop) { 36 | Thread t = ((CoreEventLoop) eventLoop).thread(); 37 | if (t != null) { 38 | builder.append("Thread state: ").append(t.getState()).append('\n'); 39 | final StackTraceElement[] stackTrace = t.getStackTrace(); 40 | if (stackTrace.length > 0) { 41 | builder.append("Stack trace:"); 42 | Jvm.trimStackTrace(builder, stackTrace); 43 | } 44 | } else { 45 | builder.append("Thread is null\n"); 46 | } 47 | } 48 | } 49 | 50 | private void addLifecycleDetails(StringBuilder builder, EventLoop eventLoop) { 
51 | if (eventLoop instanceof AbstractLifecycleEventLoop) { 52 | try { 53 | final Field lifecycle = Jvm.getField(eventLoop.getClass(), "lifecycle"); 54 | builder.append("Lifecycle: ").append(lifecycle.get(eventLoop)).append('\n'); 55 | } catch (IllegalAccessException e) { 56 | Jvm.warn().on(EventLoopStateRenderer.class, "Error getting the lifecycle for " + eventLoop.getClass().getName(), e); 57 | } 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/TimedEventHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.threads.EventHandler; 7 | import net.openhft.chronicle.core.threads.HandlerPriority; 8 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException; 9 | import org.jetbrains.annotations.NotNull; 10 | 11 | /** 12 | * Base {@link EventHandler} that schedules itself using 13 | * the return value of {@link #timedAction()}. 14 | *

15 | * When {@code action()} is invoked the handler checks whether the 16 | * current time has passed {@code nextRunNS}. If so it performs the 17 | * work and asks {@code timedAction()} how many micro-seconds to wait 18 | * before running again. A negative delay signals that the handler has 19 | * finished and should be removed. 20 | * 21 | *

22 |  * class HeartbeatHandler extends TimedEventHandler {
23 |  *     @Override
24 |  *     protected long timedAction() {
25 |  *         sendHeartbeat();
26 |  *         return 500_000; // run again in half a second
27 |  *     }
28 |  * }
29 |  * 
30 | */ 31 | public abstract class TimedEventHandler implements EventHandler { 32 | /** next scheduled run time in {@link System#nanoTime()} units. */ 33 | private long nextRunNS = 0; 34 | 35 | /** 36 | * Executes the handler when the scheduled time has arrived. 37 | *

38 | * If {@code System.nanoTime()} is greater than or equal to 39 | * {@code nextRunNS} the handler calls {@link #timedAction()} and 40 | * stores the returned delay to compute the next run time. The delay 41 | * is specified in micro-seconds and converted to nano-seconds. A 42 | * negative delay causes the method to return {@code true} so the 43 | * event loop can drop this handler. 44 | */ 45 | @Override 46 | public boolean action() throws InvalidEventHandlerException { 47 | long now = System.nanoTime(); 48 | if (nextRunNS <= now) { 49 | long delayUS = timedAction(); 50 | if (delayUS < 0) 51 | return true; 52 | nextRunNS = now + delayUS * 1000; 53 | } 54 | return false; 55 | } 56 | 57 | /** 58 | * Performs the timed work and specifies the delay until the next call. 59 | * 60 | * @return delay in micro-seconds. A negative value means the handler has 61 | * finished and {@code action()} should return {@code true}. 62 | */ 63 | protected abstract long timedAction() throws InvalidEventHandlerException; 64 | 65 | @NotNull 66 | @Override 67 | public HandlerPriority priority() { 68 | return HandlerPriority.TIMER; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /systemProperties.adoc: -------------------------------------------------------------------------------- 1 | == System Properties 2 | 3 | Chronicle Threads reads several system properties at start up. 4 | These values tune event loops, pausing strategies, monitoring intervals and disk space checks. 5 | All properties may be supplied on the command line with `-D` flags. 6 | 7 | NOTE: All boolean properties below are read using link:https://javadoc.io/static/net.openhft/chronicle-core/2.23ea13/net/openhft/chronicle/core/Jvm.html#getBoolean-java.lang.String-[net.openhft.chronicle.core.Jvm.getBoolean(java.lang.String)], and so are enabled if either `-Dflag` or `-Dflag=true` or `-Dflag=yes`. 
8 | 9 | === Disk monitoring 10 | 11 | [cols=4*,options="header"] 12 | |=== 13 | | Property Key | Default | Description | Java Variable Name (Type) 14 | | chronicle.disk.monitor.disable | `false` | Disable the background disk space monitor | _DISABLED_ (boolean) 15 | | chronicle.disk.monitor.threshold.percent | 5% | Issue warnings when free space drops below this percentage | _thresholdPercentage_ (int) 16 | | disk.monitor.deleted.warning | `false` | Warn if disk space cannot be determined | _WARN_DELETED_ (boolean) 17 | |=== 18 | 19 | === Event loops 20 | 21 | [cols=4*,options="header"] 22 | |=== 23 | | Property Key | Default | Description | Java Variable Name (Type) 24 | | eventloop.accept.mod | 128 | Prevent starvation by inserting new handlers every modulo iteration | _ACCEPT_HANDLER_MOD_COUNT_ (int) 25 | | eventGroup.conc.threads | processors/4 | Number of concurrent event loop threads (minimum one) | _CONC_THREADS_ (int) 26 | | eventGroup.wait.to.start.ms | 2_000 ms | Delay before the core event loop begins | _WAIT_TO_START_MS_ (long) 27 | | replicationEventPauseTime | 20 ms | Pause time used for replication event pausers | _REPLICATION_EVENT_PAUSE_TIME_ (int) 28 | | REPLICATION_MONITOR_INTERVAL_MS | 500 ms | Interval for monitoring replication loops | _REPLICATION_MONITOR_INTERVAL_MS_ (long) 29 | |=== 30 | 31 | === Pausers 32 | 33 | [cols=4*,options="header"] 34 | |=== 35 | | Property Key | Default | Description | Java Variable Name (Type) 36 | | pauser.minProcessors | 4 | Minimum number of processors required before busy pausing is used | _MIN_PROCESSORS_ (int) 37 | |=== 38 | 39 | === Monitoring 40 | 41 | [cols=4*,options="header"] 42 | |=== 43 | | Property Key | Default | Description | Java Variable Name (Type) 44 | | disableLoopBlockMonitor | `false` | Disable loop block monitoring | _ENABLE_LOOP_BLOCK_MONITOR_ (boolean) 45 | | ignoreThreadMonitorEventHandler | `false` | If enabled, throw an exception when thread monitoring fails | 
_IGNORE_THREAD_MONITOR_EVENT_HANDLER_ (boolean) 46 | | MONITOR_INTERVAL_MS | 100 ms | Sampling interval for monitoring core threads | _MONITOR_INTERVAL_MS_ (long) 47 | | SHUTDOWN_WAIT_MS | 500 ms | Time to wait for services to stop on shutdown | _SHUTDOWN_WAIT_MILLIS_ (long) 48 | | threads.timing.error | 80_000_000 ns | Allowed timing error for loop execution | _TIMING_ERROR_ (int) 49 | |=== 50 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/EventLoops.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.threads.EventLoop; 8 | 9 | import java.util.ArrayList; 10 | import java.util.Arrays; 11 | import java.util.Collection; 12 | import java.util.List; 13 | import java.util.concurrent.Callable; 14 | import java.util.concurrent.ExecutionException; 15 | import java.util.concurrent.ForkJoinPool; 16 | import java.util.concurrent.Future; 17 | 18 | /** 19 | * Utility methods for working with {@link EventLoop EventLoops}. At present the 20 | * class only supplies a helper to stop several loops at once. 21 | */ 22 | public final class EventLoops { 23 | 24 | // Suppresses default constructor, ensuring non-instantiability. 25 | private EventLoops() { 26 | } 27 | 28 | /** 29 | * Stops many {@link EventLoop}s concurrently using {@link ForkJoinPool#commonPool()}. 30 | * The call blocks until every {@code EventLoop.stop()} has finished. Null 31 | * values or collections containing nulls are ignored. Each task runs in the 32 | * common pool and any {@link ExecutionException} is logged. If interrupted 33 | * while waiting the interrupt status is restored. 
34 | * 35 | * @param eventLoops a list of {@link EventLoop}s or collections of them 36 | */ 37 | public static void stopAll(Object... eventLoops) { 38 | List> eventLoopStoppers = new ArrayList<>(); 39 | addAllEventLoopStoppers(Arrays.asList(eventLoops), eventLoopStoppers); 40 | try { 41 | for (Future voidFuture : ForkJoinPool.commonPool().invokeAll(eventLoopStoppers)) { 42 | try { 43 | voidFuture.get(); 44 | } catch (ExecutionException e) { 45 | Jvm.error().on(EventLoops.class, "Error stopping event loop", e); 46 | } 47 | } 48 | } catch (InterruptedException e) { 49 | Jvm.warn().on(EventLoops.class, "Interrupted waiting for event loops to stop"); 50 | Thread.currentThread().interrupt(); 51 | } 52 | } 53 | 54 | private static void addAllEventLoopStoppers(Collection collection, List> stoppers) { 55 | for (Object o : collection) { 56 | if (o == null) { 57 | continue; 58 | } 59 | if (o instanceof EventLoop) { 60 | stoppers.add(() -> { 61 | ((EventLoop) o).stop(); 62 | return null; 63 | }); 64 | } else if (o instanceof Collection) { 65 | addAllEventLoopStoppers((Collection) o, stoppers); 66 | } else { 67 | Jvm.warn().on(EventLoops.class, "Unexpected object passed to EventLoops.stop(): " + o); 68 | } 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/PauserTimeoutTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import org.junit.jupiter.api.Test; 7 | 8 | import java.util.concurrent.TimeUnit; 9 | import java.util.concurrent.TimeoutException; 10 | 11 | import static org.junit.jupiter.api.Assertions.fail; 12 | 13 | /** 14 | * Exercises {@link Pauser#pause(long, java.util.concurrent.TimeUnit)} with a 15 | * timeout across several implementations. 
Pausers that support the timeout 16 | * contract are called repeatedly until half the period has passed without a 17 | * {@link TimeoutException}. After the interval expires, the next call must 18 | * throw a {@link TimeoutException}. Pausers that do not implement this 19 | * behaviour are expected to throw {@link UnsupportedOperationException} when a 20 | * timeout is supplied. 21 | */ 22 | class PauserTimeoutTest extends ThreadsTestCommon { 23 | private Pauser[] pausersSupportTimeout = { 24 | Pauser.balanced(), 25 | Pauser.sleepy(), 26 | new BusyTimedPauser(), 27 | new YieldingPauser(0), 28 | new LongPauser(0, 0, 1, 10, TimeUnit.MILLISECONDS), 29 | // new MilliPauser(1) 30 | }; 31 | private Pauser[] pausersDontSupportTimeout = { 32 | BusyPauser.INSTANCE}; 33 | 34 | /** 35 | * Confirms that pausers honour the timeout parameter. Each pauser is 36 | * called in a loop until half the timeout has elapsed and should not throw. 37 | * Once the timeout has expired the next call must raise 38 | * {@link TimeoutException}. 39 | */ 40 | @Test 41 | void pausersSupportTimeout() { 42 | int timeoutNS = 100_000_000; 43 | for (Pauser p : pausersSupportTimeout) { 44 | long start = System.nanoTime(); 45 | do try { 46 | p.pause(timeoutNS, TimeUnit.NANOSECONDS); 47 | } catch (TimeoutException e) { 48 | fail(p + " timed out"); 49 | } while (System.nanoTime() < start + timeoutNS / 2); 50 | while (System.nanoTime() < start + timeoutNS * 5 / 4) ; 51 | try { 52 | p.pause(timeoutNS, TimeUnit.NANOSECONDS); 53 | } catch (TimeoutException e) { 54 | continue; 55 | } 56 | fail(p + " did not time out"); 57 | } 58 | } 59 | 60 | /** 61 | * Checks that pausers without timeout capability throw 62 | * {@link UnsupportedOperationException} when a timeout is supplied. 
63 | */ 64 | @Test 65 | void pausersDontSupportTimeout() throws TimeoutException { 66 | for (Pauser p : pausersDontSupportTimeout) { 67 | try { 68 | p.pause(100, TimeUnit.MILLISECONDS); 69 | } catch (UnsupportedOperationException e) { 70 | continue; 71 | } 72 | fail(p + " did not throw"); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/ThreadMonitorsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | import java.util.concurrent.atomic.AtomicBoolean; 12 | import java.util.function.Consumer; 13 | import java.util.function.LongSupplier; 14 | 15 | import static org.junit.jupiter.api.Assertions.assertEquals; 16 | import static org.junit.jupiter.api.Assertions.assertFalse; 17 | import static org.junit.jupiter.api.Assertions.assertTrue; 18 | 19 | class ThreadMonitorsTest { 20 | 21 | @Test 22 | void forThreadLogsWhenEnabled() throws InvalidEventHandlerException { 23 | RecordingConsumer consumer = new RecordingConsumer(); 24 | AtomicBoolean enabled = new AtomicBoolean(true); 25 | ThreadMonitor monitor = ThreadMonitors.forThread( 26 | "loop", 27 | 1_000_000L, 28 | new DeterministicLongSupplier(-5_000_000L, -5_000_000L), 29 | Thread::currentThread, 30 | enabled::get, 31 | consumer 32 | ); 33 | 34 | boolean result = monitor.action(); 35 | 36 | assertFalse(result); 37 | assertEquals(1, consumer.messages.size()); 38 | assertTrue(consumer.messages.get(0).contains("loop")); 39 | } 40 | 41 | @Test 42 | void forThreadSkipsLoggingWhenDisabled() throws InvalidEventHandlerException { 43 | List<String> messages = new ArrayList<>(); 
44 | AtomicBoolean enabled = new AtomicBoolean(false); 45 | ThreadMonitor monitor = ThreadMonitors.forThread( 46 | "loop", 47 | 1_000_000L, 48 | new DeterministicLongSupplier(-5_000_000L, -5_000_000L), 49 | Thread::currentThread, 50 | enabled::get, 51 | messages::add 52 | ); 53 | 54 | boolean result = monitor.action(); 55 | 56 | assertFalse(result); 57 | assertTrue(messages.isEmpty()); 58 | } 59 | 60 | private static final class DeterministicLongSupplier implements LongSupplier { 61 | private final long[] values; 62 | private int index; 63 | 64 | DeterministicLongSupplier(long... values) { 65 | this.values = values; 66 | } 67 | 68 | @Override 69 | public long getAsLong() { 70 | if (index >= values.length) { 71 | return values[values.length - 1]; 72 | } 73 | return values[index++]; 74 | } 75 | } 76 | 77 | private static final class RecordingConsumer implements Consumer<String> { 78 | private final List<String> messages = new ArrayList<>(); 79 | 80 | @Override 81 | public void accept(String message) { 82 | messages.add(message); 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/BusyPauser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | 8 | import java.util.concurrent.TimeUnit; 9 | import java.util.concurrent.TimeoutException; 10 | 11 | /** 12 | * Busy-spin implementation of {@link Pauser}. 13 | *

14 | * The pauser repeatedly invokes {@link Jvm#nanoPause()} and never yields or 15 | * sleeps. A thread using this pauser therefore consumes an entire CPU core 16 | * while waiting. No state is kept, so most lifecycle methods are no-ops. 17 | */ 18 | public enum BusyPauser implements Pauser { 19 | INSTANCE; 20 | 21 | /** 22 | * Does nothing as {@code BusyPauser} does not maintain state that requires resetting. 23 | */ 24 | @Override 25 | public void reset() { 26 | // Do nothing 27 | } 28 | 29 | /** 30 | * Performs a single busy-spin step by calling {@link Jvm#nanoPause()}. 31 | * The call neither yields nor sleeps and therefore burns CPU cycles. 32 | */ 33 | @Override 34 | public void pause() { 35 | Jvm.nanoPause(); 36 | } 37 | 38 | /** 39 | * Unsupported operation as this pauser is stateless. 40 | * Use {@link BusyTimedPauser} when a timeout is required. 41 | * 42 | * @param timeout timeout duration (ignored) 43 | * @param timeUnit unit of the timeout (ignored) 44 | * @throws TimeoutException never thrown 45 | */ 46 | @Override 47 | public void pause(long timeout, TimeUnit timeUnit) throws TimeoutException { 48 | throw new UnsupportedOperationException(this + " is not stateful, use a " + BusyTimedPauser.class.getSimpleName()); 49 | } 50 | 51 | /** 52 | * Does nothing as {@code BusyPauser} has no pausing state to unpause from. 53 | */ 54 | @Override 55 | public void unpause() { 56 | // nothing to unpause. 57 | } 58 | 59 | /** 60 | * Always returns {@code 0} as {@code BusyPauser} does not track paused time. 61 | * 62 | * @return {@code 0} always 63 | */ 64 | @Override 65 | public long timePaused() { 66 | return 0; 67 | } 68 | 69 | /** 70 | * Always returns {@code 0} as {@code BusyPauser} does not count pauses. 71 | * 72 | * @return {@code 0} always 73 | */ 74 | @Override 75 | public long countPaused() { 76 | return 0; 77 | } 78 | 79 | /** 80 | * Always returns {@code true}, indicating that this pauser keeps the thread busy rather than truly pausing it. 
81 | * 82 | * @return {@code true} always 83 | */ 84 | @Override 85 | public boolean isBusy() { 86 | return true; 87 | } 88 | 89 | /** 90 | * Provides a string representation of this pauser, identifying it as "PauserMode.busy". 91 | * 92 | * @return the string "PauserMode.busy" 93 | */ 94 | @Override 95 | public String toString() { 96 | return "PauserMode.busy"; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/Issue251Test.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import org.junit.jupiter.api.Test; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertEquals; 9 | 10 | /** 11 | * Confirms the stable {@code toString} output for each built-in pauser. 12 | * Verifies the fix for issue {@code #251} where descriptions were inconsistent. 
13 | */ 14 | class Issue251Test { 15 | @Test 16 | void toString_timedBusyVariants() { 17 | assertEquals("PauserMode.timedBusy", new BusyTimedPauser().toString()); 18 | assertEquals("PauserMode.timedBusy", PauserMode.timedBusy.get().toString()); 19 | assertEquals("PauserMode.timedBusy", Pauser.timedBusy().toString()); 20 | } 21 | 22 | @Test 23 | void toString_busyVariants() { 24 | assertEquals("PauserMode.busy", BusyPauser.INSTANCE.toString()); 25 | assertEquals("PauserMode.busy", PauserMode.busy.get().toString()); 26 | assertEquals("PauserMode.busy", Pauser.busy().toString()); 27 | } 28 | 29 | @Test 30 | void toString_balancedFromMode() { 31 | assertEquals("PauserMode.balanced", PauserMode.balanced.get().toString()); 32 | } 33 | 34 | @Test 35 | void toString_balanced() { 36 | assertEquals("PauserMode.balanced", Pauser.balanced().toString()); 37 | } 38 | 39 | @Test 40 | void toString_millis3ms() { 41 | assertEquals("Pauser.millis(3)", Pauser.millis(3).toString()); 42 | } 43 | 44 | @Test 45 | void toString_milli1and10() { 46 | assertEquals("Pauser.milli(1, 10)", Pauser.millis(1, 10).toString()); 47 | } 48 | 49 | @Test 50 | void toString_balanced2ms() { 51 | assertEquals("Pauser.balancedUpToMillis(2)", Pauser.balancedUpToMillis(2).toString()); 52 | } 53 | 54 | @Test 55 | void toString_yieldingNoParams() { 56 | assertEquals("PauserMode.yielding", Pauser.yielding().toString()); 57 | } 58 | 59 | @Test 60 | void toString_yieldingMinBusy3() { 61 | assertEquals("YieldingPauser{minBusy=3}", Pauser.yielding(3).toString()); 62 | } 63 | 64 | @Test 65 | void toString_milliMode() { 66 | assertEquals("PauserMode.milli", PauserMode.milli.get().toString()); 67 | } 68 | 69 | @Test 70 | void toString_yieldingMode() { 71 | assertEquals("PauserMode.yielding", PauserMode.yielding.get().toString()); 72 | } 73 | 74 | @Test 75 | void toString_sleepyMode() { 76 | assertEquals("PauserMode.sleepy", PauserMode.sleepy.get().toString()); 77 | } 78 | 79 | @Test 80 | void 
toString_yieldingMinBusy7() { 81 | assertEquals("YieldingPauser{minBusy=7}", new YieldingPauser(7).toString()); 82 | } 83 | 84 | @Test 85 | void toString_yieldingMinBusy1() { 86 | assertEquals("YieldingPauser{minBusy=1}", new YieldingPauser(1).toString()); 87 | } 88 | 89 | @Test 90 | void toString_millis7ms() { 91 | assertEquals("Pauser.millis(7)", new MilliPauser(7).toString()); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/internal/EventLoopThreadHolder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads.internal; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.threads.CoreEventLoop; 8 | import net.openhft.chronicle.threads.ThreadHolder; 9 | /** 10 | * {@link ThreadHolder} implementation used to monitor a single event loop 11 | * thread. It keeps track of how long the loop has been running and requests a 12 | * dump of the loop's state when the thread appears to have blocked for longer 13 | * than the configured monitoring interval. Each subsequent dump is spaced 14 | * further apart to reduce log volume while the loop remains stuck. 
15 | */ 16 | 17 | public class EventLoopThreadHolder implements ThreadHolder { 18 | private final CoreEventLoop eventLoop; 19 | private final long monitorIntervalNS; 20 | // additional time added to the next logging threshold 21 | private long intervalToAddNS; 22 | // nanoseconds before the next thread dump is logged 23 | private long printBlockTimeNS; 24 | 25 | public EventLoopThreadHolder(long monitorIntervalNS, CoreEventLoop eventLoop) { 26 | this.monitorIntervalNS = intervalToAddNS = printBlockTimeNS = monitorIntervalNS; 27 | this.eventLoop = eventLoop; 28 | } 29 | 30 | @Override 31 | public boolean isAlive() { 32 | return eventLoop.isAlive(); 33 | } 34 | 35 | @Override 36 | public void reportFinished() { 37 | Jvm.warn().on(getClass(), "Monitoring a task which has finished " + eventLoop); 38 | } 39 | 40 | @Override 41 | public long startedNS() { 42 | return eventLoop.loopStartNS(); 43 | } 44 | 45 | @Override 46 | public void resetTimers() { 47 | intervalToAddNS = 48 | printBlockTimeNS = monitorIntervalNS; 49 | } 50 | 51 | @Override 52 | public boolean shouldLog(long nowNS) { 53 | long blockingTimeNS = nowNS - startedNS(); 54 | return blockingTimeNS >= printBlockTimeNS; 55 | } 56 | 57 | @Override 58 | public void dumpThread(long startedNS, long nowNS) { 59 | long blockingTimeNS = nowNS - startedNS; 60 | double blockingTimeMS = blockingTimeNS / 100_000 / 10.0; 61 | if (blockingTimeMS <= 0.0) 62 | return; 63 | eventLoop.dumpRunningState(eventLoop.name() + " thread has blocked for " 64 | + blockingTimeMS + " ms.", 65 | // check we are still in the loop. 
66 | () -> eventLoop.loopStartNS() == startedNS); 67 | 68 | printBlockTimeNS += intervalToAddNS; 69 | intervalToAddNS = (long) Math.min(1.41d * intervalToAddNS, 20d * monitorIntervalNS); 70 | } 71 | 72 | @Override 73 | public long timingToleranceNS() { 74 | return monitorIntervalNS + timingErrorNS(); 75 | } 76 | 77 | protected long timingErrorNS() { 78 | return TIMING_ERROR; 79 | } 80 | 81 | @Override 82 | public String getName() { 83 | return eventLoop.name(); 84 | } 85 | 86 | @Override 87 | public void monitorThreadDelayed(long actionCallDelayNS) { 88 | // report it?? 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/LongPauserTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import java.util.concurrent.CountDownLatch; 10 | import java.util.concurrent.TimeUnit; 11 | 12 | import static org.junit.jupiter.api.Assertions.*; 13 | 14 | /** 15 | * Tests the pausing behaviour of {@link LongPauser}. 16 | * 17 | *

These tests ensure that: 18 | *

26 | */ 27 | class LongPauserTest extends ThreadsTestCommon { 28 | 29 | @Test 30 | void unpauseStopsPausing() throws InterruptedException { 31 | final int pauseMillis = 1_000; 32 | final LongPauser pauser = new LongPauser(0, 0, pauseMillis, pauseMillis, TimeUnit.MILLISECONDS); 33 | final CountDownLatch started = new CountDownLatch(1); 34 | Thread thread = new Thread(() -> { 35 | started.countDown(); 36 | pauser.pause(); 37 | }); 38 | thread.start(); 39 | started.await(50, TimeUnit.MILLISECONDS); 40 | Jvm.pause(10); // give the thread some time to park 41 | pauser.unpause(); 42 | final long startNs = System.nanoTime(); 43 | thread.join(); 44 | final long timeTakenMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNs); 45 | assertTrue(timeTakenMs < pauseMillis / 5, "Took " + timeTakenMs + " to stop"); 46 | } 47 | 48 | @Test 49 | void testLongAsyncPauser() { 50 | final LongPauser pauser = new LongPauser(0, 0, 1, 1, TimeUnit.MILLISECONDS); 51 | boolean failedOnce = false; 52 | for (int i = 0; i < 100; i++) { 53 | try { 54 | pauser.asyncPause(); 55 | testUntilUnpaused(pauser, 1, TimeUnit.MILLISECONDS); 56 | pauser.reset(); 57 | testUntilUnpaused(pauser, 0, TimeUnit.MILLISECONDS); 58 | } catch (AssertionError e) { 59 | if (failedOnce) 60 | throw e; 61 | failedOnce = true; 62 | } 63 | } 64 | } 65 | 66 | @Test 67 | void asyncPauseIsResetOnReset() { 68 | final LongPauser longPauser = new LongPauser(0, 0, 1, 1, TimeUnit.SECONDS); 69 | longPauser.asyncPause(); 70 | assertTrue(longPauser.asyncPausing()); 71 | longPauser.reset(); 72 | assertFalse(longPauser.asyncPausing()); 73 | } 74 | 75 | private static void testUntilUnpaused(LongPauser pauser, int n, TimeUnit timeUnit) { 76 | long timeNS = timeUnit.convert(n, TimeUnit.NANOSECONDS); // NOTE(review): convert(n, NANOSECONDS) reads n as nanoseconds and converts it INTO timeUnit, so timeNS is 0 for (1, MILLISECONDS); timeUnit.toNanos(n) looks intended, but that would raise the lower bound of the assertEquals below to timeNS - confirm before changing 77 | long start = System.nanoTime(); 78 | while (pauser.asyncPausing()) { 79 | if (System.nanoTime() > start + timeNS + 100_000_000) 80 | fail(); 81 | } 82 | long time = System.nanoTime() - start; 83 | final int delta = 11_000_000; 84 | 
assertEquals(timeNS + delta, time, delta); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/DiskSpaceMonitorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.onoes.ExceptionKey; 8 | import net.openhft.chronicle.core.time.SetTimeProvider; 9 | import org.junit.jupiter.api.BeforeEach; 10 | import org.junit.jupiter.api.Test; 11 | import org.junit.jupiter.api.AfterEach; 12 | 13 | import java.io.File; 14 | import java.time.Duration; 15 | import java.util.Map; 16 | 17 | import static org.junit.jupiter.api.Assertions.assertEquals; 18 | import static org.junit.jupiter.api.Assumptions.assumeTrue; 19 | 20 | class DiskSpaceMonitorTest extends ThreadsTestCommon { 21 | 22 | @BeforeEach 23 | void beforeEach(){ 24 | clearState(); 25 | } 26 | 27 | @AfterEach 28 | void afterEach(){ 29 | clearState(); 30 | DiskSpaceMonitor.INSTANCE.setThresholdPercentage(5); 31 | } 32 | 33 | private void clearState() { 34 | DiskSpaceMonitor.INSTANCE.clear(); 35 | } 36 | 37 | /** 38 | * Exercises disk monitoring when the threshold is raised from 5 to 100 per cent. 39 | * Exceptions are recorded and disk space is polled repeatedly to verify that 40 | * roughly five warnings are reported. The test is skipped on Arm hardware. 
41 | */ 42 | @Test 43 | void pollDiskSpace() { 44 | // todo investigate why this fails on arm 45 | assumeTrue(!Jvm.isArm()); 46 | Map map = Jvm.recordExceptions(); 47 | assertEquals(5, DiskSpaceMonitor.INSTANCE.getThresholdPercentage()); 48 | DiskSpaceMonitor.INSTANCE.setThresholdPercentage(100); 49 | for (int i = 0; i < 51; i++) { 50 | DiskSpaceMonitor.INSTANCE.pollDiskSpace(new File(".")); 51 | Jvm.pause(100); 52 | } 53 | DiskSpaceMonitor.INSTANCE.clear(); 54 | map.entrySet().forEach(System.out::println); 55 | long count = map.entrySet() 56 | .stream() 57 | .filter(e -> e.getKey().clazz() == DiskSpaceMonitor.class) 58 | .mapToInt(Map.Entry::getValue) 59 | .sum(); 60 | Jvm.resetExceptionHandlers(); 61 | System.out.println("Disk space warnings/errors: " + count); 62 | // look for 5 disk space checks and some debug messages about slow disk checks. 63 | assertEquals(5.5, count, 1.5); 64 | } 65 | 66 | /** 67 | * This test was created to verify that the core monitoring loop actually runs more than once. It used to run once 68 | * and then never again. This test explicitly changes the threshold after the first run has happened to ensure that 69 | * a failure occurs on a subsequent run. 
70 | */ 71 | @Test 72 | void ensureThatDiskSpaceMonitorRunsForMoreThanOneIteration() throws InterruptedException { 73 | SetTimeProvider timeProvider = new SetTimeProvider(); 74 | ignoreException("warning: the JVM may crash if it undertakes an operation with a memory-mapped file and the disk is out of space"); 75 | DiskSpaceMonitor.INSTANCE.pollDiskSpace(new File(".")); 76 | timeProvider.advanceMillis(1200); 77 | DiskSpaceMonitor.INSTANCE.setThresholdPercentage(100); 78 | timeProvider.advanceMillis(Duration.ofHours(24).toMillis()); 79 | Thread.sleep(1000); 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/example/SingleAndMultiThreadedExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads.example; 5 | 6 | import net.openhft.chronicle.core.threads.EventLoop; 7 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException; 8 | import net.openhft.chronicle.threads.MediumEventLoop; 9 | import net.openhft.chronicle.threads.Pauser; 10 | 11 | import java.util.concurrent.CountDownLatch; 12 | import java.util.concurrent.ExecutionException; 13 | import java.util.concurrent.ExecutorService; 14 | import java.util.concurrent.Future; 15 | import java.util.concurrent.atomic.AtomicLong; 16 | 17 | import static java.util.concurrent.Executors.newCachedThreadPool; 18 | 19 | /** 20 | * An example that was used in a DZone article 21 | */ 22 | public class SingleAndMultiThreadedExample { 23 | 24 | private AtomicLong multiThreadedValue = new AtomicLong(); 25 | private long singleThreadedValue; 26 | 27 | /** 28 | * The two examples in this code do the same thing, they both increment a shared counter from 0 to 500 29 | * one is written using java threads and the other uses the Chronicle Event Loop. 
30 | */ 31 | public static void main(String[] args) throws ExecutionException, InterruptedException { 32 | SingleAndMultiThreadedExample example = new SingleAndMultiThreadedExample(); 33 | 34 | // runs using java Executor - outputs 500 35 | example.multiThreadedExample(); 36 | 37 | // using the chronicle event loop 38 | example.eventLoopExample(); 39 | 40 | } 41 | 42 | private Void addOneHundred() { 43 | for (int i = 0; i < 100; i++) { 44 | multiThreadedValue.incrementAndGet(); 45 | } 46 | return null; 47 | } 48 | 49 | private void multiThreadedExample() throws ExecutionException, InterruptedException { 50 | 51 | // example using Java Threads 52 | final ExecutorService executorService = newCachedThreadPool(); 53 | Future f1 = executorService.submit(this::addOneHundred); 54 | Future f2 = executorService.submit(this::addOneHundred); 55 | Future f3 = executorService.submit(this::addOneHundred); 56 | Future f4 = executorService.submit(this::addOneHundred); 57 | Future f5 = executorService.submit(this::addOneHundred); 58 | 59 | f1.get(); 60 | f2.get(); 61 | f3.get(); 62 | f4.get(); 63 | f5.get(); 64 | System.out.println("multiThreadedValue=" + multiThreadedValue); 65 | } 66 | 67 | private void eventLoopExample() throws InterruptedException { 68 | final EventLoop eventLoop = new MediumEventLoop(null, "test", Pauser.balanced(), false, "none"); 69 | eventLoop.start(); 70 | CountDownLatch finished = new CountDownLatch(1); 71 | eventLoop.addHandler(() -> { 72 | 73 | singleThreadedValue++; 74 | // we throw this to un-register the event loop 75 | 76 | if (singleThreadedValue == 500) { 77 | finished.countDown(); 78 | throw new InvalidEventHandlerException("finished"); 79 | } 80 | 81 | // return false if you don't want to be called back for a while 82 | return true; 83 | }); 84 | 85 | finished.await(); 86 | System.out.println("eventLoopExample=" + singleThreadedValue); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- 
/src/main/java/net/openhft/chronicle/threads/BusyTimedPauser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | 8 | import java.util.concurrent.TimeUnit; 9 | import java.util.concurrent.TimeoutException; 10 | 11 | /** 12 | * Busy-spin pauser that also implements {@link TimingPauser}. 13 | *

14 | * Like {@link BusyPauser} it never yields or sleeps, so it occupies a CPU core 15 | * while waiting. In addition it tracks elapsed busy-spin time and can throw a 16 | * {@link TimeoutException} when a configured timeout is exceeded. 17 | */ 18 | public class BusyTimedPauser implements Pauser, TimingPauser { 19 | 20 | private long time = Long.MAX_VALUE; 21 | private long countPaused = 0; 22 | 23 | /** 24 | * Always returns {@code true}, indicating that this pauser predominantly keeps the thread busy. 25 | * 26 | * @return {@code true}, as the primary operation is a busy wait 27 | */ 28 | @Override 29 | public boolean isBusy() { 30 | return true; 31 | } 32 | 33 | /** 34 | * Clears any timeout state so the next timed pause starts afresh. 35 | */ 36 | @Override 37 | public void reset() { 38 | time = Long.MAX_VALUE; 39 | } 40 | 41 | /** 42 | * Busy-spins once and increments the pause count. 43 | * No yielding or sleeping occurs. 44 | */ 45 | @Override 46 | public void pause() { 47 | countPaused++; 48 | Jvm.nanoPause(); 49 | } 50 | 51 | /** 52 | * Busy-spins once per call, or throws if the time elapsed since the first timed call exceeds the supplied timeout. 53 | * The timer starts with the first call after {@link #reset()}. 54 | * 55 | * @param timeout maximum total time allowed across repeated calls before an exception is thrown 56 | * @param timeUnit unit for {@code timeout} 57 | * @throws TimeoutException if the time since the first call exceeds the timeout 58 | */ 59 | @Override 60 | public void pause(long timeout, TimeUnit timeUnit) throws TimeoutException { 61 | if (time == Long.MAX_VALUE) 62 | time = System.nanoTime(); 63 | if (System.nanoTime() - time > timeUnit.toNanos(timeout)) 64 | throw new TimeoutException("Pause timed out after " + timeout + " " + timeUnit); 65 | pause(); 66 | } 67 | 68 | /** 69 | * Does nothing since this implementation has no state to unpause from. The method exists to fulfill the interface contract. 70 | */ 71 | @Override 72 | public void unpause() { 73 | // nothing to unpause.
74 | } 75 | 76 | /** 77 | * Always returns {@code 0} as this pauser does not actually track total pause time. 78 | * 79 | * @return {@code 0}, indicating no measurable pause duration 80 | */ 81 | @Override 82 | public long timePaused() { 83 | return 0; 84 | } 85 | 86 | /** 87 | * Returns the count of how many times the {@code pause()} method has been called. 88 | * 89 | * @return the number of pauses that have been initiated 90 | */ 91 | @Override 92 | public long countPaused() { 93 | return countPaused; 94 | } 95 | 96 | /** 97 | * Provides a string representation for this pauser, identifying it as "PauserMode.timedBusy". 98 | * 99 | * @return a string indicating the type of pauser 100 | */ 101 | @Override 102 | public String toString() { 103 | return "PauserMode.timedBusy"; 104 | } 105 | } 106 | 107 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/internal/ThreadMonitorHarness.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads.internal; 5 | 6 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException; 7 | import net.openhft.chronicle.threads.ThreadHolder; 8 | import net.openhft.chronicle.threads.ThreadMonitor; 9 | 10 | import java.util.function.LongSupplier; 11 | 12 | import static net.openhft.chronicle.threads.CoreEventLoop.NOT_IN_A_LOOP; 13 | 14 | /** 15 | * Monitoring harness that drives a {@link ThreadHolder} via the 16 | * {@link ThreadMonitor} interface. The harness delegates all 17 | * monitoring actions to the wrapped holder and records the last time 18 | * an action was run. 
19 | */ 20 | public class ThreadMonitorHarness implements ThreadMonitor { 21 | private final ThreadHolder thread; 22 | private final LongSupplier timeSupplier; 23 | private long lastActionCall = Long.MAX_VALUE; 24 | private long lastStartedNS = NOT_IN_A_LOOP; 25 | 26 | /** 27 | * Creates a harness that reports on the supplied holder using the given 28 | * time supplier. 29 | * 30 | * @param thread holder describing the monitored thread 31 | * @param timeSupplier provider of the current time in nanoseconds 32 | */ 33 | public ThreadMonitorHarness(ThreadHolder thread, LongSupplier timeSupplier) { 34 | this.thread = thread; 35 | this.timeSupplier = timeSupplier; 36 | } 37 | 38 | /** 39 | * Creates a harness using {@link System#nanoTime()} as the time provider. 40 | * 41 | * @param thread holder describing the monitored thread 42 | */ 43 | public ThreadMonitorHarness(ThreadHolder thread) { 44 | this(thread, System::nanoTime); 45 | } 46 | 47 | /** 48 | * Called periodically to check the state of the wrapped thread. 49 | * Throws {@link InvalidEventHandlerException} if the thread has 50 | * finished. If a delay greater than the tolerance is observed the 51 | * holder is notified and {@code true} is returned. 
52 | * 53 | * @return {@code true} when the holder reports a delay 54 | * @throws InvalidEventHandlerException if the thread is no longer alive 55 | */ 56 | @Override 57 | public boolean action() throws InvalidEventHandlerException { 58 | if (!thread.isAlive()) { 59 | thread.reportFinished(); 60 | throw new InvalidEventHandlerException(); 61 | } 62 | long startedNS = thread.startedNS(); 63 | long nowNS = timeSupplier.getAsLong(); 64 | 65 | // Record lastActionCall time on every call to prevent false-positive "monitorThreadDelayed" reports 66 | long actionCallDelay = nowNS - this.lastActionCall; 67 | this.lastActionCall = nowNS; 68 | 69 | if (startedNS == 0 || startedNS == NOT_IN_A_LOOP) { 70 | return false; 71 | } 72 | if (startedNS != lastStartedNS) { 73 | thread.resetTimers(); 74 | lastStartedNS = startedNS; 75 | } 76 | if (actionCallDelay > thread.timingToleranceNS()) { 77 | if (thread.isAlive()) 78 | thread.monitorThreadDelayed(actionCallDelay); 79 | return true; 80 | } 81 | if (!thread.shouldLog(nowNS)) 82 | return false; 83 | thread.dumpThread(startedNS, nowNS); 84 | return false; // true assumes we are about to need to check again. 85 | } 86 | 87 | @Override 88 | public String toString() { 89 | return "ThreadMonitorHarness<" + thread.getName() + ">"; 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/docs/thread-thread-safety-guide.adoc: -------------------------------------------------------------------------------- 1 | = Chronicle Threads Thread-Safety Guide 2 | :toc: 3 | :sectnums: 4 | :lang: en-GB 5 | 6 | == Scope 7 | 8 | This guide explains how Chronicle Threads enforces single-threaded handler execution and how developers should structure code that interacts with event loops. 9 | It expands on requirements THR-FN-006 through THR-NF-O-009 and aligns with Chronicle Core's `SingleThreadedChecked` utilities. 
10 | 11 | == Event Loop Ownership Model 12 | 13 | * Each `EventLoop` runs on a dedicated Java platform thread; handlers registered on that loop must not share mutable state with other threads without explicit synchronisation (THR-FN-006). 14 | * When a handler needs to hand over work to another thread (e.g., a blocking worker), use thread-safe queues or Chronicle Queue to transfer data without violating loop confinement. 15 | * The boolean result of `action()` should reflect whether more immediate work is available; returning `true` repeatedly for idle handlers forces tight scheduling and increases contention for other handlers (THR-FN-007). 16 | 17 | == Safe Hand-off Patterns 18 | 19 | Initialise :: Construct handlers and supporting resources on the main thread, then call `singleThreadedCheckReset()` before registering them with the target loop. 20 | 21 | Operate :: Once registered, treat handler state as confined to the loop's thread. 22 | All mutations should occur inside `action()` or helper methods invoked from that loop. 23 | 24 | Dispose :: Use `InvalidEventHandlerException.reusable()` to self-deregister when the handler has completed its lifecycle (THR-FN-008). 25 | Ensure downstream resources honour Chronicle Core's `Closeable` and `ReferenceCounted` contracts. 26 | 27 | == Interaction with Shared Services 28 | 29 | * Shared caches or maps must expose lock-free APIs that are safe for single-writer, multi-reader scenarios, or provide appropriate synchronisation. 30 | * When invoking Chronicle Queue appenders or tailers from handlers, rely on their single-threaded guarantees and avoid sharing instances across loops without resetting ownership. 31 | * If a handler must update shared analytics or metrics collectors, prefer non-blocking data structures (e.g., `LongAdder`) to minimise stall risk. 
32 | 33 | == Error Handling Discipline 34 | 35 | Unchecked exceptions :: 36 | * The loop removes the offending handler and logs via `Jvm.warn()`; implement catch-and-report patterns where recovery is possible (THR-NF-O-009). 37 | 38 | Timeouts :: 39 | * Use monitor-loop thresholds to detect blocked handlers early (THR-NF-O-018). 40 | Handlers can emit domain-specific heartbeats to aid diagnosis. 41 | 42 | Defensive coding :: 43 | * Validate external inputs before entering tight loops to avoid unbounded CPU usage. 44 | * Leverage Chronicle Core's `SingleThreadedChecked` exceptions during testing to catch accidental cross-thread access. 45 | 46 | == Testing Strategies 47 | 48 | * Run unit tests with assertions enabled to surface `SingleThreadedChecked` violations. 49 | * Use deterministic executors in integration tests to simulate loop progression and ensure handlers remain idempotent. 50 | * Incorporate concurrency stress tests that replay boundary scenarios (e.g., handler self-deregistration while monitor loop samples metrics). 51 | 52 | == Documentation and Traceability 53 | 54 | * Annotate handler classes with the relevant requirement IDs (e.g., `THR-FN-006`) in code comments or design docs to aid reviews. 55 | * Update operational run-books to describe ownership expectations and hand-off procedures. 56 | * Ensure new handlers ship with accompanying tests that prove thread-safety assumptions, referencing requirement IDs in test names where practical. 
57 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/NamedThreadFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.StackTrace; 8 | import net.openhft.chronicle.core.threads.CleaningThread; 9 | import net.openhft.chronicle.core.threads.ThreadDump; 10 | import org.jetbrains.annotations.NotNull; 11 | 12 | import java.util.concurrent.ThreadFactory; 13 | import java.util.concurrent.atomic.AtomicInteger; 14 | 15 | /** 16 | * Creates named threads within a dedicated group. The first thread is named 17 | * {@code groupName} and each subsequent one is suffixed with {@code -n} where 18 | * {@code n} increments from one. Every thread is a {@link CleaningThread} so 19 | * that thread-local resources are cleared when it terminates. 20 | */ 21 | public class NamedThreadFactory extends ThreadGroup implements ThreadFactory { 22 | private final AtomicInteger id = new AtomicInteger(); 23 | private final String nameShadow; 24 | private final Boolean daemonShadow; 25 | private final Integer priority; 26 | private final StackTrace createdHere; 27 | private final boolean inEventLoop; 28 | 29 | public NamedThreadFactory(String name) { 30 | this(name, null, null); 31 | } 32 | 33 | public NamedThreadFactory(String name, Boolean daemon) { 34 | this(name, daemon, null); 35 | } 36 | 37 | public NamedThreadFactory(String name, Boolean daemon, Integer priority) { 38 | this(name, daemon, priority, false); 39 | } 40 | 41 | /** 42 | * Constructs a factory with the supplied options. 
43 | * 44 | * @param name prefix used for the thread group and thread names 45 | * @param daemon set to {@code true} if created threads should be daemons 46 | * @param priority priority to assign or {@code null} for the JVM default 47 | * @param inEventLoop mark threads as part of an event loop for monitoring 48 | */ 49 | public NamedThreadFactory(String name, Boolean daemon, Integer priority, boolean inEventLoop) { 50 | super(name); 51 | this.nameShadow = name; 52 | this.daemonShadow = daemon; 53 | this.priority = priority; 54 | this.inEventLoop = inEventLoop; 55 | createdHere = Jvm.isResourceTracing() ? new StackTrace("NamedThreadFactory created here") : null; 56 | } 57 | 58 | /** 59 | * Returns a new {@link CleaningThread} executing the given task. The 60 | * thread name is formed by {@link Threads#threadGroupPrefix()} followed by 61 | * the factory name. Subsequent threads append {@code -n} where {@code n} 62 | * is an incrementing number. 63 | */ 64 | @Override 65 | @NotNull 66 | public Thread newThread(@NotNull Runnable r) { 67 | final int idSnapshot = this.id.getAndIncrement(); 68 | final String nameN = Threads.threadGroupPrefix() + (idSnapshot == 0 ? this.nameShadow : (this.nameShadow + '-' + idSnapshot)); 69 | Thread t = new CleaningThread(r, nameN, inEventLoop); 70 | ThreadDump.add(t, createdHere); 71 | if (daemonShadow != null) 72 | t.setDaemon(daemonShadow); 73 | if (priority != null) 74 | t.setPriority(priority); 75 | return t; 76 | } 77 | 78 | /** 79 | * Interrupts every thread currently in this group. Threads that have 80 | * already finished are ignored. 
81 | */ 82 | public void interruptAll() { 83 | Thread[] list = new Thread[activeCount() + 1]; 84 | super.enumerate(list); 85 | for (Thread thread : list) { 86 | if (thread != null) 87 | thread.interrupt(); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /docs/images/source/image1.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 24 | 25 | 26 | 28 | 30 | 32 | 33 | 34 | Event Handler 1 35 | 36 | Event Handler 3 37 | 38 | Event Handler 2 39 | 40 | Event Handler 4 41 | 42 | Event Handler 5 43 | Event Loop 44 | 45 | -------------------------------------------------------------------------------- /src/main/docs/thread-performance-targets.adoc: -------------------------------------------------------------------------------- 1 | = Chronicle Threads Performance Targets 2 | :toc: 3 | :sectnums: 4 | :lang: en-GB 5 | 6 | == Scope 7 | 8 | This document enumerates the latency, jitter, throughput, and allocation targets for Chronicle Threads and describes how teams must measure and report them. 9 | It elaborates the non-functional requirements captured in `project-requirements.adoc` (THR-NF-P-014 through THR-NF-P-031). 10 | 11 | == Reference Hardware Profile 12 | 13 | Baseline :: 14 | * Dual-socket x86_64 server, 3.2 GHz or faster, Turbo disabled. 15 | * 64 GiB RAM, uniform memory access within a socket. 16 | * Linux kernel 5.15 or newer with `isolcpus`, `nohz_full`, and `rcu_nocbs` tuned for fast cores. 17 | * OpenJDK 21 LTS, G1 GC, `-XX:+UseNUMA`, `-XX:+AlwaysPreTouch`. 18 | 19 | Variations :: 20 | * ARM64 hosts must document deviations from the x86 baseline and retune thresholds accordingly. 21 | * Virtualised environments require an additional jitter budget that is recorded alongside benchmark artefacts. 
22 | 23 | == Target Matrix 24 | 25 | [cols="2,3,3",options="header"] 26 | |=== 27 | |Requirement |Target |Measurement Notes 28 | |THR-NF-P-027 (Latency) |<= 10 microseconds at 99.99 percentile for single-hop handler runs |Profiling harness schedules 10 million iterations with a busy pauser and isolated core. 29 | |THR-NF-P-028 (Jitter) |<= 2 microseconds peak-to-peak jitter under steady load |Continuous histogram per handler, sampled via monitor loop over 15 minute windows. 30 | |THR-NF-P-029 (Throughput) |>= 5 million 64-byte events per second on a fast loop |Benchmark harness dispatches fixed-size payloads, recording sustained processing rate. 31 | |THR-NF-P-030 (Heap Allocation) |<= 0.1 Bytes per event averaged across handlers |Java Flight Recorder or allocation profiler attached during workload replay. 32 | |THR-NF-P-014 (Pauser Hot Path) |0 allocations in `Pauser.pause()` / `reset()` |Unit tests instrumented with allocation counters; CI gate fails on non-zero heap activity. 33 | |THR-NF-P-031 (CPU Utilisation) |Loop CPU utilisation tracks input rate; idle loops drop below 5 percent |Derived from pauser metrics (`timePaused`, `countPaused`); reported via telemetry dashboards. 34 | |=== 35 | 36 | == Measurement Methodology 37 | 38 | Workload Selection :: 39 | * Use representative handlers (queue tailer, order matching micro-benchmark, timed maintenance task). 40 | * Include at least one blocking handler routed to the `BLOCKING` priority to validate segregation. 41 | 42 | Warm-up :: 43 | * Discard initial 30 seconds to allow JIT compilation and cache priming. 44 | * Verify monitor-loop metrics stabilise before collecting results. 45 | 46 | Sampling :: 47 | * Persist HDR histograms for latency and jitter with 2 decimal microsecond precision. 48 | * Capture CPU affinity maps and pauser states alongside results to prove configuration fidelity. 49 | 50 | Repeatability :: 51 | * Run each scenario three times; publish mean and worst-case metrics. 
52 | * Store benchmark artefacts in build pipelines so regressions can be bisected. 53 | 54 | == Instrumentation Guidelines 55 | 56 | * Enable loop-block monitor logging at WARN to capture threshold breaches (THR-NF-O-019). 57 | * Attach `PauserMonitorFactory` exporters to push pause counts and durations into time-series storage. 58 | * Tag benchmark runs with Git commit, JVM build, and operating system version for traceability. 59 | 60 | == Regression Gates 61 | 62 | * CI pipelines must reject changes that miss any target by more than 5 percent unless accompanied by an approved waiver referencing the relevant requirement ID. 63 | * Nightly builds execute an extended soak test (minimum 8 hours) to surface low-frequency jitter outliers; findings feed into operational run-books. 64 | 65 | == Reporting 66 | 67 | * Summarise performance results in release notes with explicit references to the requirements satisfied (e.g., "Maintains THR-NF-P-027 latency target"). 68 | * Archive raw benchmark logs and histograms for audit and future tuning. 69 | * When targets cannot be met on non-reference hardware, document compensating controls and adjustments to operational thresholds.
70 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/ThreadsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import java.util.concurrent.ExecutorService; 10 | import java.util.concurrent.Executors; 11 | import java.util.concurrent.atomic.AtomicBoolean; 12 | 13 | import static org.junit.jupiter.api.Assertions.assertEquals; 14 | 15 | class ThreadsTest extends ThreadsTestCommon { 16 | 17 | @Test 18 | void shouldDumpStackTracesForStuckDelegatedExecutors() { 19 | final AtomicBoolean running = new AtomicBoolean(true); 20 | final ExecutorService service = Executors.newSingleThreadExecutor(new NamedThreadFactory("non-daemon-test")); 21 | service.submit(() -> { 22 | while (running.get()) { 23 | Jvm.pause(10L); 24 | } 25 | }); 26 | 27 | Threads.shutdown(service); 28 | running.set(false); 29 | expectException("*** FAILED TO TERMINATE java.util.concurrent.Executors$"); 30 | expectException("**** THE main/non-daemon-test THREAD DID NOT SHUTDOWN ***"); 31 | assertExceptionThrown("**** THE main/non-daemon-test THREAD DID NOT SHUTDOWN ***"); 32 | } 33 | 34 | @Test 35 | void shouldDumpStackTracesForStuckDaemonDelegatedExecutors() { 36 | final AtomicBoolean running = new AtomicBoolean(true); 37 | final ExecutorService service = Executors.newSingleThreadExecutor(new NamedThreadFactory("daemon-test")); 38 | service.submit(() -> { 39 | while (running.get()) { 40 | Jvm.pause(10L); 41 | } 42 | }); 43 | 44 | Threads.shutdownDaemon(service); 45 | running.set(false); 46 | expectException("*** FAILED TO TERMINATE java.util.concurrent.Executors$"); 47 | expectException("**** THE main/daemon-test THREAD DID NOT SHUTDOWN ***"); 48 | 
assertExceptionThrown("**** THE main/daemon-test THREAD DID NOT SHUTDOWN ***"); 49 | } 50 | 51 | @Test 52 | void shouldDumpStackTracesForStuckNestedDelegatedExecutors() { 53 | final AtomicBoolean running = new AtomicBoolean(true); 54 | final ExecutorService service = Executors.unconfigurableExecutorService( 55 | Executors.unconfigurableExecutorService( 56 | Executors.unconfigurableExecutorService( 57 | Executors.newSingleThreadExecutor(new NamedThreadFactory("non-daemon-test")) 58 | ) 59 | ) 60 | ); 61 | service.submit(() -> { 62 | while (running.get()) { 63 | Jvm.pause(10L); 64 | } 65 | }); 66 | 67 | Threads.shutdown(service); 68 | running.set(false); 69 | expectException("*** FAILED TO TERMINATE java.util.concurrent.Executors$"); 70 | expectException("**** THE main/non-daemon-test THREAD DID NOT SHUTDOWN ***"); 71 | assertExceptionThrown("**** THE main/non-daemon-test THREAD DID NOT SHUTDOWN ***"); 72 | } 73 | 74 | @Test 75 | void testRenderStackTrace() { 76 | StackTraceElement[] stackTrace = new StackTraceElement[]{ 77 | new StackTraceElement("com.test.Something", "doSomething", "Something.java", 123), 78 | new StackTraceElement("com.test.SomethingElse", "doSomethingElse", "SomethingElse.java", 456), 79 | new StackTraceElement("com.test.SomethingElseAgain", "doSomethingElseAgain", "SomethingElseAgain.java", 789), 80 | }; 81 | StringBuilder stringBuilder = new StringBuilder(); 82 | Threads.renderStackTrace(stringBuilder, stackTrace); 83 | assertEquals( 84 | " com.test.Something.doSomething(Something.java:123)\n" + 85 | " com.test.SomethingElse.doSomethingElse(SomethingElse.java:456)\n" + 86 | " com.test.SomethingElseAgain.doSomethingElseAgain(SomethingElseAgain.java:789)\n", 87 | stringBuilder.toString()); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/PauserMode.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import java.util.function.Supplier; 7 | 8 | /** 9 | * Enumerates the built-in pausing strategies provided by {@link Pauser}. 10 | * 11 | *

{@code Pauser} implementations are not {@code enum}s and cannot easily be 12 | * referred to from configuration files. {@code PauserMode} gives each common 13 | * strategy a serialisable name so that YAML and similar configuration formats 14 | * can specify the desired pauser.

15 | * 16 | *

The README contains a table under the "PauserMode" section that summarises 17 | * the latency and CPU characteristics for each mode.

18 | */ 19 | public enum PauserMode implements Supplier { 20 | 21 | /** 22 | * Busy waits for a short time before yielding and eventually sleeping. 23 | * Latency is moderate but CPU use is reduced compared to {@link #busy}. 24 | * Typical choice for event loops dealing with bursty traffic. 25 | * Can be monitored and does not need CPU isolation. 26 | */ 27 | balanced { 28 | @Override 29 | public Pauser get() { 30 | return Pauser.balanced(); 31 | } 32 | }, 33 | 34 | /** 35 | * Continuously busy spins to minimise jitter and give the lowest latency. 36 | * Best used when a dedicated core is available. 37 | * Not monitorable and prefers CPU isolation. 38 | */ 39 | busy { 40 | @Override 41 | public Pauser get() { 42 | return Pauser.busy(); 43 | } 44 | 45 | @Override 46 | public boolean isolcpus() { 47 | return true; 48 | } 49 | 50 | @Override 51 | public boolean monitor() { 52 | return false; 53 | } 54 | }, 55 | 56 | /** 57 | * Always sleeps for roughly one millisecond and never busy waits. 58 | * Latency can be around one millisecond but CPU usage is very low. 59 | * Useful for low priority polling where jitter is acceptable. 60 | */ 61 | milli { 62 | @Override 63 | public Pauser get() { 64 | return Pauser.millis(1); 65 | } 66 | }, 67 | 68 | /** 69 | * Less aggressive than {@link #balanced}; mainly sleeps to conserve CPU. 70 | * Offers high jitter and therefore suits background or diagnostic work. 71 | */ 72 | sleepy { 73 | @Override 74 | public Pauser get() { 75 | return Pauser.sleepy(); 76 | } 77 | }, 78 | 79 | /** 80 | * Behaves like {@link #busy} but also supports timeout based pauses. 81 | * Maintains minimal jitter while allowing a time limit to be enforced. 82 | * Not monitorable and prefers CPU isolation. 
83 | */ 84 | timedBusy { 85 | @Override 86 | public Pauser get() { 87 | return Pauser.timedBusy(); 88 | } 89 | 90 | @Override 91 | public boolean isolcpus() { 92 | return true; 93 | } 94 | 95 | @Override 96 | public boolean monitor() { 97 | return false; 98 | } 99 | }, 100 | /** 101 | * Briefly busy spins then yields the CPU. 102 | * Latency is low and the pauser can be shared between threads. 103 | * Suitable when threads share CPUs but responsiveness is still important. 104 | */ 105 | yielding { 106 | @Override 107 | public Pauser get() { 108 | return Pauser.yielding(); 109 | } 110 | }; 111 | 112 | /** 113 | * Indicates whether the provided {@link Pauser} is suitable for CPU isolation. 114 | * 115 | * @return {@code true} if CPU isolation is suitable, otherwise {@code false} 116 | */ 117 | public boolean isolcpus() { 118 | return false; 119 | } 120 | 121 | /** 122 | * Indicates whether the provided {@link Pauser} can be monitored. 123 | * 124 | * @return {@code true} if the pauser can be monitored, otherwise {@code false} 125 | */ 126 | public boolean monitor() { 127 | return true; 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /src/main/docs/thread-operational-controls.adoc: -------------------------------------------------------------------------------- 1 | = Chronicle Threads Operational Controls 2 | :toc: 3 | :sectnums: 4 | :lang: en-GB 5 | 6 | == CPU Isolation and Affinity Governance 7 | 8 | Why :: 9 | Latency-sensitive handlers rely on predictable scheduling and cache residency. 10 | 11 | Core controls :: 12 | * Reserve dedicated CPU cores for loops using busy pausers, aligning with documented recommendations (THR-DOC-016). 13 | * Validate runtime affinity strings against estate topology before deployment (THR-FN-015, THR-FN-017). 14 | * Record the chosen affinity mapping in run-books so support engineers can confirm compliance during incident response. 
15 | 16 | Review hot-spots :: 17 | * K8s or container orchestrators that may reassign cores. 18 | * BIOS or hypervisor changes that alter NUMA layout. 19 | * Third-party tooling that repins threads (profilers, debuggers). 20 | 21 | == Loop-Block Monitoring and Alerting 22 | 23 | Why :: 24 | A stalled handler compromises all work on its loop and introduces systemic jitter. 25 | 26 | Core controls :: 27 | * Keep the monitor loop enabled in production to enforce execution thresholds (THR-NF-O-018, THR-NF-O-019). 28 | * Tune `loop.block.threshold.ns` and `MONITOR_INTERVAL_MS` via system properties to reflect acceptable tail latency (THR-OPS-023, THR-OPS-024). 29 | * Integrate `PauserMonitorFactory` outputs with telemetry collectors so SLO breaches surface quickly (THR-NF-O-021). 30 | 31 | Review hot-spots :: 32 | * Handlers that call out to external services. 33 | * Contended locks inside business logic. 34 | * JVM safepoint pauses observable as correlated spikes across all loops. 35 | 36 | == Startup, Shutdown, and Recovery 37 | 38 | Why :: 39 | Predictable lifecycle management prevents resource leaks and eases maintenance. 40 | 41 | Core controls :: 42 | * Configure shutdown hooks or explicit close ordering so loops stop gracefully and relinquish resources (THR-FN-002, THR-OPS-025). 43 | * Use builder precedence rules to override unsuitable host-wide defaults (THR-OPS-024). 44 | * Include loop topology and handler binding in operational documentation to guide failover drills. 45 | 46 | Review hot-spots :: 47 | * Mutable static state shared across handlers that survives restart. 48 | * Incomplete handler deregistration causing repeated warnings during shutdown. 49 | * JVM exit sequences where native resources must release before process termination. 50 | 51 | == Configuration Hygiene 52 | 53 | Why :: 54 | Misconfiguration can disable safety features or erode performance targets. 
55 | 56 | Core controls :: 57 | * Maintain an allow-listed set of Chronicle Threads system properties and validate them in CI pipelines. 58 | * Version control default builder profiles for each environment (development, certification, production) and peer review changes. 59 | * Capture pauser and monitor settings in infrastructure-as-code artefacts to avoid snowflake deployments. 60 | 61 | Review hot-spots :: 62 | * Ad-hoc overrides applied via command-line flags. 63 | * Legacy scripts that pre-date the Nine-Box taxonomy and omit traceability IDs. 64 | * Environment variable templating that truncates affinity strings. 65 | 66 | == Telemetry and Observability 67 | 68 | Why :: 69 | Workload visibility enables tuning and rapid diagnosis. 70 | 71 | Core controls :: 72 | * Export pauser and loop-block metrics to the organisation-wide metrics pipeline (e.g., Prometheus, Graphite). 73 | * Correlate Chronicle Threads metrics with downstream components (Queues, Maps) to contextualise latency spikes. 74 | * Ensure monitor-loop warnings are promoted to actionable alerts rather than suppressed in logs. 75 | 76 | Review hot-spots :: 77 | * Handlers that bypass standard logging frameworks. 78 | * Log rotation policies that discard stack traces before investigation. 79 | * Telemetry exporters that share threads with latency-sensitive loops. 80 | 81 | == Change Management 82 | 83 | Why :: 84 | Threading behaviour influences end-to-end latency; uncontrolled change increases risk. 85 | 86 | Core controls :: 87 | * Pair configuration modifications with updated documentation and automated tests (THR-OPS-023). 88 | * Track requirement IDs (e.g., THR-NF-P-027) in change tickets so reviewers can verify continued compliance. 89 | * Simulate workload impact in a staging environment whenever pausers, affinities, or monitor thresholds change. 90 | 91 | Review hot-spots :: 92 | * Hot fixes applied directly to production nodes. 93 | * Divergent configuration between active-active sites. 
94 | * Missing rollback plans for affinity or pauser adjustments. 95 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/StopVCloseTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.StackTrace; 8 | import net.openhft.chronicle.core.threads.EventHandler; 9 | import net.openhft.chronicle.core.threads.EventLoop; 10 | import net.openhft.chronicle.core.threads.HandlerPriority; 11 | import org.jetbrains.annotations.NotNull; 12 | import org.junit.jupiter.api.BeforeEach; 13 | import org.junit.jupiter.api.Test; 14 | 15 | import java.util.Collections; 16 | import java.util.EnumSet; 17 | import java.util.Set; 18 | import java.util.concurrent.BlockingQueue; 19 | import java.util.concurrent.LinkedBlockingQueue; 20 | import java.util.concurrent.TimeUnit; 21 | import java.util.concurrent.atomic.AtomicBoolean; 22 | import java.util.concurrent.atomic.AtomicReference; 23 | import java.util.concurrent.locks.LockSupport; 24 | 25 | import static org.junit.jupiter.api.Assertions.assertEquals; 26 | import static org.junit.jupiter.api.Assertions.assertTrue; 27 | 28 | /** 29 | * Demonstrates how stopping and closing an {@link EventLoop} differ. 30 | * 31 | *

Stopping allows existing handlers to finish so the loop may be started 32 | * again. Closing interrupts blocking work and frees the loop's resources. 33 | */ 34 | public class StopVCloseTest extends ThreadsTestCommon { 35 | 36 | @BeforeEach 37 | void handlersInit() { 38 | ignoreException("Monitoring a task which has finished "); 39 | MonitorEventLoop.MONITOR_INITIAL_DELAY_MS = 1; 40 | } 41 | 42 | @Override 43 | public void preAfter() { 44 | MonitorEventLoop.MONITOR_INITIAL_DELAY_MS = 10_000; 45 | } 46 | 47 | @Test 48 | void eventGroupStop() { 49 | final EnumSet allPriorities = EnumSet.allOf(HandlerPriority.class); 50 | try (final EventLoop eventGroup = EventGroup.builder() 51 | .withConcurrentThreadsNum(1) 52 | .withPriorities(allPriorities) 53 | .build()) { 54 | eventGroup.start(); 55 | 56 | Set started = Collections.synchronizedSet(EnumSet.noneOf(HandlerPriority.class)); 57 | Set stopped = Collections.synchronizedSet(EnumSet.noneOf(HandlerPriority.class)); 58 | for (HandlerPriority hp : allPriorities) 59 | eventGroup.addHandler(new EventHandler() { 60 | @Override 61 | public boolean action() { 62 | return true; 63 | } 64 | 65 | @Override 66 | public void loopStarted() { 67 | started.add(hp); 68 | } 69 | 70 | @Override 71 | public void loopFinished() { 72 | stopped.add(hp); 73 | } 74 | 75 | @Override 76 | public @NotNull HandlerPriority priority() { 77 | return hp; 78 | } 79 | }); 80 | 81 | for (int i = 0; i < 100; i++) 82 | if (!started.equals(allPriorities)) 83 | Jvm.pause(1); 84 | eventGroup.stop(); 85 | assertTrue(eventGroup.isStopped()); 86 | assertEquals(allPriorities, started); 87 | assertEquals(allPriorities, stopped); 88 | } 89 | } 90 | 91 | @Test 92 | void blockingStopped() throws InterruptedException { 93 | BlockingEventLoop bel = new BlockingEventLoop("blocking"); 94 | bel.start(); 95 | BlockingQueue q = new LinkedBlockingQueue<>(); 96 | AtomicBoolean stopped = new AtomicBoolean(); 97 | AtomicReference thread = new AtomicReference<>(); 98 | 
bel.addHandler(() -> { 99 | try { 100 | thread.set(Thread.currentThread()); 101 | q.add("token"); 102 | LockSupport.parkNanos(2_000_000_000L); 103 | return false; 104 | } finally { 105 | stopped.set(true); 106 | } 107 | }); 108 | q.poll(1, TimeUnit.SECONDS); 109 | bel.close(); 110 | if (thread.get().isAlive()) 111 | StackTrace.forThread(thread.get()).printStackTrace(); 112 | assertTrue(stopped.get()); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/internal/EventLoopStateRendererTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads.internal; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.threads.EventLoop; 8 | import net.openhft.chronicle.threads.*; 9 | import org.junit.jupiter.api.Test; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertEquals; 12 | import static org.junit.jupiter.api.Assertions.assertTrue; 13 | 14 | class EventLoopStateRendererTest extends ThreadsTestCommon { 15 | 16 | @Test 17 | void isNullSafe() { 18 | assertEquals("Foo event loop is null", EventLoopStateRenderer.INSTANCE.render("Foo", null)); 19 | } 20 | 21 | @Test 22 | void testCanRenderMediumEventLoop() { 23 | try (final MediumEventLoop mediumEventLoop = new MediumEventLoop(null, "foobar", Pauser.sleepy(), true, "any")) { 24 | mediumEventLoop.start(); 25 | while (!mediumEventLoop.isAlive()) { 26 | Jvm.pause(10); 27 | } 28 | final String dump = EventLoopStateRenderer.INSTANCE.render("Medium", mediumEventLoop); 29 | Jvm.startup().on(EventLoopStateRendererTest.class, dump); 30 | assertTrue(dump.contains("Medium event loop state")); 31 | assertTrue(dump.contains("Closed: false")); 32 | assertTrue(dump.contains("Closing: false")); 33 | assertTrue(dump.contains("Lifecycle: 
STARTED")); 34 | assertTrue(dump.contains("Thread state: ")); 35 | } 36 | } 37 | 38 | @Test 39 | void testCanRenderStoppedMediumEventLoop() { 40 | try (final MediumEventLoop mediumEventLoop = new MediumEventLoop(null, "foobar", Pauser.sleepy(), true, "any")) { 41 | mediumEventLoop.start(); 42 | while (!mediumEventLoop.isAlive()) { 43 | Jvm.pause(10); 44 | } 45 | mediumEventLoop.stop(); 46 | while (!mediumEventLoop.isStopped()) { 47 | Jvm.pause(10); 48 | } 49 | final String dump = EventLoopStateRenderer.INSTANCE.render("Medium", mediumEventLoop); 50 | Jvm.startup().on(EventLoopStateRendererTest.class, dump); 51 | assertTrue(dump.contains("Medium event loop state")); 52 | assertTrue(dump.contains("Closed: false")); 53 | assertTrue(dump.contains("Closing: false")); 54 | assertTrue(dump.contains("Lifecycle: STOPPED")); 55 | assertTrue(dump.contains("Thread state: ")); 56 | } 57 | } 58 | 59 | @Test 60 | void testCanRenderUnstartedMediumEventLoop() { 61 | try (final MediumEventLoop mediumEventLoop = new MediumEventLoop(null, "foobar", Pauser.sleepy(), true, "any")) { 62 | final String dump = EventLoopStateRenderer.INSTANCE.render("Medium", mediumEventLoop); 63 | Jvm.startup().on(EventLoopStateRendererTest.class, dump); 64 | assertTrue(dump.contains("Medium event loop state")); 65 | assertTrue(dump.contains("Closed: false")); 66 | assertTrue(dump.contains("Closing: false")); 67 | assertTrue(dump.contains("Lifecycle: NEW")); 68 | } 69 | } 70 | 71 | @Test 72 | void testCanRenderMonitorEventLoop() { 73 | try (final MonitorEventLoop monitorEventLoop = new MonitorEventLoop(null, Pauser.sleepy())) { 74 | monitorEventLoop.start(); 75 | while (!monitorEventLoop.isAlive()) { 76 | Jvm.pause(10); 77 | } 78 | final String dump = EventLoopStateRenderer.INSTANCE.render("Monitor", monitorEventLoop); 79 | Jvm.startup().on(EventLoopStateRendererTest.class, dump); 80 | assertTrue(dump.contains("Monitor event loop state")); 81 | assertTrue(dump.contains("Closed: false")); 82 | 
assertTrue(dump.contains("Closing: false")); 83 | assertTrue(dump.contains("Lifecycle: STARTED")); 84 | } 85 | } 86 | 87 | @Test 88 | void testCanRenderEventGroup() { 89 | try (final EventLoop eventGroup = EventGroup.builder().build()) { 90 | eventGroup.start(); 91 | while (!eventGroup.isAlive()) { 92 | Jvm.pause(10); 93 | } 94 | final String dump = EventLoopStateRenderer.INSTANCE.render("EG", eventGroup); 95 | Jvm.startup().on(EventLoopStateRendererTest.class, dump); 96 | assertTrue(dump.contains("EG event loop state")); 97 | assertTrue(dump.contains("Closed: false")); 98 | assertTrue(dump.contains("Closing: false")); 99 | assertTrue(dump.contains("Lifecycle: STARTED")); 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/main/docs/thread-security-review.adoc: -------------------------------------------------------------------------------- 1 | = Chronicle Threads Security Review 2 | :toc: 3 | :sectnums: 4 | :lang: en-GB 5 | 6 | == Handler Admission and Privilege Escalation 7 | 8 | Why :: 9 | Handlers execute with the full privileges of the hosting JVM; untrusted code can compromise sensitive data paths. 10 | 11 | Core risks :: 12 | * Malicious handler registration at runtime via exposed management endpoints (THR-FN-004). 13 | * Unsandboxed handlers accessing shared mutable state or credentials. 14 | * Reflection-based injection of handlers that bypass intended builder configuration (THR-FN-001). 15 | 16 | Mitigations :: 17 | * Restrict handler installation to trusted bootstrap code paths; gate dynamic registration behind authentication and authorisation. 18 | * Use code reviews and static analysis to enforce least-privilege principles within handlers. 19 | * Log handler class names and source artefacts during registration for audit trails. 20 | 21 | Review hot-spots :: 22 | * Deployment scripts that allow arbitrary classpath extensions. 23 | * OSGi or plugin frameworks injecting handlers dynamically. 
24 | 25 | == Affinity and Resource Isolation 26 | 27 | Why :: 28 | Incorrect core binding can leak workload information across tenants or undermine performance isolation. 29 | 30 | Core risks :: 31 | * Shared core usage permits timing side channels between sensitive workloads. 32 | * NUMA misalignment causes cross-node memory access patterns exposing high-resolution timing data (THR-FN-017). 33 | * System properties overridden by untrusted inputs altering affinity strings (THR-OPS-023). 34 | 35 | Mitigations :: 36 | * Validate affinity strings against an approved list before instantiating `EventGroup` builders (THR-FN-015). 37 | * Store affinity selections in configuration repositories with change control. 38 | * Monitor actual thread-to-core bindings via OS tooling (e.g., `taskset`, `ps -Lo pid,psr`) and alert on drift. 39 | 40 | Review hot-spots :: 41 | * Container orchestrators with relaxed CPU quotas. 42 | * Multi-tenant hosts lacking hardware partitioning. 43 | 44 | == Monitoring and Telemetry Integrity 45 | 46 | Why :: 47 | Accurate telemetry is essential for detecting anomalous behaviour and limit breaches. 48 | 49 | Core risks :: 50 | * Attackers disable loop-block monitoring through system properties (THR-OPS-020). 51 | * Log tampering obscures stack traces that evidence suspicious handler execution times (THR-NF-O-019). 52 | * Telemetry collectors overloaded by attacker-generated events, leading to blind spots (THR-NF-O-021). 53 | 54 | Mitigations :: 55 | * Lock down JVM arguments in production; apply checksum or signature validation to launch scripts. 56 | * Forward critical monitor events to secure log aggregation platforms with tamper detection. 57 | * Rate-limit telemetry ingestion and validate payload sizes from handlers publishing metrics. 58 | 59 | Review hot-spots :: 60 | * Support run-books that recommend disabling monitors during troubleshooting. 61 | * Nodes operating with reduced logging due to storage constraints. 
62 | 63 | == Shutdown and Resource Hygiene 64 | 65 | Why :: 66 | Handlers often manage off-heap or file-backed resources via Chronicle Core abstractions; improper shutdown can leak descriptors or expose data. 67 | 68 | Core risks :: 69 | * `EventGroup` instances left open, keeping sensitive files mapped (THR-FN-002). 70 | * Shutdown hooks overridden by untrusted code, preventing orderly release (THR-OPS-025). 71 | * Race conditions during shutdown causing inconsistent state for dependent services. 72 | 73 | Mitigations :: 74 | * Apply Chronicle Core's `ReferenceCounted` policies, ensuring handlers close dependent resources during loop shutdown. 75 | * Harden shutdown hook registration; disallow multiple components from mutating the same hook. 76 | * Capture and audit shutdown logs for every production cycle. 77 | 78 | Review hot-spots :: 79 | * Handlers that interact with Chronicle Queue or Chronicle Map without corresponding close semantics. 80 | * Scripted restarts that do not wait for `EventGroup.close()` completion. 81 | 82 | == Supply Chain and Dependency Considerations 83 | 84 | Why :: 85 | Chronicle Threads relies on Chronicle Core and Affinity; vulnerabilities propagate through these dependencies. 86 | 87 | Core risks :: 88 | * Outdated dependencies lacking recent security patches or mitigations. 89 | * Misaligned versions introducing behavioural regressions in pauser or affinity handling. 90 | 91 | Mitigations :: 92 | * Track dependency versions in BOM files; enforce minimum patch levels aligned with security advisories. 93 | * Execute dependency-update dry runs in staging to validate core functionality and performance targets. 94 | * Subscribe to Chronicle Software security bulletins and integrate alerts into incident response procedures. 95 | 96 | Review hot-spots :: 97 | * Custom forks of Chronicle libraries. 98 | * Environments that block outbound network access, delaying vulnerability scanning updates. 
99 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/YieldingPauser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import org.jetbrains.annotations.NotNull; 8 | 9 | import java.util.concurrent.TimeUnit; 10 | import java.util.concurrent.TimeoutException; 11 | 12 | /** 13 | * Pauser that spins for a fixed number of calls and then yields. 14 | * 15 | *

<p>It consumes less CPU than {@link BusyPauser} yet avoids the sleeping 16 | * stage used by {@link LongPauser}. Use it when short bursts of activity are 17 | * expected but the thread must remain responsive.</p>

18 | */ 19 | public class YieldingPauser implements TimingPauser { 20 | final int minBusy; 21 | int count = 0; 22 | private long timePaused = 0; 23 | private long countPaused = 0; 24 | private long yieldStart = 0; 25 | private long timeOutStart = Long.MAX_VALUE; 26 | 27 | /** 28 | * @param minBusy number of {@link #pause()} calls to spin before yielding. 29 | * A value of {@code 0} yields immediately. 30 | */ 31 | public YieldingPauser(int minBusy) { 32 | this.minBusy = minBusy; 33 | } 34 | 35 | @Override 36 | public void reset() { 37 | checkYieldTime(); 38 | count = 0; 39 | timeOutStart = Long.MAX_VALUE; 40 | } 41 | 42 | /** 43 | * Increments an internal counter and either spins or yields. 44 | * 45 | *

<p>While the count is below {@code minBusy} a safepoint is executed and 46 | * the method returns. Once the threshold is passed the thread yields and the 47 | * time spent yielding is measured.</p>

48 | */ 49 | @Override 50 | public void pause() { 51 | ++count; 52 | if (count < minBusy) { 53 | ++countPaused; 54 | Jvm.safepoint(); 55 | return; 56 | } 57 | yield0(); 58 | checkYieldTime(); 59 | } 60 | 61 | /** 62 | * Variant of {@link #pause()} that fails after the given timeout. 63 | * 64 | *

<p>The first call records the start time. Once yielding begins the elapsed 65 | * time is checked and a {@link TimeoutException} is thrown when the limit is 66 | * exceeded.</p>

67 | * 68 | * @param timeout maximum time to wait 69 | * @param timeUnit unit of the timeout 70 | * @throws TimeoutException if the elapsed time passes the timeout 71 | */ 72 | @Override 73 | public void pause(long timeout, @NotNull TimeUnit timeUnit) throws TimeoutException { 74 | if (timeOutStart == Long.MAX_VALUE) 75 | timeOutStart = System.nanoTime(); 76 | 77 | ++count; 78 | if (count < minBusy) 79 | return; 80 | yield0(); 81 | 82 | if (System.nanoTime() - timeOutStart > timeUnit.toNanos(timeout)) 83 | throw new TimeoutException(); 84 | checkYieldTime(); 85 | } 86 | 87 | /** 88 | * Records and accumulates the duration of yielding if any, and resets the start time of yielding. 89 | */ 90 | void checkYieldTime() { 91 | if (yieldStart > 0) { 92 | long time = System.nanoTime() - yieldStart; 93 | timePaused += time; 94 | countPaused++; 95 | yieldStart = 0; 96 | } 97 | } 98 | 99 | /** 100 | * Initiates or continues a yielding phase for this pauser. 101 | */ 102 | void yield0() { 103 | if (yieldStart == 0) 104 | yieldStart = System.nanoTime(); 105 | Thread.yield(); 106 | } 107 | 108 | @Override 109 | public void unpause() { 110 | // Do nothing 111 | } 112 | 113 | /** 114 | * Returns the total time this pauser has spent yielding, measured in milliseconds. 115 | * 116 | * @return total yielding time in milliseconds 117 | */ 118 | @Override 119 | public long timePaused() { 120 | return timePaused / 1_000_000; 121 | } 122 | 123 | /** 124 | * Returns the number of times this pauser has been activated, including both busy-wait and yield iterations. 125 | * 126 | * @return the total number of pause activations 127 | */ 128 | @Override 129 | public long countPaused() { 130 | return countPaused; 131 | } 132 | 133 | /** 134 | * Provides a string representation of this pauser, which varies based on the {@code minBusy} configuration. 
135 | * 136 | * @return a string representation identifying the mode and settings of this pauser 137 | */ 138 | @Override 139 | public String toString() { 140 | if (minBusy == 2) 141 | return "PauserMode.yielding"; 142 | return "YieldingPauser{" + 143 | "minBusy=" + minBusy + 144 | '}'; 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/ThreadMonitors.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.threads.internal.ThreadMonitorHarness; 8 | import net.openhft.chronicle.threads.internal.ThreadsThreadHolder; 9 | import org.jetbrains.annotations.NotNull; 10 | 11 | import java.util.function.BooleanSupplier; 12 | import java.util.function.Consumer; 13 | import java.util.function.LongSupplier; 14 | import java.util.function.Supplier; 15 | 16 | public enum ThreadMonitors { 17 | ; // none 18 | 19 | /** 20 | * Create a monitor for a single thread. 
21 | * 22 | * @param description text used in log messages 23 | * @param timeLimit threshold in nanoseconds before a stack trace is logged 24 | * @param timeSupplier supplies the current time, usually {@link System#nanoTime} 25 | * @param threadSupplier returns the thread to observe 26 | * @return a monitor handler for installation on a monitor loop 27 | */ 28 | public static ThreadMonitor forThread(String description, long timeLimit, 29 | LongSupplier timeSupplier, 30 | Supplier threadSupplier) { 31 | return new ThreadMonitorHarness(new ThreadsThreadHolder(description, 32 | timeLimit, timeSupplier, threadSupplier, () -> true, perfOn())); 33 | } 34 | 35 | @NotNull 36 | private static Consumer perfOn() { 37 | return msg -> Jvm.perf().on(ThreadMonitor.class, msg); 38 | } 39 | 40 | /** 41 | * Variant of {@link #forThread(String, long, LongSupplier, Supplier)} that 42 | * allows the caller to control logging. 43 | * 44 | * @param description text used in log messages 45 | * @param timeLimit threshold in nanoseconds before a stack trace is logged 46 | * @param timeSupplier supplies the current time 47 | * @param threadSupplier returns the thread to observe 48 | * @param logEnabled predicate controlling whether logging occurs 49 | * @param logConsumer receives the formatted log message 50 | * @return a monitor handler for installation on a monitor loop 51 | */ 52 | public static ThreadMonitor forThread(String description, long timeLimit, 53 | LongSupplier timeSupplier, 54 | Supplier threadSupplier, 55 | BooleanSupplier logEnabled, 56 | Consumer logConsumer) { 57 | return new ThreadMonitorHarness(new ThreadsThreadHolder(description, 58 | timeLimit, timeSupplier, threadSupplier, logEnabled, logConsumer)); 59 | } 60 | 61 | /** 62 | * Create a monitor aimed at a service thread. 
63 | * 64 | * @param description text used in log messages 65 | * @param timeLimit threshold in nanoseconds before a stack trace is logged 66 | * @param timeSupplier supplies the current time 67 | * @param threadSupplier returns the thread to observe 68 | * @return a monitor handler for installation on a monitor loop 69 | */ 70 | public static ThreadMonitor forServices(String description, long timeLimit, 71 | LongSupplier timeSupplier, 72 | Supplier threadSupplier) { 73 | return new ThreadMonitorHarness(new ThreadsThreadHolder(description, 74 | timeLimit, timeSupplier, threadSupplier, () -> true, perfOn())); 75 | } 76 | 77 | /** 78 | * Variant of {@link #forServices(String, long, LongSupplier, Supplier)} with 79 | * caller controlled logging. 80 | * 81 | * @param description text used in log messages 82 | * @param timeLimit threshold in nanoseconds before a stack trace is logged 83 | * @param timeSupplier supplies the current time 84 | * @param threadSupplier returns the thread to observe 85 | * @param logEnabled predicate controlling whether logging occurs 86 | * @param logConsumer receives the formatted log message 87 | * @return a monitor handler for installation on a monitor loop 88 | */ 89 | public static ThreadMonitor forServices(String description, long timeLimit, 90 | LongSupplier timeSupplier, 91 | Supplier threadSupplier, 92 | BooleanSupplier logEnabled, 93 | Consumer logConsumer) { 94 | return new ThreadMonitorHarness(new ThreadsThreadHolder(description, 95 | timeLimit, timeSupplier, threadSupplier, logEnabled, logConsumer)); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/EventGroupStressTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import 
net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.OS; 8 | import net.openhft.chronicle.core.io.Closeable; 9 | import net.openhft.chronicle.core.threads.EventHandler; 10 | import net.openhft.chronicle.core.threads.HandlerPriority; 11 | import net.openhft.chronicle.testframework.process.JavaProcessBuilder; 12 | import org.jetbrains.annotations.NotNull; 13 | import org.junit.jupiter.api.Disabled; 14 | import org.junit.jupiter.api.Test; 15 | import org.junit.jupiter.api.Timeout; 16 | 17 | import java.util.ArrayList; 18 | import java.util.List; 19 | import java.util.concurrent.ThreadLocalRandom; 20 | import java.util.concurrent.TimeUnit; 21 | import java.util.stream.IntStream; 22 | 23 | import static org.junit.jupiter.api.Assumptions.assumeFalse; 24 | 25 | /** 26 | * Spawns many event groups across several processes to check that a large 27 | * number of event loops can be created and closed without exhausting the CPU. 28 | */ 29 | class EventGroupStressTest extends ThreadsTestCommon { 30 | 31 | private static final int NUM_PROCESSES = 10; 32 | private static final int NUM_GROUPS_PER_PROCESS = 20; 33 | 34 | @Disabled("https://github.com/OpenHFT/Chronicle-Threads/issues/186") 35 | @Test 36 | @Timeout(30) 37 | void canOverloadTheCPUWithEventGroupsSafely() { 38 | assumeFalse(OS.isWindows()); 39 | IntStream.range(0, NUM_PROCESSES).mapToObj(i -> JavaProcessBuilder.create(EventGroupStarterProcess.class) 40 | .withProgramArguments(String.valueOf(NUM_GROUPS_PER_PROCESS)) 41 | .start()) 42 | .forEach(process -> { 43 | try { 44 | if (!process.waitFor(10, TimeUnit.SECONDS) || process.exitValue() != 0) { 45 | Jvm.error().on(EventGroupStressTest.class, "Process didn't end or ended in error"); 46 | JavaProcessBuilder.printProcessOutput("event group getter", process); 47 | } 48 | } catch (InterruptedException e) { 49 | Jvm.error().on(EventGroupStressTest.class, "Interrupted waiting for process to end"); 50 | Thread.currentThread().interrupt(); 51 | } 52 | }); 53 | 
} 54 | 55 | static class EventGroupStarterProcess { 56 | 57 | public static void main(String[] args) { 58 | int groupsToStart = Integer.parseInt(args[0]); 59 | List eventGroups = new ArrayList<>(); 60 | List handlers = new ArrayList<>(); 61 | try { 62 | for (int j = 0; j < groupsToStart; j++) { 63 | final EventGroup eventGroup = EventGroup.builder().withBinding("any").build(); 64 | final TestEventHandler beforeStartHandler = new TestEventHandler(); 65 | eventGroup.addHandler(beforeStartHandler); 66 | eventGroup.start(); 67 | final TestEventHandler afterStartHandler = new TestEventHandler(); 68 | eventGroup.addHandler(afterStartHandler); 69 | handlers.add(beforeStartHandler); 70 | handlers.add(afterStartHandler); 71 | eventGroups.add(eventGroup); 72 | } 73 | while (!handlers.stream().allMatch(handler -> handler.loopStarted)) { 74 | Jvm.pause(100); 75 | } 76 | } finally { 77 | eventGroups.forEach(Closeable::closeQuietly); 78 | } 79 | } 80 | } 81 | 82 | static class TestEventHandler implements EventHandler { 83 | 84 | private static final HandlerPriority[] PRIORITIES = new HandlerPriority[]{ 85 | HandlerPriority.HIGH, HandlerPriority.MEDIUM, HandlerPriority.REPLICATION, HandlerPriority.TIMER, 86 | HandlerPriority.BLOCKING, HandlerPriority.DAEMON 87 | }; 88 | 89 | private final HandlerPriority priority; 90 | private volatile boolean loopStarted = false; 91 | 92 | TestEventHandler() { 93 | this.priority = PRIORITIES[ThreadLocalRandom.current().nextInt(PRIORITIES.length)]; 94 | } 95 | 96 | @Override 97 | public void loopStarted() { 98 | loopStarted = true; 99 | } 100 | 101 | @Override 102 | public boolean action() { 103 | // Does nothing 104 | return false; 105 | } 106 | 107 | @Override 108 | public @NotNull HandlerPriority priority() { 109 | return priority; 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/main/docs/thread-architecture-overview.adoc: 
-------------------------------------------------------------------------------- 1 | = Chronicle Threads Architecture Overview 2 | :toc: 3 | :sectnums: 4 | :lang: en-GB 5 | 6 | == Purpose 7 | 8 | This guide explains how Chronicle Threads composes event loops, handlers, pausers, and monitoring into a cohesive runtime so that engineers can reason about placement, affinity, and operational behaviour. 9 | It complements the functional catalogue in `project-requirements.adoc` and provides concrete design cues for solution architects. 10 | 11 | == Event Loop Topologies 12 | 13 | Chronicle Threads organises work into named `EventLoop` instances that the `EventGroup` manages (THR-FN-001). 14 | Each loop is single-threaded for handler execution (THR-FN-006) and is categorised by handler priority (THR-FN-005). 15 | 16 | .... 17 | EventGroup 18 | | 19 | +-- CoreLoop[HIGH|MEDIUM] ---> fast path handlers (trading logic, matching) 20 | | 21 | +-- BlockingPool[BLOCKING] --> dedicated threads for I/O or storage waits 22 | | 23 | +-- TimerLoop[TIMER] ------> scheduled maintenance and time-based work 24 | | 25 | +-- MonitorLoop[MONITOR] -> observes loop-block latency and pauser metrics 26 | .... 27 | 28 | Handlers attach to the loop whose priority matches their declared `HandlerPriority`. 29 | An `EventGroup` materialises blocking and monitor loops only when required. 30 | Applications can deploy multiple `EventGroup` instances in the same JVM to isolate subsystems whilst sharing pauser implementations. 31 | 32 | == Handler Lifecycle and Serial Execution 33 | 34 | Handlers are added at runtime via `EventGroup.addHandler()` (THR-FN-004). 35 | The loop invokes each handler serially, ensuring stateful logic can remain lock-free (THR-FN-006). 36 | The handler signals its progress via the boolean return value of `action()` (THR-FN-007). 
37 | Self-removal uses `InvalidEventHandlerException` (THR-FN-008); the loop removes the handler, logs through the standard `Jvm` channel, and continues running (THR-NF-O-009). 38 | 39 | Handlers should bound their execution time so that monitor loops can flag outliers reliably (THR-NF-O-018). 40 | Long-running work belongs on the `BLOCKING` priority where independent threads handle it. 41 | When reconfiguring a live loop, call `EventLoop.addHandler()` on the owning loop thread or rely on the concurrency-safe wrappers provided by `EventGroup`. 42 | 43 | == Pauser Strategy and Scheduler Interaction 44 | 45 | Pausers implement the idle strategy for each loop and are configured via builders or per-loop overrides (THR-FN-010, THR-FN-011). 46 | Adaptive pausers expose tuning parameters that balance busy-spin and sleeping phases (THR-FN-012) while exposing metrics for observability (THR-NF-O-013, THR-NF-O-021). 47 | 48 | * `BUSY` / `TIMED_BUSY`: Bind to isolated cores, targeting nanosecond wake-up latency (THR-DOC-016). 49 | * `BALANCED` / `SLEEPY`: Combine spin, yield, and park for mixed workloads. 50 | * Custom: Provide a bespoke `Pauser` for domain-specific throttling. 51 | 52 | Hot paths avoid allocations (THR-NF-P-014) so a pauser change cannot introduce garbage. 53 | Each loop records the time spent paused, supporting utilisation diagnostics. 54 | 55 | == Affinity and NUMA Alignment 56 | 57 | Affinity strings supplied via builders control how loops bind to hardware threads (THR-FN-015). 58 | They accept the Chronicle Affinity syntax, including NUMA-aware layouts (THR-FN-017). 59 | Example: 60 | 61 | ---- 62 | EventGroup eg = EventGroup.builder() 63 | .withName("risk-eg") 64 | .withBinding("0,2-3") 65 | .build(); 66 | ---- 67 | 68 | * `0` binds the primary high-priority loop to core 0. 69 | * `2-3` pins additional loops (e.g., MONITOR or BLOCKING) across cores 2 and 3. 
70 | 71 | When multiple `EventGroup` instances coexist, coordinate bindings to avoid core contention. 72 | Document selected affinities alongside deployment manifests so operators can validate CPU isolation. 73 | 74 | == Monitoring Plane 75 | 76 | Each `EventGroup` provisions a monitor loop that samples execution times and resets pausers at configurable intervals (THR-NF-O-018, THR-NF-O-019, THR-OPS-020). 77 | The monitor loop: 78 | 79 | * Measures handler invocation duration, logging stack traces for breaches. 80 | * Publishes pauser metrics through configured `PauserMonitorFactory` hooks. 81 | * Responds to system properties that disable or tune monitoring (THR-OPS-023). 82 | 83 | The monitoring loop is not latency-critical but must keep pace with the core loops to avoid stale diagnostics. 84 | Ensure JVM logging levels capture WARN messages from monitor handlers in production. 85 | 86 | == Integration Touchpoints 87 | 88 | Chronicle Threads commonly underpins Chronicle Queue tailers, Chronicle Map maintenance tasks, and application-specific pipelines. 89 | When integrating: 90 | 91 | * Use `net.openhft.chronicle.core.io.Closeable` semantics to align handler lifecycle with queue appenders or tailers. 92 | * Combine telemetry exports with the monitor loop to funnel utilisation metrics to the estate-wide monitoring system. 93 | * Align handler priorities with data criticality so that core loops handle order flow while auxiliary loops manage persistence, replay, or housekeeping. 94 | 95 | Refer to `README.adoc` for code-level examples and to the operational controls document for deployment-time safeguards. 
--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/internal/ThreadsThreadHolder.java:
--------------------------------------------------------------------------------
/*
 * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
 */
package net.openhft.chronicle.threads.internal;

import net.openhft.affinity.Affinity;
import net.openhft.chronicle.core.Jvm;
import net.openhft.chronicle.core.threads.InvalidEventHandlerException;
import net.openhft.chronicle.threads.ThreadHolder;

import java.util.function.BooleanSupplier;
import java.util.function.Consumer;
import java.util.function.LongSupplier;
import java.util.function.Supplier;

/**
 * Helper used by {@link ThreadMonitorHarness} to monitor a service thread.
 * <p>
 * The harness polls the thread and the supplied time source. When the
 * thread appears to be blocked for longer than the configured limit the
 * stack trace is logged via {@link #logConsumer} if {@link #logEnabled} is
 * true.
 * </p>
 */
public class ThreadsThreadHolder implements ThreadHolder {
    private final String description;
    private final long timeLimitNS;
    private final LongSupplier timeSupplier;
    private final Supplier<Thread> threadSupplier;
    /**
     * Allows logging to be enabled or disabled at run time.
     */
    private final BooleanSupplier logEnabled;
    /**
     * Receives formatted log messages.
     */
    private final Consumer<String> logConsumer;
    /**
     * The {@code startedNS} value of the most recent dump, used to tell a
     * repeated report of the same stall from a new one.
     */
    private long lastTime = 0;

    /**
     * Create an instance configured to monitor the supplied thread.
     *
     * @param description    text appended to log messages
     * @param timeLimitNS    threshold in nanoseconds before logging occurs
     * @param timeSupplier   provides the current time
     * @param threadSupplier supplies the thread to observe
     * @param logEnabled     predicate controlling whether logging happens
     * @param logConsumer    receives the formatted log message
     */
    public ThreadsThreadHolder(String description, long timeLimitNS, LongSupplier timeSupplier, Supplier<Thread> threadSupplier, BooleanSupplier logEnabled, Consumer<String> logConsumer) {
        this.description = description;
        this.timeLimitNS = timeLimitNS;
        this.timeSupplier = timeSupplier;
        this.threadSupplier = threadSupplier;
        this.logEnabled = logEnabled;
        this.logConsumer = logConsumer;
    }

    /**
     * @return whether the supplied thread is alive
     */
    @Override
    public boolean isAlive() throws InvalidEventHandlerException {
        // NOTE(review): getName() tolerates a null thread but this method does not - confirm the
        // supplier can never return null here.
        return threadSupplier.get().isAlive();
    }

    @Override
    public void resetTimers() {
        // nothing to do.
    }

    @Override
    public void reportFinished() {
        // assumes it never dies??
    }

    /**
     * @return the current value of the time supplier
     */
    @Override
    public long startedNS() {
        return timeSupplier.getAsLong();
    }

    /**
     * Reports that the monitor itself was called late.
     *
     * @param actionCallDelayNS the delay in nanoseconds, logged in milliseconds to one decimal place
     */
    @Override
    public void monitorThreadDelayed(long actionCallDelayNS) {
        // Same arithmetic as before (integer divide by 100_000, then by 10.0), now shared with dumpThread.
        logConsumer.accept("Monitor thread for " + getName() + " cpuId: " + Affinity.getCpu() + " was delayed by " + nanosecondsToMillisWithTenthsPrecision(actionCallDelayNS) + " ms");
    }

    /**
     * @param nowNS the current time in nanoseconds
     * @return {@code true} when the time limit has been exceeded and logging is enabled
     */
    @Override
    public boolean shouldLog(long nowNS) {
        return nowNS - startedNS() > timeLimitNS
                && logEnabled.getAsBoolean();
    }

    /**
     * Formats a profile of the monitored thread, including its trimmed stack
     * trace, and passes it to the log consumer.
     *
     * @param startedNS when the monitored activity started, in nanoseconds
     * @param nowNS     the current time in nanoseconds
     */
    @Override
    public void dumpThread(long startedNS, long nowNS) {
        long latencyNS = nowNS - startedNS;
        Thread thread = threadSupplier.get();

        // an unchanged start time means the same stall is being reported again
        String type = (startedNS == lastTime) ? "re-reporting" : "new report";
        StringBuilder out = new StringBuilder()
                .append("THIS IS NOT AN ERROR, but a profile of the thread, ").append(description)
                .append(" thread ").append(thread.getName())
                .append(" interrupted ").append(thread.isInterrupted())
                .append(" blocked for ").append(nanosecondsToMillisWithTenthsPrecision(latencyNS))
                .append(" ms. ").append(type);
        Jvm.trimStackTrace(out, thread.getStackTrace());
        logConsumer.accept(out.toString());

        lastTime = startedNS;
    }

    /**
     * Converts nanoseconds to milliseconds, truncated to one decimal place.
     *
     * @param timeInNS The time in nanoseconds
     * @return The time in milliseconds as a double that retains only its 1/10ths precision
     */
    @SuppressWarnings(/* we mean to do the integer division first */
            {"java:S2184", "IntegerDivisionInFloatingPointContext"})
    static double nanosecondsToMillisWithTenthsPrecision(long timeInNS) {
        return (timeInNS / 100_000) / 10d;
    }

    /**
     * @return the time limit plus {@link #timingErrorNS()}
     */
    @Override
    public long timingToleranceNS() {
        return timeLimitNS + timingErrorNS();
    }

    /**
     * @return the allowance added to the time limit; overridable by subclasses
     */
    protected long timingErrorNS() {
        return TIMING_ERROR;
    }

    /**
     * @return the monitored thread's name, or {@code "null"} when no thread is supplied
     */
    @Override
    public String getName() {
        Thread thread = threadSupplier.get();
        return thread == null ? "null" : thread.getName();
    }
}

--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/ThreadsTestCommon.java:
--------------------------------------------------------------------------------
/*
 * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
 */
package net.openhft.chronicle.threads;

import net.openhft.chronicle.core.Jvm;
import net.openhft.chronicle.core.io.AbstractCloseable;
import net.openhft.chronicle.core.io.AbstractReferenceCounted;
import net.openhft.chronicle.core.onoes.ExceptionKey;
import net.openhft.chronicle.core.onoes.Slf4jExceptionHandler;
import net.openhft.chronicle.core.threads.CleaningThread;
import net.openhft.chronicle.core.threads.ThreadDump;
import net.openhft.chronicle.core.time.SystemTimeProvider;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;

import 
java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Predicate;

import static java.lang.String.format;
import static org.junit.jupiter.api.Assertions.fail;

/**
 * Base class for the tests in this module. Before each test it enables
 * reference tracing, snapshots the running threads and starts recording
 * exceptions; after each test it verifies that references were released, that
 * no new threads were left behind and that no unexpected exception was
 * recorded.
 */
public class ThreadsTestCommon {
    private final Map<Predicate<ExceptionKey>, String> ignoreExceptions = new LinkedHashMap<>();
    private Map<Predicate<ExceptionKey>, String> expectedExceptions = new LinkedHashMap<>();
    private ThreadDump threadDump;
    private Map<ExceptionKey, Integer> exceptions;

    @BeforeEach
    public void enableReferenceTracing() {
        AbstractReferenceCounted.enableReferenceTracing();
    }

    private void assertReferencesReleased() {
        AbstractReferenceCounted.assertReferencesReleased();
    }

    @BeforeEach
    public void threadDump() {
        threadDump = new ThreadDump();
    }

    private void checkThreadDump() {
        threadDump.assertNoNewThreads();
    }

    @BeforeEach
    public void recordExceptions() {
        exceptions = Jvm.recordExceptions();
    }

    /**
     * Tolerates, but does not require, a recorded exception mentioning {@code message}.
     *
     * @param message text to look for in the log message or in the throwable's message
     */
    void ignoreException(String message) {
        ignoreException(messageOrThrowableContains(message), message);
    }

    /**
     * Null-safe {@link String#contains(CharSequence)}.
     */
    private static boolean contains(String text, String message) {
        return text != null && text.contains(message);
    }

    /**
     * Builds the predicate shared by the ignore/expect/assert helpers. Both the key's
     * message and the throwable's message may be {@code null}, so each is checked
     * with the null-safe {@link #contains(String, String)}.
     */
    private static Predicate<ExceptionKey> messageOrThrowableContains(String message) {
        return k -> contains(k.message, message)
                || (k.throwable != null && contains(k.throwable.getMessage(), message));
    }

    /**
     * Requires a recorded exception mentioning {@code message}; the test fails in
     * {@link #afterChecks()} if none was recorded.
     *
     * @param message text to look for in the log message or in the throwable's message
     */
    void expectException(String message) {
        expectException(messageOrThrowableContains(message), message);
    }

    private void ignoreException(Predicate<ExceptionKey> predicate, String description) {
        ignoreExceptions.put(predicate, description);
    }

    private void expectException(Predicate<ExceptionKey> predicate, String description) {
        expectedExceptions.put(predicate, description);
    }

    /**
     * Removes expected, ignored and well-known benign entries from the recorded
     * exceptions and fails if anything is left.
     */
    private void checkExceptions() {
        for (Map.Entry<Predicate<ExceptionKey>, String> expectedException : expectedExceptions.entrySet()) {
            if (!exceptions.keySet().removeIf(expectedException.getKey()))
                throw new AssertionError("No error for " + expectedException.getValue());
        }
        expectedExceptions.clear();
        for (Map.Entry<Predicate<ExceptionKey>, String> expectedException : ignoreExceptions.entrySet()) {
            if (!exceptions.keySet().removeIf(expectedException.getKey()))
                Slf4jExceptionHandler.DEBUG.on(getClass(), "No error for " + expectedException.getValue());
        }
        ignoreExceptions.clear();
        for (String msg : "Shrinking ,Allocation of , ms to add mapping for ,jar to the classpath, ms to pollDiskSpace for , us to linearScan by position from ,File released ,Overriding roll length from existing metadata, was 3600000, overriding to 86400000 ".split(",")) {
            // a recorded key may carry no message, hence the null-safe check
            exceptions.keySet().removeIf(e -> contains(e.message, msg));
        }
        if (Jvm.hasException(exceptions)) {
            Jvm.dumpException(exceptions);
            Jvm.resetExceptionHandlers();
            throw new AssertionError(exceptions.keySet());
        }
    }

    /**
     * Fails immediately unless an exception mentioning {@code message} has already been recorded.
     *
     * @param message text to look for in the log message or in the throwable's message
     */
    void assertExceptionThrown(String message) {
        String description = format("No exception found containing string `%s`", message);
        assertExceptionThrown(messageOrThrowableContains(message), description);
    }

    private void assertExceptionThrown(Predicate<ExceptionKey> predicate, String description) {
        for (ExceptionKey key : exceptions.keySet()) {
            if (predicate.test(key)) {
                return;
            }
        }
        fail(description);
    }

    @AfterEach
    public void afterChecks() throws InterruptedException {
        preAfter();
        SystemTimeProvider.CLOCK = SystemTimeProvider.INSTANCE;
        CleaningThread.performCleanup(Thread.currentThread());

        System.gc();
        AbstractCloseable.waitForCloseablesToClose(1000);
        assertReferencesReleased();
        checkThreadDump();
        checkExceptions();

        tearDown();
    }

    /**
     * Hook run at the start of {@link #afterChecks()}; does nothing by default.
     */
    void preAfter() throws InterruptedException {
    }

    private void tearDown() {
    }
}
--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/TestEventHandlers.java:
--------------------------------------------------------------------------------
/*
 * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
 */
package net.openhft.chronicle.threads;

import net.openhft.chronicle.core.threads.EventHandler;
import net.openhft.chronicle.core.threads.EventLoop;
import net.openhft.chronicle.core.threads.HandlerPriority;
import org.jetbrains.annotations.NotNull;

import java.io.Closeable;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;

public class TestEventHandlers {

    // Message prefixes of the exceptions raised by ThrowingHandler; the handler's priority is appended.
    public static final String HANDLER_LOOP_STARTED_EXCEPTION_TXT = "Something went wrong in loopStarted!!!";
    public static final String HANDLER_LOOP_FINISHED_EXCEPTION_TXT = "Something went wrong in loopFinished!!!";
    public static final String HANDLER_CLOSE_EXCEPTION_TXT = "Something went wrong in close!!!";
    public static final String HANDLER_EVENT_LOOP_EXCEPTION_TXT = "Something went wrong in set eventLoop!!!";
    private static final String HANDLER_PRIORITY_EXCEPTION_TXT = "Something went wrong in priority!!!";

    /**
     * Test handler that keeps one counter per lifecycle callback so a test can
     * check how often the event loop invoked each of them.
     */
    public static class CountingHandler implements EventHandler, Closeable {
        final AtomicInteger loopStartedCalled = new AtomicInteger();
        final AtomicInteger loopFinishedCalled = new AtomicInteger();
        final AtomicInteger actionCalled = new AtomicInteger();
        final AtomicInteger closeCalled = new AtomicInteger();
        final HandlerPriority priority;
        EventLoop eventLoop;

        CountingHandler(HandlerPriority priority) {
            this.priority = priority;
        }

        // ---- callbacks driven by the event loop ----

        @Override
        public void eventLoop(EventLoop eventLoop) {
            this.eventLoop = eventLoop;
        }

        @Override
        public @NotNull HandlerPriority priority() {
            return priority;
        }

        @Override
        public void loopStarted() {
            loopStartedCalled.incrementAndGet();
        }

        /**
         * Counts the call and reports that no work was done.
         */
        @Override
        public boolean action() {
            actionCalled.incrementAndGet();
            return false;
        }

        @Override
        public void loopFinished() {
            loopFinishedCalled.incrementAndGet();
        }

        @Override
        public void close() throws IOException {
            closeCalled.incrementAndGet();
        }

        // ---- accessors used by assertions ----

        public EventLoop eventLoop() {
            return eventLoop;
        }

        public int loopStartedCalled() {
            return loopStartedCalled.get();
        }

        public int actionCalled() {
            return actionCalled.get();
        }

        public int loopFinishedCalled() {
            return loopFinishedCalled.get();
        }

        public int closeCalled() {
            return closeCalled.get();
        }
    }

    /**
     * Counting handler that additionally throws {@link IllegalStateException}
     * from chosen callbacks, letting tests drive the event loop's error paths.
     * Each callback still updates its counter (or stores the loop) before throwing.
     */
    public static class ThrowingHandler extends CountingHandler {
        final boolean throwsEventLoop;
        final boolean throwsPriority;
        final boolean throwsLoopStarted;
        final boolean throwsLoopFinished;
        final boolean throwsClose;

        /**
         * @param priority        priority reported by the handler
         * @param throwsEventLoop whether {@link #eventLoop(EventLoop)} throws
         * @param throwsPriority  whether {@link #priority()} throws
         */
        ThrowingHandler(HandlerPriority priority, boolean throwsEventLoop, boolean throwsPriority) {
            super(priority);
            this.throwsEventLoop = throwsEventLoop;
            this.throwsPriority = throwsPriority;
            // The remaining callbacks throw only when neither of the two flags above is set.
            final boolean throwsLifecycle = !throwsEventLoop && !throwsPriority;
            this.throwsLoopStarted = throwsLifecycle;
            this.throwsLoopFinished = throwsLifecycle;
            this.throwsClose = throwsLifecycle;
        }

        @Override
        public void eventLoop(EventLoop eventLoop) {
            super.eventLoop(eventLoop);
            if (!throwsEventLoop) {
                return;
            }
            throw new IllegalStateException(HANDLER_EVENT_LOOP_EXCEPTION_TXT + priority);
        }

        @Override
        public void loopStarted() {
            super.loopStarted();
            if (!throwsLoopStarted) {
                return;
            }
            throw new IllegalStateException(HANDLER_LOOP_STARTED_EXCEPTION_TXT + priority);
        }

        @Override
        public void loopFinished() {
            super.loopFinished();
            if (!throwsLoopFinished) {
                return;
            }
            throw new IllegalStateException(HANDLER_LOOP_FINISHED_EXCEPTION_TXT + priority);
        }

        @Override
        public void close() throws IOException {
            super.close();
            if (!throwsClose) {
                return;
            }
            throw new IllegalStateException(HANDLER_CLOSE_EXCEPTION_TXT + priority);
        }

        @Override
        public @NotNull HandlerPriority priority() {
            if (throwsPriority) {
                throw new IllegalStateException(HANDLER_PRIORITY_EXCEPTION_TXT + priority);
            }
            return super.priority();
        }
    }
}

--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/internal/ThreadMonitorHarnessTest.java:
--------------------------------------------------------------------------------
/*
 * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
 */
package net.openhft.chronicle.threads.internal;

import net.openhft.chronicle.core.threads.InvalidEventHandlerException;
import net.openhft.chronicle.threads.ThreadHolder;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import java.util.concurrent.atomic.AtomicLong;
import java.util.function.LongSupplier;

import static net.openhft.chronicle.threads.CoreEventLoop.NOT_IN_A_LOOP;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.*;

/**
 * Unit tests for {@link ThreadMonitorHarness}, driven through a mocked
 * {@link ThreadHolder} and a mocked time source.
 */
@ExtendWith(MockitoExtension.class)
class ThreadMonitorHarnessTest {

    private static final long TIMING_TOLERANCE_NS = 10_000_000;

    private ThreadMonitorHarness threadMonitorHarness;

    @Mock
    private ThreadHolder threadHolder;
    @Mock
    private LongSupplier timeSupplier;

    /**
     * Builds the harness and installs lenient defaults: a live thread, the test
     * tolerance and a fixed "now".
     */
    @BeforeEach
    void setUp() throws InvalidEventHandlerException {
        threadMonitorHarness = new ThreadMonitorHarness(threadHolder, timeSupplier);
        lenient().when(threadHolder.isAlive()).thenReturn(true);
        lenient().when(threadHolder.timingToleranceNS()).thenReturn(TIMING_TOLERANCE_NS);
        lenient().when(timeSupplier.getAsLong()).thenReturn(System.nanoTime());
    }

    @Test
    void willCallThreadFinishedThenTerminateWhenThreadIsNoLongerAlive() throws InvalidEventHandlerException {
        // given a thread that has died
        when(threadHolder.isAlive()).thenReturn(false);

        // then the harness removes itself and reports the thread as finished
        assertThrows(InvalidEventHandlerException.class, () -> threadMonitorHarness.action());
        verify(threadHolder).reportFinished();
    }

    @Test
    void willResetTimersOnFirstIteration() throws InvalidEventHandlerException {
        when(threadHolder.startedNS()).thenReturn(System.nanoTime());

        final boolean busy = threadMonitorHarness.action();

        assertFalse(busy);
        verify(threadHolder).resetTimers();
    }

    @Test
    void willAbortCheckingWhenLoopStartedTimeIsZero() throws InvalidEventHandlerException {
        when(threadHolder.startedNS()).thenReturn(0L);

        final boolean busy = threadMonitorHarness.action();

        assertFalse(busy);
        verify(threadHolder, never()).shouldLog(anyLong());
    }

    @Test
    void willAbortCheckingWhenLoopStartedTimeIsNotInALoop() throws InvalidEventHandlerException {
        when(threadHolder.startedNS()).thenReturn(NOT_IN_A_LOOP);

        final boolean busy = threadMonitorHarness.action();

        assertFalse(busy);
        verify(threadHolder, never()).shouldLog(anyLong());
    }

    @Test
    void willResetTimersWhenLoopStartedTimeHasChanged() throws InvalidEventHandlerException {
        // every query of the start time yields a new value
        final AtomicLong movingStartTime = new AtomicLong(System.nanoTime());
        when(threadHolder.startedNS()).thenAnswer(invocation -> movingStartTime.incrementAndGet());

        for (int call = 0; call < 3; call++) {
            assertFalse(threadMonitorHarness.action());
        }

        verify(threadHolder, times(3)).resetTimers();
    }

    @Test
    void willNotResetTimersWhenLoopStartedTimeHasNotChanged() throws InvalidEventHandlerException {
        when(threadHolder.startedNS()).thenReturn(System.nanoTime());

        // the first call resets because it is the first iteration; the next two must not
        for (int call = 0; call < 3; call++) {
            assertFalse(threadMonitorHarness.action());
        }

        verify(threadHolder, times(1)).resetTimers();
    }

    @Test
    void willCallMonitorThreadDelayedWhenDelayIsGreaterThanThreshold() throws InvalidEventHandlerException {
        final long firstCallTime = System.nanoTime();
        final long delayNs = TIMING_TOLERANCE_NS + 1;
        when(threadHolder.startedNS()).thenReturn(System.nanoTime());
        when(timeSupplier.getAsLong()).thenReturn(firstCallTime);

        // reset timers on first iteration
        threadMonitorHarness.action();

        // the second call arrives just over the tolerance later
        when(timeSupplier.getAsLong()).thenReturn(firstCallTime + delayNs);
        assertTrue(threadMonitorHarness.action());
        verify(threadHolder).monitorThreadDelayed(delayNs);
    }

    @Test
    void willNotCallDumpThreadWhenShouldNotLog() throws InvalidEventHandlerException {
        final long fixedNow = System.nanoTime();
        when(threadHolder.startedNS()).thenReturn(System.nanoTime());
        when(timeSupplier.getAsLong()).thenReturn(fixedNow);

        // reset timers on first iteration
        threadMonitorHarness.action();

        when(threadHolder.shouldLog(fixedNow)).thenReturn(false);
        assertFalse(threadMonitorHarness.action());
        verify(threadHolder, never()).dumpThread(anyLong(), anyLong());
    }

    @Test
    void willCallDumpThreadWhenShouldLog() throws InvalidEventHandlerException {
        final long fixedNow = System.nanoTime();
        final long fixedStart = System.nanoTime();
        when(threadHolder.startedNS()).thenReturn(fixedStart);
        when(timeSupplier.getAsLong()).thenReturn(fixedNow);

        // reset timers on first iteration
        threadMonitorHarness.action();

        when(threadHolder.shouldLog(anyLong())).thenReturn(true);
        assertFalse(threadMonitorHarness.action());
        verify(threadHolder).dumpThread(fixedStart, fixedNow);
    }
}

--------------------------------------------------------------------------------
/src/main/java/net/openhft/chronicle/threads/MilliPauser.java:
--------------------------------------------------------------------------------
/*
 * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
 */
package 
net.openhft.chronicle.threads;

import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.LockSupport;

/**
 * Pauser that waits a fixed number of milliseconds.
 * <p>
 * The implementation parks the thread with {@link LockSupport#parkNanos(long)}
 * so CPU usage stays low. The delay is configured via {@link #pauseTimeMS(long)}
 * and can be limited with {@link #minPauseTimeMS(long)}.
 */
public class MilliPauser implements Pauser {
    private final AtomicBoolean pausing = new AtomicBoolean();
    private long pauseTimeMS;
    // accumulated in nanoseconds; reported in milliseconds by timePaused()
    private long timePaused = 0;
    private long countPaused = 0;
    private long pauseUntilMS = 0;
    @Nullable
    private transient volatile Thread thread = null;

    /**
     * Constructs a new {@code MilliPauser} with a specified pause time in milliseconds.
     *
     * @param pauseTimeMS the pause time for each pause operation, in milliseconds
     */
    public MilliPauser(long pauseTimeMS) {
        this.pauseTimeMS = pauseTimeMS;
    }

    /**
     * Sets the delay for future pauses.
     *
     * @param pauseTimeMS pause duration in milliseconds
     * @return this instance for chaining
     */
    public MilliPauser pauseTimeMS(long pauseTimeMS) {
        this.pauseTimeMS = pauseTimeMS;
        return this;
    }

    /**
     * Reduces the delay if the supplied value is lower.
     * Always enforces a minimum of one millisecond.
     *
     * @param pauseTimeMS proposed minimum pause in milliseconds
     * @return this instance for chaining
     */
    public MilliPauser minPauseTimeMS(long pauseTimeMS) {
        this.pauseTimeMS = Math.min(this.pauseTimeMS, pauseTimeMS);
        if (this.pauseTimeMS < 1)
            this.pauseTimeMS = 1;
        return this;
    }

    /**
     * Retrieves the current pause time in milliseconds.
     *
     * @return the pause time in milliseconds
     */
    public long pauseTimeMS() {
        return pauseTimeMS;
    }

    /**
     * Cancels any asynchronous pause in progress.
     */
    @Override
    public void reset() {
        pauseUntilMS = 0;
    }

    /**
     * Pauses the current thread for the configured duration using millisecond precision.
     */
    @Override
    public void pause() {
        doPauseMS(pauseTimeMS);
    }

    /**
     * Start an asynchronous pause for the configured delay.
     * The call returns at once and {@link #asyncPausing()} can be polled.
     */
    @Override
    public void asyncPause() {
        pauseUntilMS = System.currentTimeMillis() + pauseTimeMS;
    }

    /**
     * Test whether the asynchronous pause has expired.
     *
     * @return {@code true} while the pause should continue
     */
    @Override
    public boolean asyncPausing() {
        return pauseUntilMS > System.currentTimeMillis();
    }

    /**
     * Pauses the current thread for the given timeout, converted to whole milliseconds.
     * The configured pause time is not used.
     *
     * @param timeout  the time to pause in the specified {@code timeUnit}
     * @param timeUnit the unit of time for {@code timeout}
     * @throws TimeoutException declared by the method signature; this implementation never throws it
     */
    @Override
    public void pause(long timeout, @NotNull TimeUnit timeUnit) throws TimeoutException {
        doPauseMS(timeUnit.toMillis(timeout));
    }

    /**
     * Perform the pause for the given delay.
     * Uses {@link LockSupport#parkNanos(long)} so the CPU stays mostly idle.
     * The park is skipped when the calling thread is already interrupted.
     *
     * @param delayMS delay in milliseconds
     */
    void doPauseMS(long delayMS) {
        final long start = System.nanoTime();
        final Thread current = Thread.currentThread();
        thread = current;
        pausing.set(true);
        if (!current.isInterrupted()) {
            // toNanos saturates at Long.MAX_VALUE; a plain multiplication would wrap to a
            // negative value for very large delays and make parkNanos return immediately.
            LockSupport.parkNanos(TimeUnit.MILLISECONDS.toNanos(delayMS));
        }
        pausing.set(false);
        timePaused += System.nanoTime() - start;
        countPaused++;
    }

    /**
     * Unpauses the currently paused thread if it is in a paused state.
     */
    @Override
    public void unpause() {
        final Thread threadSnapshot = this.thread;
        if (threadSnapshot != null && pausing.get())
            LockSupport.unpark(threadSnapshot);
    }

    /**
     * Returns the total time that the thread has been paused, measured in milliseconds.
     *
     * @return the total paused time in milliseconds
     */
    @Override
    public long timePaused() {
        return timePaused / 1_000_000;
    }

    /**
     * Returns the number of times this pauser has been activated to pause the thread.
     *
     * @return the total count of pauses
     */
    @Override
    public long countPaused() {
        return countPaused;
    }

    /**
     * Provides a string representation of this pauser, identifying the configured pause time.
     *
     * @return a string representation of this {@code MilliPauser}
     */
    @Override
    public String toString() {
        if (pauseTimeMS == 1)
            return "PauserMode.milli";
        return "Pauser.millis(" + pauseTimeMS + ')';
    }
}

--------------------------------------------------------------------------------
/src/test/java/net/openhft/chronicle/threads/EventLoopsTest.java:
--------------------------------------------------------------------------------
/*
 * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0
 */
package net.openhft.chronicle.threads;

import net.openhft.chronicle.core.Jvm;
import net.openhft.chronicle.core.io.InvalidMarshallableException;
import net.openhft.chronicle.core.io.ThreadingIllegalStateException;
import net.openhft.chronicle.core.onoes.ExceptionHandler;
import net.openhft.chronicle.core.threads.EventHandler;
import net.openhft.chronicle.core.threads.EventLoop;
import net.openhft.chronicle.core.threads.InvalidEventHandlerException;
import org.junit.jupiter.api.Assertions;
import 
org.junit.jupiter.api.Test; 15 | import org.junit.jupiter.api.Timeout; 16 | import org.junit.jupiter.params.ParameterizedTest; 17 | import org.junit.jupiter.params.provider.MethodSource; 18 | 19 | import java.util.Arrays; 20 | import java.util.Collections; 21 | import java.util.concurrent.Semaphore; 22 | import java.util.concurrent.atomic.AtomicBoolean; 23 | import java.util.stream.Stream; 24 | 25 | import static org.junit.jupiter.api.Assertions.assertFalse; 26 | import static org.junit.jupiter.api.Assertions.assertTrue; 27 | 28 | /** 29 | * Exercises the helper routines in {@link EventLoops} and the life-cycle 30 | * checks in {@link EventLoop}. 31 | *

32 | * The tests confirm that {@link EventLoops#stopAll(Object...)} accepts 33 | * {@code null} values and waits for every loop to stop. They also verify 34 | * that calling {@link EventLoop#close()} from the loop's own thread triggers 35 | * a {@link ThreadingIllegalStateException}. 36 | */ 37 | class EventLoopsTest extends ThreadsTestCommon { 38 | 39 | @Test 40 | void stopAllCanHandleNulls() { 41 | final StringBuilder sb = new StringBuilder(); 42 | final ExceptionHandler eh = (c, m, t) -> sb.append(m); 43 | ExceptionHandler exceptionHandler = Jvm.warn(); 44 | try { /* install the capturing handler so any warning raised by stopAll is recorded in sb */ 45 | Jvm.setWarnExceptionHandler(eh); 46 | EventLoops.stopAll(null, Arrays.asList(null, null, null), null); 47 | /* Should silently accept nulls, i.e. nothing was routed to the warn handler */ 48 | assertTrue(sb.toString().isEmpty()); 49 | } finally { /* put back the handler that was active before the test */ 50 | Jvm.setWarnExceptionHandler(exceptionHandler); 51 | } 52 | } 53 | 54 | @Timeout(5) /* JUnit 5 @Timeout defaults to seconds; 5_000 would have meant 5000 seconds */ 55 | @Test 56 | void stopAllWillBlockUntilTheLastEventLoopStops() { 57 | try (final MediumEventLoop mediumEventLoop = new MediumEventLoop(null, "test", Pauser.balanced(), false, "none"); 58 | final BlockingEventLoop blockingEventLoop = new BlockingEventLoop("blocker")) { 59 | doTest(blockingEventLoop, mediumEventLoop); 60 | } 61 | } 62 | 63 | private static void doTest(BlockingEventLoop blockingEventLoop, MediumEventLoop mediumEventLoop) { 64 | blockingEventLoop.start(); 65 | mediumEventLoop.start(); 66 | 67 | Semaphore semaphore = new Semaphore(0); 68 | blockingEventLoop.addHandler(() -> { 69 | semaphore.acquireUninterruptibly(); 70 | return false; 71 | }); 72 | while (!semaphore.hasQueuedThreads()) { 73 | Jvm.pause(10); 74 | } 75 | 76 | AtomicBoolean stoppedEm = new AtomicBoolean(false); 77 | new Thread(() -> { 78 | EventLoops.stopAll(mediumEventLoop, Arrays.asList(null, Collections.singleton(blockingEventLoop))); 79 | stoppedEm.set(true); 80 | }).start(); 81 | long endTime = System.currentTimeMillis() + 300; 82 | while (System.currentTimeMillis() < endTime) { 83 | assertFalse(stoppedEm.get()); 
84 | } 85 | semaphore.release(); 86 | while (System.currentTimeMillis() < endTime) { 87 | if (stoppedEm.get()) { 88 | break; 89 | } 90 | Jvm.pause(1); 91 | } 92 | } 93 | 94 | private static Stream eventLoopsToClose() { 95 | return Stream.of( 96 | new MediumEventLoop(null, "medium", Pauser.balanced(), false, null), 97 | new BlockingEventLoop("blocking") 98 | ); 99 | } 100 | 101 | @ParameterizedTest 102 | @MethodSource("eventLoopsToClose") 103 | void closeFromEventLoopThreadThrowsException(EventLoop el) { 104 | try { 105 | AtomicBoolean exceptionThrownInHandler = new AtomicBoolean(); 106 | AtomicBoolean eventHandlerFinished = new AtomicBoolean(); 107 | 108 | EventHandler closingEventHandler = new EventHandler() { 109 | @Override 110 | public boolean action() throws InvalidEventHandlerException, InvalidMarshallableException { 111 | try { 112 | el.close(); 113 | return true; 114 | } catch (ThreadingIllegalStateException e) { 115 | exceptionThrownInHandler.set(true); 116 | throw InvalidEventHandlerException.reusable(); 117 | } 118 | } 119 | 120 | @Override 121 | public void loopFinished() { 122 | eventHandlerFinished.set(true); 123 | } 124 | }; 125 | 126 | el.addHandler(closingEventHandler); 127 | el.start(); 128 | 129 | long timeoutTime = System.currentTimeMillis() + 500; 130 | while (!exceptionThrownInHandler.get()) { 131 | if (System.currentTimeMillis() > timeoutTime) { 132 | Assertions.fail("Event loop " + el.name() + " didn't " + (eventHandlerFinished.get() ? 
"throw an exception when attempting to close" : "run in this time")); 133 | } 134 | Jvm.pause(10); 135 | } 136 | 137 | assertTrue(el.isAlive()); 138 | assertFalse(el.isStopped()); 139 | assertFalse(el.isClosed()); 140 | assertFalse(el.isClosing()); 141 | } finally { 142 | el.close(); 143 | 144 | assertTrue(el.isClosed()); 145 | } 146 | 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/AbstractLifecycleEventLoop.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.io.AbstractCloseable; 8 | import net.openhft.chronicle.core.io.ThreadingIllegalStateException; 9 | import net.openhft.chronicle.core.threads.EventHandler; 10 | import net.openhft.chronicle.core.threads.EventLoop; 11 | import org.jetbrains.annotations.NotNull; 12 | 13 | import java.util.concurrent.TimeUnit; 14 | import java.util.concurrent.atomic.AtomicReference; 15 | 16 | /** 17 | * Base implementation that manages the life-cycle of an {@link EventLoop}. 18 | * 19 | *

It extends {@link AbstractCloseable}, and integrates with the 20 | * closeable hierarchy.

21 | * 22 | *

The life-cycle follows {@link EventLoopLifecycle}:

23 | *
    24 | *
  • {@code NEW} – constructed but not running.
  • 25 | *
  • {@code STARTED} – handlers are executing.
  • 26 | *
  • {@code STOPPING} – {@link #stop()} has been requested.
  • 27 | *
  • {@code STOPPED} – all work is finished.
  • 28 | *
29 | * Transitions are linear in that order. Invoking {@code stop()} while in 30 | * {@code NEW} skips {@code STARTED} entirely. Both {@code start()} and 31 | * {@code stop()} are idempotent and {@code stop()} blocks until the loop is 32 | * {@code STOPPED}. 33 | */ 34 | @SuppressWarnings("this-escape") 35 | public abstract class AbstractLifecycleEventLoop extends AbstractCloseable implements EventLoop { 36 | 37 | /** 38 | * After this time, awaitTermination will log an error and return, this is really only so 39 | * tests don't block forever. This time should be kept as "effectively forever". 40 | */ 41 | private static final long AWAIT_TERMINATION_TIMEOUT_MS = TimeUnit.MINUTES.toMillis(5); 42 | private final AtomicReference lifecycle = new AtomicReference<>(EventLoopLifecycle.NEW); 43 | protected final String name; 44 | boolean privateGroup; 45 | 46 | /** 47 | * Create an instance with the supplied name. 48 | *

49 | * The {@link AbstractCloseable} thread ownership check is disabled so the 50 | * loop may be started or stopped from threads other than the creating 51 | * thread. 52 | * 53 | * @param name descriptive name for the loop 54 | */ 55 | protected AbstractLifecycleEventLoop(@NotNull String name) { 56 | this.name = name.replaceAll("/$", ""); 57 | 58 | // event loops operate on dedicated threads but may be closed elsewhere 59 | singleThreadedCheckDisabled(true); 60 | } 61 | 62 | protected String nameWithSlash() { 63 | return withSlash(name); 64 | } 65 | 66 | @Override 67 | public final void start() { 68 | throwExceptionIfClosed(); 69 | 70 | if (lifecycle.compareAndSet(EventLoopLifecycle.NEW, EventLoopLifecycle.STARTED)) { 71 | performStart(); 72 | } 73 | } 74 | 75 | @Override 76 | public final String name() { 77 | return name; 78 | } 79 | 80 | /** 81 | * Perform the concrete start up work. 82 | * Invoked exactly once when the life-cycle moves from 83 | * {@link EventLoopLifecycle#NEW} to {@link EventLoopLifecycle#STARTED}. 84 | */ 85 | protected abstract void performStart(); 86 | 87 | @Override 88 | public final void stop() { 89 | if (lifecycle.compareAndSet(EventLoopLifecycle.NEW, EventLoopLifecycle.STOPPING)) { 90 | performStopFromNew(); 91 | lifecycle.set(EventLoopLifecycle.STOPPED); 92 | } else if (lifecycle.compareAndSet(EventLoopLifecycle.STARTED, EventLoopLifecycle.STOPPING)) { 93 | performStopFromStarted(); 94 | lifecycle.set(EventLoopLifecycle.STOPPED); 95 | } else { 96 | awaitTermination(); 97 | } 98 | } 99 | 100 | /** 101 | * Stop the loop when {@link #stop()} is invoked before it has started. 102 | * Implementations should block until every handler has received 103 | * {@link EventHandler#loopFinished()}. 104 | */ 105 | protected abstract void performStopFromNew(); 106 | 107 | /** 108 | * Stop the loop once it has begun processing. 
109 | * Implementations should wait for the current iteration to finish and then 110 | * invoke {@link EventHandler#loopFinished()} on every handler. 111 | */ 112 | protected abstract void performStopFromStarted(); 113 | 114 | /** 115 | * Wait for the loop to reach {@link EventLoopLifecycle#STOPPED}. 116 | * 117 | *

If the state does not change within 118 | * {@link #AWAIT_TERMINATION_TIMEOUT_MS} milliseconds an error is logged and 119 | * the method returns. The timeout is primarily to avoid tests hanging 120 | * indefinitely.

121 | */ 122 | protected final void awaitTermination() { 123 | long endTime = System.currentTimeMillis() + AWAIT_TERMINATION_TIMEOUT_MS; 124 | while (!Thread.currentThread().isInterrupted()) { 125 | if (lifecycle.get() == EventLoopLifecycle.STOPPED) 126 | return; 127 | if (System.currentTimeMillis() > endTime) { 128 | Jvm.error().on(getClass(), "awaitTermination() timed out, continuing. This probably represents a bug."); 129 | } 130 | Jvm.pause(1); 131 | } 132 | if (lifecycle.get() != EventLoopLifecycle.STOPPED) { 133 | Jvm.warn().on(getClass(), "awaitTermination() interrupted, returning in state " + lifecycle.get()); 134 | } 135 | } 136 | 137 | @Override 138 | protected void performClose() { 139 | stop(); 140 | } 141 | 142 | @Override 143 | protected void assertCloseable() { 144 | if (!privateGroup && isRunningOnThread(Thread.currentThread())) { 145 | throw new ThreadingIllegalStateException(getClass() + ": Attempting to close " + name + " from within!", createdHere()); 146 | } 147 | } 148 | 149 | public abstract boolean isRunningOnThread(Thread thread); 150 | 151 | protected boolean isStarted() { 152 | return lifecycle.get() == EventLoopLifecycle.STARTED; 153 | } 154 | 155 | @Override 156 | public boolean isStopped() { 157 | return lifecycle.get().isStopped(); 158 | } 159 | 160 | static String withSlash(String n) { 161 | return n.isEmpty() ? 
n : n + "/"; 162 | } 163 | 164 | public void privateGroup(boolean privateGroup) { 165 | this.privateGroup = privateGroup; 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/test/java/net/openhft/chronicle/threads/LoopIntrospectionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.threads.EventHandler; 8 | import net.openhft.chronicle.core.threads.HandlerPriority; 9 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException; 10 | import net.openhft.chronicle.testframework.Waiters; 11 | import net.openhft.chronicle.threads.TestEventHandlers.CountingHandler; 12 | import org.jetbrains.annotations.NotNull; 13 | import org.junit.jupiter.api.Test; 14 | 15 | import java.util.EnumSet; 16 | import java.util.concurrent.CountDownLatch; 17 | import java.util.concurrent.TimeUnit; 18 | import java.util.concurrent.atomic.AtomicReference; 19 | 20 | import static org.junit.jupiter.api.Assertions.*; 21 | 22 | class LoopIntrospectionTest extends ThreadsTestCommon { 23 | 24 | @Test 25 | void mediumEventLoopReportsRunningThread() throws InterruptedException { 26 | AtomicReference loopThread = new AtomicReference<>(); 27 | CountDownLatch firstInvocation = new CountDownLatch(1); 28 | 29 | try (MediumEventLoop loop = new MediumEventLoop(null, "introspection-medium", 30 | Pauser.balanced(), true, null)) { 31 | loop.start(); 32 | Waiters.waitForCondition("Medium loop did not start", loop::isStarted, 5_000); 33 | 34 | loop.addHandler(new EventHandler() { 35 | @Override 36 | public @NotNull HandlerPriority priority() { 37 | return HandlerPriority.MEDIUM; 38 | } 39 | 40 | @Override 41 | public boolean action() { 42 | loopThread.compareAndSet(null, 
Thread.currentThread()); 43 | firstInvocation.countDown(); 44 | return false; 45 | } 46 | }); 47 | 48 | assertTrue(firstInvocation.await(5, TimeUnit.SECONDS), "Handler never ran on medium loop"); 49 | Thread executing = loopThread.get(); 50 | assertNotNull(executing, "Medium loop thread was not captured"); 51 | 52 | assertTrue(loop.isRunningOnThread(executing), "Loop failed to recognise its worker thread"); 53 | assertFalse(loop.isRunningOnThread(new Thread()), "Loop incorrectly matched unrelated thread"); 54 | } 55 | } 56 | 57 | @Test 58 | void blockingEventLoopReportsRunningThread() throws InterruptedException { 59 | AtomicReference loopThread = new AtomicReference<>(); 60 | CountDownLatch firstInvocation = new CountDownLatch(1); 61 | 62 | try (BlockingEventLoop loop = new BlockingEventLoop("introspection-blocking")) { 63 | loop.start(); 64 | 65 | loop.addHandler(() -> { 66 | loopThread.compareAndSet(null, Thread.currentThread()); 67 | firstInvocation.countDown(); 68 | Jvm.pause(10); 69 | return false; 70 | }); 71 | 72 | assertTrue(firstInvocation.await(5, TimeUnit.SECONDS), "Handler never ran on blocking loop"); 73 | Thread executing = loopThread.get(); 74 | assertNotNull(executing, "Blocking loop thread was not captured"); 75 | 76 | assertTrue(loop.isRunningOnThread(executing), "Blocking loop failed to recognise its worker thread"); 77 | assertFalse(loop.isRunningOnThread(Thread.currentThread()), "Blocking loop matched caller thread"); 78 | } 79 | } 80 | 81 | @Test 82 | void eventGroupAggregatesRunningThreadChecks() throws InterruptedException { 83 | AtomicReference highThread = new AtomicReference<>(); 84 | AtomicReference blockingThread = new AtomicReference<>(); 85 | AtomicReference monitorThread = new AtomicReference<>(); 86 | 87 | int previousDelay = MonitorEventLoop.MONITOR_INITIAL_DELAY_MS; 88 | MonitorEventLoop.MONITOR_INITIAL_DELAY_MS = 1; 89 | try (EventGroup group = EventGroup.builder() 90 | .withPriorities(EnumSet.of(HandlerPriority.HIGH, 
HandlerPriority.BLOCKING, HandlerPriority.MONITOR)) 91 | .withPauser(Pauser.balanced()) 92 | .build()) { 93 | group.start(); 94 | Waiters.waitForCondition("Event group did not start", group::isStarted, 5_000); 95 | 96 | group.addHandler(new RecordingHandler(HandlerPriority.HIGH, highThread)); 97 | group.addHandler(new RecordingHandler(HandlerPriority.BLOCKING, blockingThread)); 98 | group.addHandler(new RecordingHandler(HandlerPriority.MONITOR, monitorThread)); 99 | 100 | Waiters.waitForCondition("High loop thread not captured", () -> highThread.get() != null, 5_000); 101 | Waiters.waitForCondition("Blocking loop thread not captured", () -> blockingThread.get() != null, 5_000); 102 | Waiters.waitForCondition("Monitor loop thread not captured", () -> monitorThread.get() != null, 5_000); 103 | 104 | assertTrue(group.isRunningOnThread(highThread.get()), "Group did not recognise high-priority loop thread"); 105 | assertTrue(group.isRunningOnThread(blockingThread.get()), "Group did not recognise blocking loop thread"); 106 | assertTrue(group.isRunningOnThread(monitorThread.get()), "Group did not recognise monitor loop thread"); 107 | assertFalse(group.isRunningOnThread(new Thread()), "Group matched unrelated thread"); 108 | } finally { 109 | MonitorEventLoop.MONITOR_INITIAL_DELAY_MS = previousDelay; 110 | } 111 | } 112 | 113 | @Test 114 | void mediumEventLoopClosesPendingHandlersOnClose() { 115 | CountingHandler handler = new CountingHandler(HandlerPriority.MEDIUM); 116 | 117 | try (MediumEventLoop loop = new MediumEventLoop(null, "pending-medium", 118 | Pauser.balanced(), true, null)) { 119 | loop.addHandler(handler); 120 | assertEquals(0, handler.loopStartedCalled(), "Handler should not start before loop runs"); 121 | } 122 | 123 | assertEquals(0, handler.loopStartedCalled(), "loopStarted should not be called"); 124 | assertEquals(0, handler.actionCalled(), "action should not be called"); 125 | assertEquals(1, handler.closeCalled(), "Handler should be closed when loop 
closes"); 126 | } 127 | 128 | private static final class RecordingHandler implements EventHandler { 129 | private final HandlerPriority priority; 130 | private final AtomicReference threadRef; 131 | 132 | private RecordingHandler(HandlerPriority priority, AtomicReference threadRef) { 133 | this.priority = priority; 134 | this.threadRef = threadRef; 135 | } 136 | 137 | @Override 138 | public @NotNull HandlerPriority priority() { 139 | return priority; 140 | } 141 | 142 | @Override 143 | public boolean action() throws InvalidEventHandlerException { 144 | threadRef.compareAndSet(null, Thread.currentThread()); 145 | return false; 146 | } 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/BlockingEventLoop.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.threads.EventHandler; 8 | import net.openhft.chronicle.core.threads.EventLoop; 9 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException; 10 | import org.jetbrains.annotations.NotNull; 11 | 12 | import java.util.List; 13 | import java.util.concurrent.CopyOnWriteArrayList; 14 | import java.util.concurrent.ExecutorService; 15 | import java.util.concurrent.Executors; 16 | import java.util.concurrent.RejectedExecutionException; 17 | import java.util.function.Supplier; 18 | 19 | import static net.openhft.chronicle.core.io.Closeable.closeQuietly; 20 | import static net.openhft.chronicle.threads.Threads.*; 21 | 22 | /** 23 | * Event loop suited for I/O or other long running tasks. 24 | * Each handler is executed on its own thread. 25 | * 26 | *

The {@link Pauser} supplied at construction is used to create a fresh 27 | * instance for every handler thread. Idle handlers therefore pause 28 | * independently of one another.

29 | * 30 | *

Calling {@link #start()} launches a thread for each added handler. 31 | * When {@link #stop()} is invoked those threads are interrupted and the 32 | * executor service is shut down.

33 | * 34 | *

Handlers with priorities other than 35 | * {@link net.openhft.chronicle.core.threads.HandlerPriority#BLOCKING} 36 | * are accepted but treated the same as blocking handlers.

37 | */ 38 | public class BlockingEventLoop extends AbstractLifecycleEventLoop implements EventLoop { 39 | 40 | @NotNull 41 | private transient final EventLoop parent; 42 | @NotNull 43 | private transient final ExecutorService service; 44 | private final List handlers = new CopyOnWriteArrayList<>(); 45 | private final List runners = new CopyOnWriteArrayList<>(); 46 | private final NamedThreadFactory threadFactory; 47 | private final Supplier pauserSupplier; 48 | 49 | public BlockingEventLoop(@NotNull final EventLoop parent, 50 | @NotNull final String name, 51 | @NotNull final Supplier pauser) { 52 | super(name); 53 | this.parent = parent; 54 | this.threadFactory = new NamedThreadFactory(name, null, null, true); 55 | this.service = Executors.newCachedThreadPool(threadFactory); 56 | this.pauserSupplier = pauser; 57 | } 58 | 59 | public BlockingEventLoop(@NotNull final String name) { 60 | super(name); 61 | this.parent = this; 62 | this.threadFactory = new NamedThreadFactory(name, null, null, true); 63 | this.service = Executors.newCachedThreadPool(threadFactory); 64 | this.pauserSupplier = Pauser::balanced; 65 | } 66 | 67 | /** 68 | * Registers a new handler. Every call spawns another thread for the 69 | * handler. 70 | *

Priorities other than 71 | * {@link net.openhft.chronicle.core.threads.HandlerPriority#BLOCKING} 72 | * are permitted but are not treated specially.

73 | * 74 | * @param handler to execute 75 | */ 76 | @Override 77 | public synchronized void addHandler(@NotNull final EventHandler handler) { 78 | if (DEBUG_ADDING_HANDLERS) 79 | Jvm.debug().on(getClass(), "Adding " + handler.priority() + " " + handler + " to " + this.name); 80 | if (isClosed()) 81 | throw new IllegalStateException("Event Group has been closed"); 82 | eventLoopQuietly(parent, handler); 83 | this.handlers.add(handler); 84 | if (isStarted()) 85 | this.startHandler(handler); 86 | } 87 | 88 | @Override 89 | protected synchronized void performStart() { 90 | handlers.forEach(this::startHandler); 91 | } 92 | 93 | private void startHandler(final EventHandler handler) { 94 | try { 95 | final Runner runner = new Runner(handler, pauserSupplier.get()); 96 | runners.add(runner); 97 | service.submit(runner); 98 | 99 | } catch (RejectedExecutionException e) { 100 | if (!service.isShutdown()) 101 | Jvm.warn().on(getClass(), e); 102 | } 103 | } 104 | 105 | @Override 106 | public void unpause() { 107 | runners.forEach(Runner::unpause); 108 | unpark(service); 109 | } 110 | 111 | @Override 112 | protected void performStopFromNew() { 113 | shutdownExecutorService(); 114 | } 115 | 116 | @Override 117 | protected void performStopFromStarted() { 118 | shutdownExecutorService(); 119 | } 120 | 121 | private void shutdownExecutorService() { 122 | /* 123 | * It's necessary for blocking handlers to be interrupted, so they abort what they're 124 | * doing and run to completion immediately. 
125 | */ 126 | service.shutdownNow(); 127 | unpause(); 128 | Threads.shutdown(service); 129 | } 130 | 131 | @Override 132 | public boolean isAlive() { 133 | return !service.isShutdown(); 134 | } 135 | 136 | @Override 137 | protected void performClose() { 138 | super.performClose(); 139 | closeQuietly(handlers); 140 | runners.clear(); 141 | } 142 | 143 | @Override 144 | public String toString() { 145 | return "BlockingEventLoop{" + 146 | "name=" + name + 147 | '}'; 148 | } 149 | 150 | @Override 151 | public boolean isRunningOnThread(Thread thread) { 152 | for (int i=0; i < runners.size(); i++) { 153 | if (thread == runners.get(i).thread()) { 154 | return true; 155 | } 156 | } 157 | return false; 158 | } 159 | 160 | private final class Runner implements Runnable { 161 | private final EventHandler handler; 162 | private final Pauser pauser; 163 | private boolean endedGracefully = false; 164 | private transient volatile Thread thread = null; 165 | 166 | public Runner(final EventHandler handler, Pauser pauser) { 167 | this.handler = handler; 168 | this.pauser = pauser; 169 | } 170 | 171 | @Override 172 | public void run() { 173 | try { 174 | throwExceptionIfClosed(); 175 | thread = Thread.currentThread(); 176 | handler.loopStarted(); 177 | 178 | while (isStarted()) { 179 | if (handler.action()) 180 | pauser.reset(); 181 | else 182 | pauser.pause(); 183 | } 184 | endedGracefully = true; 185 | } catch (InvalidEventHandlerException e) { 186 | // expected and logged below. 
187 | } catch (Throwable t) { 188 | if (!isClosed()) 189 | Jvm.warn().on(handler.getClass(), asString(handler) + " threw ", t); 190 | 191 | } finally { 192 | if (Jvm.isDebugEnabled(handler.getClass())) 193 | Jvm.debug().on(handler.getClass(), "handler " + asString(handler) + " done."); 194 | loopFinishedQuietly(handler); 195 | if (!endedGracefully) { 196 | // remove handler for clarity when debugging 197 | if (DEBUG_REMOVING_HANDLERS) 198 | Jvm.debug().on(getClass(), "Removing " + handler.priority() + " " + handler); 199 | handlers.remove(handler); 200 | closeQuietly(handler); 201 | } 202 | runners.remove(this); 203 | } 204 | } 205 | 206 | private String asString(final Object handler) { 207 | return Integer.toHexString(System.identityHashCode(handler)); 208 | } 209 | 210 | public void unpause() { 211 | pauser.unpause(); 212 | } 213 | 214 | public Thread thread() { 215 | return thread; 216 | } 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/DiskSpaceMonitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.time.SystemTimeProvider; 8 | import net.openhft.chronicle.core.time.TimeProvider; 9 | import org.jetbrains.annotations.VisibleForTesting; 10 | 11 | import java.io.Closeable; 12 | import java.io.File; 13 | import java.io.IOException; 14 | import java.nio.file.FileStore; 15 | import java.nio.file.Files; 16 | import java.nio.file.Path; 17 | import java.nio.file.Paths; 18 | import java.util.*; 19 | import java.util.concurrent.ConcurrentHashMap; 20 | import java.util.concurrent.ScheduledExecutorService; 21 | import java.util.concurrent.TimeUnit; 22 | 23 | /** 24 | * Monitors free space on the disks used by this 
JVM. 25 | * 26 | *

Paths are registered via {@link #pollDiskSpace(File)}. The first call 27 | * obtains the {@link FileStore} for the supplied file and adds it to the 28 | * internal maps. Each subsequent call for the same path re-uses the cached {@link FileStore}. This 29 | * method is typically invoked when opening a queue or a memory-mapped file. A 30 | * scheduled executor named {@value #DISK_SPACE_CHECKER_NAME} then runs 31 | * the monitor at a fixed rate, every {@code chronicle.disk.monitor.period} seconds (10 by default).

32 | * 33 | *

The monitor may be disabled with the system property 34 | * {@code chronicle.disk.monitor.disable}. The threshold that triggers a 35 | * warning is controlled by {@code chronicle.disk.monitor.threshold.percent}.

36 | * 37 | *

When the available space falls below these limits the monitor invokes a 38 | * {@link NotifyDiskLow} service. Implementations are discovered with 39 | * {@link java.util.ServiceLoader} and the default simply logs a warning.

40 | * 41 | *

The {@link #run()} loop iterates over the tracked {@link DiskAttributes} 42 | * entries. Each record stores a {@link FileStore}, the time for the next check 43 | * and the total size. When the free space is less than two hundred megabytes a 44 | * panic notification is sent. When it is below the threshold percentage of the total size a warning is sent. Otherwise the next check is delayed by one 45 | * millisecond per megabyte of free space.

46 | */ 47 | public enum DiskSpaceMonitor implements Runnable, Closeable { 48 | INSTANCE; 49 | 50 | public static final String DISK_SPACE_CHECKER_NAME = "disk~space~checker"; 51 | static final boolean WARN_DELETED = Jvm.getBoolean("disk.monitor.deleted.warning"); 52 | private static final boolean DISABLED = Jvm.getBoolean("chronicle.disk.monitor.disable"); 53 | public static final int TIME_TAKEN_WARN_THRESHOLD_US = Jvm.getInteger("chronicle.disk.monitor.warn.threshold.us", 250); 54 | private final NotifyDiskLow notifyDiskLow; 55 | final Map fileStoreCacheMap = new ConcurrentHashMap<>(); 56 | final Map diskAttributesMap = new ConcurrentHashMap<>(); 57 | final ScheduledExecutorService executor; 58 | private volatile int thresholdPercentage = Jvm.getInteger("chronicle.disk.monitor.threshold.percent", 5); 59 | private TimeProvider timeProvider = SystemTimeProvider.INSTANCE; 60 | 61 | DiskSpaceMonitor() { 62 | final ServiceLoader services = ServiceLoader.load(NotifyDiskLow.class); 63 | if (services.iterator().hasNext()) { 64 | final List warners = new ArrayList<>(); 65 | services.iterator().forEachRemaining(warners::add); 66 | this.notifyDiskLow = new NotifyDiskLowIterator(warners); 67 | } else { 68 | this.notifyDiskLow = new NotifyDiskLowLogWarn(); 69 | } 70 | boolean diabled = Jvm.getBoolean("chronicle.disk.monitor.disable"); 71 | if (!diabled) { 72 | executor = Threads.acquireScheduledExecutorService(DISK_SPACE_CHECKER_NAME, true); 73 | long period = Jvm.getLong("chronicle.disk.monitor.period", 10L); 74 | executor.scheduleAtFixedRate(this, period, period, TimeUnit.SECONDS); 75 | } else { 76 | executor = null; 77 | } 78 | } 79 | 80 | // used for testing purposes 81 | public void clear() { 82 | fileStoreCacheMap.clear(); 83 | diskAttributesMap.clear(); 84 | } 85 | 86 | public void pollDiskSpace(File file) { 87 | if (DISABLED) 88 | return; 89 | long start = timeProvider.currentTimeNanos(); 90 | 91 | final String absolutePath = file.getAbsolutePath(); 92 | FileStore fs = 
fileStoreCacheMap.get(absolutePath); 93 | if (fs == null) { 94 | if (file.exists()) { 95 | 96 | Path path = Paths.get(absolutePath); 97 | try { 98 | fs = Files.getFileStore(path); 99 | fileStoreCacheMap.put(absolutePath, fs); 100 | } catch (IOException e) { 101 | Jvm.warn().on(getClass(), "Error trying to obtain the FileStore for " + path, e); 102 | return; 103 | } 104 | } else { 105 | // nothing to monitor if it doesn't exist. 106 | return; 107 | } 108 | } 109 | diskAttributesMap.computeIfAbsent(fs, DiskAttributes::new); 110 | 111 | final long tookUs = (timeProvider.currentTimeNanos() - start) / 1_000; 112 | if (tookUs > TIME_TAKEN_WARN_THRESHOLD_US) 113 | Jvm.perf().on(getClass(), "Took " + tookUs / 1000.0 + " ms to pollDiskSpace for " + file.getAbsolutePath()); 114 | } 115 | 116 | @Override 117 | public void run() { 118 | for (Iterator iterator = diskAttributesMap.values().iterator(); iterator.hasNext(); ) { 119 | DiskAttributes da = iterator.next(); 120 | try { 121 | da.run(); 122 | } catch (IOException e) { 123 | if (WARN_DELETED) 124 | Jvm.warn().on(getClass(), "Unable to get disk space for " + da.fileStore, e); 125 | iterator.remove(); 126 | } 127 | } 128 | } 129 | 130 | public int getThresholdPercentage() { 131 | return thresholdPercentage; 132 | } 133 | 134 | public void setThresholdPercentage(int thresholdPercentage) { 135 | this.thresholdPercentage = thresholdPercentage; 136 | } 137 | 138 | @VisibleForTesting 139 | protected void setTimeProvider(TimeProvider timeProvider) { 140 | this.timeProvider = timeProvider; 141 | } 142 | 143 | @Override 144 | public void close() { 145 | if (executor != null) 146 | Threads.shutdown(executor); 147 | } 148 | 149 | final class DiskAttributes { 150 | 151 | private final FileStore fileStore; 152 | 153 | long timeNextCheckedMS; 154 | long totalSpace; 155 | 156 | DiskAttributes(FileStore fileStore) { 157 | this.fileStore = fileStore; 158 | } 159 | 160 | void run() throws IOException { 161 | long now = 
timeProvider.currentTimeMillis(); 162 | if (timeNextCheckedMS > now) 163 | return; 164 | 165 | long start = System.nanoTime(); 166 | if (totalSpace <= 0) 167 | totalSpace = fileStore.getTotalSpace(); 168 | 169 | long unallocatedBytes = fileStore.getUnallocatedSpace(); 170 | if (unallocatedBytes < (200 << 20)) { 171 | // if less than 200 Megabytes 172 | notifyDiskLow.panic(fileStore); 173 | 174 | } else if (unallocatedBytes < totalSpace * DiskSpaceMonitor.INSTANCE.thresholdPercentage / 100) { 175 | final double diskSpaceFull = ((long) (1000d * (totalSpace - unallocatedBytes) / totalSpace + 0.999)) / 10.0; 176 | notifyDiskLow.warning(diskSpaceFull, fileStore); 177 | 178 | } else { 179 | // wait 1 ms per MB or approx 1 sec per GB free. 180 | timeNextCheckedMS = now + (unallocatedBytes >> 20); 181 | } 182 | long time = System.nanoTime() - start; 183 | if (time > 1_000_000) 184 | Jvm.perf().on(getClass(), "Took " + time / 10_000 / 100.0 + " ms to check the disk space of " + fileStore); 185 | } 186 | } 187 | 188 | private static class NotifyDiskLowIterator implements NotifyDiskLow { 189 | private final List list; 190 | 191 | public NotifyDiskLowIterator(List list) { 192 | this.list = list; 193 | } 194 | 195 | @Override 196 | public void panic(FileStore fileStore) { 197 | for (NotifyDiskLow mfy : list) 198 | mfy.panic(fileStore); 199 | } 200 | 201 | @Override 202 | public void warning(double diskSpaceFullPercent, FileStore fileStore) { 203 | for (NotifyDiskLow mfy : list) 204 | mfy.warning(diskSpaceFullPercent, fileStore); 205 | } 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /AGENTS.md: -------------------------------------------------------------------------------- 1 | # Guidance for AI agents, bots, and humans contributing to Chronicle Software's OpenHFT projects. 2 | 3 | LLM-based agents can accelerate development only if they respect our house rules. 
This file tells you: 4 | 5 | * how to run and verify the build; 6 | * what *not* to comment; 7 | * when to open pull requests. 8 | 9 | ## Language & character-set policy 10 | 11 | | Requirement | Rationale | 12 | |--------------|-----------| 13 | | **British English** spelling (`organisation`, `licence`, *not* `organization`, `license`) except technical US spellings like `synchronized` | Keeps wording consistent with Chronicle's London HQ and existing docs. See the University of Oxford style guide for reference. | 14 | | **ASCII-7 only** (code-points 0-127). Avoid smart quotes, non-breaking spaces and accented characters. | ASCII-7 survives every toolchain Chronicle uses, incl. low-latency binary wire formats that expect the 8th bit to be 0. | 15 | | If a symbol is not available in ASCII-7, use a textual form such as `micro-second`, `>=`, `:alpha:`, `:yes:`. This is the preferred approach and Unicode must not be inserted. | Extended or '8-bit ASCII' variants are *not* portable and are therefore disallowed. | 16 | 17 | ## Javadoc guidelines 18 | 19 | **Goal:** Every Javadoc block should add information you cannot glean from the method signature alone. Anything else is 20 | noise and slows readers down. 21 | 22 | | Do | Don't | 23 | |----|-------| 24 | | State *behavioural contracts*, edge-cases, thread-safety guarantees, units, performance characteristics and checked exceptions. | Restate the obvious ("Gets the value", "Sets the name"). | 25 | | Keep the first sentence short; it becomes the summary line in aggregated docs. | Duplicate parameter names/ types unless more explanation is needed. | 26 | | Prefer `@param` for *constraints* and `@throws` for *conditions*, following Oracle's style guide. | Pad comments to reach a line-length target. | 27 | | Remove or rewrite autogenerated Javadoc for trivial getters/setters. | Leave stale comments that now contradict the code. 
| 28 | 29 | The principle that Javadoc should only explain what is *not* manifest from the signature is well-established in the 30 | wider Java community. 31 | 32 | ## Build & test commands 33 | 34 | Agents must verify that the project still compiles and all unit tests pass before opening a PR: 35 | 36 | ```bash 37 | # From repo root 38 | mvn -q verify 39 | ``` 40 | 41 | ## Commit-message & PR etiquette 42 | 43 | 1. **Subject line <= 72 chars**, imperative mood: "Fix roll-cycle offset in `ExcerptAppender`". 44 | 2. Reference the JIRA/GitHub issue if it exists. 45 | 3. In *body*: *root cause -> fix -> measurable impact* (latency, allocation, etc.). Use ASCII bullet points. 46 | 4. **Run `mvn verify`** again after rebasing. 47 | 48 | ## What to ask the reviewers 49 | 50 | * *Is this AsciiDoc documentation precise enough for a clean-room re-implementation?* 51 | * Does the Javadoc explain the code's *why* and *how* that a junior developer would not be expected to work out? 52 | * Are the documentation, tests and code updated together so the change is clear? 53 | * Does the commit point back to the relevant requirement or decision tag? 54 | * Would an example or small diagram help future maintainers? 55 | 56 | ## Project requirements 57 | 58 | See the [Decision Log](src/main/adoc/decision-log.adoc) for the latest project decisions. 59 | See the [Project Requirements](src/main/adoc/project-requirements.adoc) for details on project requirements. 60 | 61 | ## Elevating the Workflow with Real-Time Documentation 62 | 63 | Building upon our existing Iterative Workflow, the newest recommendation is to emphasise *real-time updates* to documentation. 64 | Ensure the relevant `.adoc` files are updated when features, requirements, implementation details, or tests change. 65 | This tight loop informs the AI accurately and creates immediate clarity for all team members. 
66 | 67 | ### Benefits of Real-Time Documentation 68 | 69 | * **Confidence in documentation**: Accurate docs prevent miscommunications that derail real-world outcomes. 70 | * **Reduced drift**: Real-time updates keep requirements, tests and code aligned. 71 | * **Faster feedback**: AI can quickly highlight inconsistencies when everything is in sync. 72 | * **Better quality**: Frequent checks align the implementation with the specified behaviour. 73 | * **Smoother onboarding**: Up-to-date AsciiDoc clarifies the system for new developers. 74 | * **Incremental changes**: AIDE flags newly updated files so you can keep the documentation synchronised. 75 | 76 | ### Best Practices 77 | 78 | * **Maintain Sync**: Keep documentation (AsciiDoc), tests, and code synchronised in version control. Changes in one area should prompt reviews and potential updates in the others. 79 | * **Doc-First for New Work**: For *new* features or requirements, aim to update documentation first, then use AI to help produce or refine corresponding code and tests. For refactoring or initial bootstrapping, updates might flow from code/tests back to documentation, which should then be reviewed and finalised. 80 | * **Small Commits**: Each commit should ideally relate to a single requirement or coherent change, making reviews easier for humans and AI analysis tools. 81 | * **Team Buy-In**: Encourage everyone to review AI outputs critically and contribute to maintaining the synchronicity of all artefacts. 82 | 83 | ## AI Agent Guidelines 84 | 85 | When using AI agents to assist with development, please adhere to the following guidelines: 86 | 87 | * **Respect the Language & Character-set Policy**: Ensure all AI-generated content follows the British English and ASCII-7 guidelines outlined above. 88 | * **Focus on Clarity**: AI-generated documentation should be clear and concise and add value beyond what is already present in the code or existing documentation. 
89 | * **Avoid Redundancy**: Do not generate content that duplicates existing documentation or code comments unless it provides additional context or clarification. 90 | * **Review AI Outputs**: Always review AI-generated content for accuracy, relevance, and adherence to the project's documentation standards before committing it to the repository. 91 | 92 | ## Company-Wide Tagging 93 | 94 | This section records **company-wide** decisions that apply to *all* Chronicle projects. All identifiers take the form `<Scope>-<Tag>-xxx`. The `xxx` numbers are unique within the same Scope even if the tags are different. Component-specific decisions live in their xxx-decision-log.adoc files. 95 | 96 | ### Tag Taxonomy (Nine-Box Framework) 97 | 98 | To improve traceability, we adopt the Nine-Box taxonomy for requirement and decision identifiers. These tags are used in addition to the existing ALL prefix, which remains reserved for global decisions across every project. 99 | 100 | .Adopt a Nine-Box Requirement Taxonomy 101 | 102 | |Tag | Scope | Typical examples | 103 | |----|-------|------------------| 104 | |FN |Functional user-visible behaviour | Message routing, business rules | 105 | |NF-P |Non-functional - Performance | Latency budgets, throughput targets | 106 | |NF-S |Non-functional - Security | Authentication method, TLS version | 107 | |NF-O |Non-functional - Operability | Logging, monitoring, health checks | 108 | |TEST |Test / QA obligations | Chaos scenarios, benchmarking rigs | 109 | |DOC |Documentation obligations | Sequence diagrams, user guides | 110 | |OPS |Operational / DevOps concerns | Helm values, deployment checklist | 111 | |UX |Operator or end-user experience | CLI ergonomics, dashboard layouts | 112 | |RISK |Compliance / risk controls | GDPR retention, audit trail | 113 | 114 | `ALL-*` stays global, case-exact tags. Pick one primary tag if multiple apply. 
115 | 116 | ### Decision Record Template 117 | 118 | ```asciidoc 119 | === [Identifier] Title of Decision 120 | 121 | Date:: YYYY-MM-DD 122 | Context:: 123 | * What is the issue that this decision addresses? 124 | * What are the driving forces, constraints, and requirements? 125 | Decision Statement:: 126 | * What is the change that is being proposed or was decided? 127 | Alternatives Considered:: 128 | * [Alternative 1 Name/Type]: 129 | ** *Description:* Brief description of the alternative. 130 | ** *Pros:* ... 131 | ** *Cons:* ... 132 | * [Alternative 2 Name/Type]: 133 | ** *Description:* Brief description of the alternative. 134 | ** *Pros:* ... 135 | ** *Cons:* ... 136 | Rationale for Decision:: 137 | * Why was the chosen decision selected? 138 | * How does it address the context and outweigh the cons of alternatives? 139 | Impact & Consequences:: 140 | * What are the positive and negative consequences of this decision? 141 | * How does this decision affect the system, developers, users, or operations? 142 | * What are the trade-offs made? 143 | Notes/Links:: 144 | ** (Optional: Links to relevant issues, discussions, documentation, proof-of-concepts) 145 | ``` 146 | 147 | ## Asciidoc formatting guidelines 148 | 149 | ### List Indentation 150 | 151 | Do not rely on indentation for list items in AsciiDoc documents. Use the following pattern instead: 152 | 153 | ```asciidoc 154 | section:: Top Level Section 155 | * first level 156 | ** nested level 157 | ``` 158 | 159 | ### Emphasis and Bold Text 160 | 161 | In AsciiDoc, `_text_` is _emphasis_; `*text*` is *bold*. 
162 | -------------------------------------------------------------------------------- /src/main/java/net/openhft/chronicle/threads/MonitorEventLoop.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2025 chronicle.software; SPDX-License-Identifier: Apache-2.0 3 | */ 4 | package net.openhft.chronicle.threads; 5 | 6 | import net.openhft.chronicle.core.Jvm; 7 | import net.openhft.chronicle.core.annotation.HotMethod; 8 | import net.openhft.chronicle.core.io.Closeable; 9 | import net.openhft.chronicle.core.io.SimpleCloseable; 10 | import net.openhft.chronicle.core.threads.EventHandler; 11 | import net.openhft.chronicle.core.threads.EventLoop; 12 | import net.openhft.chronicle.core.threads.HandlerPriority; 13 | import net.openhft.chronicle.core.threads.InvalidEventHandlerException; 14 | import org.jetbrains.annotations.NotNull; 15 | 16 | import java.util.List; 17 | import java.util.concurrent.CopyOnWriteArrayList; 18 | import java.util.concurrent.ExecutorService; 19 | import java.util.concurrent.Executors; 20 | 21 | import static net.openhft.chronicle.threads.Threads.*; 22 | 23 | /** 24 | * Event loop dedicated to low-frequency monitoring tasks. Handlers added to this loop are 25 | * expected to use {@link HandlerPriority#MONITOR} so they do not interfere with application 26 | * work. The provided {@link Pauser} determines how often the handlers are polled and is reset 27 | * whenever a handler reports activity. 28 | * 29 | *

 <p>The loop waits for {@link #MONITOR_INITIAL_DELAY_MS} milliseconds after startup before 30 | * invoking any handlers.</p>

31 | */ 32 | public class MonitorEventLoop extends AbstractLifecycleEventLoop implements Runnable, EventLoop { 33 | public static final String MONITOR_INITIAL_DELAY = "MonitorInitialDelay"; 34 | static int MONITOR_INITIAL_DELAY_MS = Jvm.getInteger(MONITOR_INITIAL_DELAY, 10_000); 35 | 36 | private transient final ExecutorService service; 37 | private transient final EventLoop parent; 38 | private final List handlers = new CopyOnWriteArrayList<>(); 39 | private final Pauser pauser; 40 | private transient volatile Thread thread = null; 41 | 42 | public MonitorEventLoop(final EventLoop parent, final Pauser pauser) { 43 | this(parent, "", pauser); 44 | } 45 | 46 | public MonitorEventLoop(final EventLoop parent, final String name, final Pauser pauser) { 47 | super(name + (withSlash(parent == null ? "" : parent.name())) + "event~loop~monitor"); 48 | this.parent = parent; 49 | this.pauser = pauser; 50 | service = Executors.newSingleThreadExecutor( 51 | new NamedThreadFactory(name, true, null, true)); 52 | } 53 | 54 | @Override 55 | protected void performStart() { 56 | service.submit(this); 57 | } 58 | 59 | @Override 60 | public void unpause() { 61 | pauser.unpause(); 62 | } 63 | 64 | @Override 65 | protected void performStopFromNew() { 66 | performStop(); 67 | } 68 | 69 | @Override 70 | protected void performStopFromStarted() { 71 | performStop(); 72 | } 73 | 74 | private void performStop() { 75 | unpause(); 76 | Threads.shutdownDaemon(service); 77 | } 78 | 79 | @Override 80 | public boolean isAlive() { 81 | return isStarted(); 82 | } 83 | 84 | /** 85 | * Registers a monitoring handler. The handler should have 86 | * {@link HandlerPriority#MONITOR} priority. It is wrapped in an 87 | * {@link IdempotentLoopStartedEventHandler} so that its 88 | * {@link EventHandler#loopStarted()} method runs exactly once on this 89 | * loop's thread. Adding the same handler twice is ignored. 
90 | */ 91 | @Override 92 | public synchronized void addHandler(@NotNull final EventHandler handler) { 93 | throwExceptionIfClosed(); 94 | 95 | if (DEBUG_ADDING_HANDLERS) 96 | Jvm.debug().on(getClass(), "Adding " + handler.priority() + " " + handler + " to " + this.name); 97 | if (isClosed()) 98 | throw new IllegalStateException("Event Group has been closed"); 99 | eventLoopQuietly(parent, handler); 100 | if (!handlers.contains(handler)) 101 | handlers.add(new IdempotentLoopStartedEventHandler(handler)); 102 | } 103 | 104 | @Override 105 | @HotMethod 106 | public void run() { 107 | throwExceptionIfClosed(); 108 | 109 | try { 110 | thread = Thread.currentThread(); 111 | // don't do any monitoring for the first MONITOR_INITIAL_DELAY_MS ms 112 | final long waitUntilMs = System.currentTimeMillis() + MONITOR_INITIAL_DELAY_MS; 113 | while (System.currentTimeMillis() < waitUntilMs && isStarted()) 114 | pauser.pause(); 115 | pauser.reset(); 116 | while (isStarted() && !Thread.currentThread().isInterrupted()) { 117 | boolean busy; 118 | busy = runHandlers(); 119 | pauser.pause(); 120 | if (busy) 121 | pauser.reset(); 122 | } 123 | } catch (Throwable e) { 124 | Jvm.warn().on(getClass(), e); 125 | } finally { 126 | synchronized (this) { 127 | handlers.forEach(Threads::loopFinishedQuietly); 128 | } 129 | } 130 | } 131 | 132 | @HotMethod 133 | private boolean runHandlers() { 134 | boolean busy = false; 135 | for (int i = 0; i < handlers.size(); i++) { 136 | final EventHandler handler = handlers.get(i); 137 | try { 138 | if (loopStartedCall(this, handler)) { 139 | removeHandler(i--); 140 | continue; 141 | } 142 | busy |= handler.action(); 143 | } catch (InvalidEventHandlerException e) { 144 | removeHandler(i--); 145 | } catch (Exception e) { 146 | Jvm.warn().on(getClass(), "Exception thrown by handler " + handler, e); 147 | removeHandler(i--); 148 | } 149 | } 150 | return busy; 151 | } 152 | 153 | private synchronized void removeHandler(int handlerIndex) { 154 | try { 155 | 
EventHandler removedHandler = handlers.remove(handlerIndex); 156 | loopFinishedQuietly(removedHandler); 157 | Closeable.closeQuietly(removedHandler); 158 | if (DEBUG_REMOVING_HANDLERS) 159 | Jvm.debug().on(getClass(), "Removing " + removedHandler.priority() + " " + removedHandler + " from " + this.name); 160 | } catch (ArrayIndexOutOfBoundsException e) { 161 | if (!handlers.isEmpty()) { 162 | Jvm.warn().on(MonitorEventLoop.class, "Error removing handler!"); 163 | } 164 | } 165 | } 166 | 167 | @Override 168 | protected void performClose() { 169 | super.performClose(); 170 | 171 | net.openhft.chronicle.core.io.Closeable.closeQuietly(handlers); 172 | } 173 | 174 | @Override 175 | public boolean isRunningOnThread(Thread thread) { 176 | return this.thread == thread; 177 | } 178 | 179 | /** 180 | * Decorator that invokes {@link EventHandler#loopStarted()} exactly once on 181 | * the loop thread before any calls to {@link EventHandler#action()}. The 182 | * monitor event loop wraps every handler in this class and calls 183 | * {@link #loopStarted()} at the beginning of each iteration. 
184 | */ 185 | private static final class IdempotentLoopStartedEventHandler extends SimpleCloseable implements EventHandler { 186 | 187 | private transient final EventHandler eventHandler; 188 | private final String handler; 189 | private boolean loopStarted = false; 190 | 191 | public IdempotentLoopStartedEventHandler(@NotNull EventHandler eventHandler) { 192 | this.eventHandler = eventHandler; 193 | handler = eventHandler.toString(); 194 | } 195 | 196 | @Override 197 | public boolean action() throws InvalidEventHandlerException { 198 | return eventHandler.action(); 199 | } 200 | 201 | @Override 202 | public void eventLoop(EventLoop eventLoop) { 203 | eventHandler.eventLoop(eventLoop); 204 | } 205 | 206 | @Override 207 | public void loopStarted() { 208 | if (!loopStarted) { 209 | loopStarted = true; 210 | eventHandler.loopStarted(); 211 | } 212 | } 213 | 214 | @Override 215 | public void loopFinished() { 216 | eventHandler.loopFinished(); 217 | } 218 | 219 | @Override 220 | public @NotNull HandlerPriority priority() { 221 | return eventHandler.priority(); 222 | } 223 | 224 | @Override 225 | public boolean equals(Object o) { 226 | return eventHandler.equals(o); 227 | } 228 | 229 | @Override 230 | public int hashCode() { 231 | return eventHandler.hashCode(); 232 | } 233 | 234 | @Override 235 | protected void performClose() throws IllegalStateException { 236 | Closeable.closeQuietly(eventHandler); 237 | } 238 | 239 | @Override 240 | public String toString() { 241 | return "IdempotentLoopStartedEventHandler{" + 242 | "handler=" + handler + 243 | '}'; 244 | } 245 | } 246 | 247 | @Override 248 | public String toString() { 249 | return "MonitorEventLoop{" + 250 | "service=" + service + 251 | ", parent=" + parent + 252 | ", handlers=" + handlers + 253 | ", pauser=" + pauser + 254 | ", name='" + name + '\'' + 255 | '}'; 256 | } 257 | } 258 | --------------------------------------------------------------------------------