24 | *
25 | * @param dst
26 | * The {@link ByteBuffer} to read into
27 | *
28 | * @param position
29 | * The position within the file at which to start reading
30 | *
31 | * @return How many bytes were placed into dst
32 | * @throws IOException
33 | */
34 | int read(ByteBuffer dst, long position) throws IOException;
35 |
36 | /**
37 | * @return The file size for this channel
38 | */
39 | long size();
40 |
41 | /**
42 | * @return true if this channel is read only, false otherwise
43 | */
44 | boolean isReadOnly();
45 |
46 | /**
47 | * Truncates this file's length to fileLength.
48 | *
49 | * @param fileLength The length to which to truncate
50 | *
51 | * @return This DirectChannel
52 | *
53 | * @throws IOException
54 | */
55 | DirectChannel truncate(long fileLength) throws IOException;
56 |
57 | /**
58 | * @return The file descriptor for this channel
59 | */
60 | int getFD();
61 | }
--------------------------------------------------------------------------------
/engine_java/src/main/java/moe/cnkirito/directio/DirectChannelImpl.java:
--------------------------------------------------------------------------------
1 | package moe.cnkirito.directio;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.nio.ByteBuffer;
6 | import java.nio.channels.ClosedChannelException;
7 | import java.nio.channels.NonWritableChannelException;
8 |
9 | public class DirectChannelImpl implements DirectChannel {
10 | private DirectIOLib lib;
11 | private int fd;
12 | private boolean isOpen;
13 | private long fileLength;
14 | private boolean isReadOnly;
15 |
16 | public static DirectChannel getChannel(File file, boolean readOnly) throws IOException {
17 | DirectIOLib lib = DirectIOLib.getLibForPath(file.toString());
18 | return getChannel(lib, file, readOnly);
19 | }
20 |
21 | public static DirectChannel getChannel(DirectIOLib lib, File file, boolean readOnly) throws IOException {
22 | int fd = lib.oDirectOpen(file.toString(), readOnly);
23 | long length = file.length();
24 | return new DirectChannelImpl(lib, fd, length, readOnly);
25 | }
26 |
27 | private DirectChannelImpl(DirectIOLib lib, int fd, long fileLength, boolean readOnly) {
28 | this.lib = lib;
29 | this.fd = fd;
30 | this.isOpen = true;
31 | this.isReadOnly = readOnly;
32 | this.fileLength = fileLength;
33 | }
34 |
35 | private void ensureOpen() throws ClosedChannelException {
36 | if (!isOpen()) {
37 | throw new ClosedChannelException();
38 | }
39 | }
40 |
41 | private void ensureWritable() {
42 | if (isReadOnly()) {
43 | throw new NonWritableChannelException();
44 | }
45 | }
46 |
47 | @Override
48 | public int read(ByteBuffer dst, long position) throws IOException {
49 | ensureOpen();
50 | return lib.pread(fd, dst, position);
51 | }
52 |
53 | @Override
54 | public int write(ByteBuffer src, long position) throws IOException {
55 | ensureOpen();
56 | ensureWritable();
57 | assert src.position() == lib.blockStart(src.position());
58 |
59 | int written = lib.pwrite(fd, src, position);
60 |
61 | // update file length if we wrote past it
62 | fileLength = Math.max(position + written, fileLength);
63 | return written;
64 | }
65 |
66 | @Override
67 | public DirectChannel truncate(final long length) throws IOException {
68 | ensureOpen();
69 | ensureWritable();
70 | if (DirectIOLib.ftruncate(fd, length) < 0) {
71 | throw new IOException("Error during truncate on descriptor " + fd + ": " +
72 | DirectIOLib.getLastError());
73 | }
74 | fileLength = length;
75 | return this;
76 | }
77 |
78 | @Override
79 | public long size() {
80 | return fileLength;
81 | }
82 |
83 | @Override
84 | public int getFD() {
85 | return fd;
86 | }
87 |
88 |
89 | @Override
90 | public boolean isOpen() {
91 | return isOpen;
92 | }
93 |
94 | @Override
95 | public boolean isReadOnly() {
96 | return isReadOnly;
97 | }
98 |
99 | @Override
100 | public void close() throws IOException {
101 | if (!isOpen()) {
102 | return;
103 | }
104 | try {
105 | if (!isReadOnly()) {
106 | truncate(fileLength);
107 | }
108 | } finally {
109 | isOpen = false;
110 | if (lib.close(fd) < 0) {
111 | throw new IOException("Error closing file with descriptor " + fd + ": " +
112 | DirectIOLib.getLastError());
113 | }
114 | }
115 | }
116 | }
117 |
--------------------------------------------------------------------------------
/engine_java/src/main/java/moe/cnkirito/directio/DirectIOLib.java:
--------------------------------------------------------------------------------
1 | package moe.cnkirito.directio;
2 |
3 | import com.sun.jna.Native;
4 | import com.sun.jna.NativeLong;
5 | import com.sun.jna.Platform;
6 | import com.sun.jna.Pointer;
7 | import com.sun.jna.ptr.PointerByReference;
8 | import org.apache.log4j.Logger;
9 | import sun.nio.ch.DirectBuffer;
10 |
11 | import java.io.IOException;
12 | import java.nio.ByteBuffer;
13 | import java.util.ArrayList;
14 | import java.util.List;
15 |
16 | /**
17 | * Class containing native hooks and utility methods for performing direct I/O, using
18 | * the Linux O_DIRECT flag.
19 | *
20 | *
This class is initialized at class load time, by registering JNA hooks into native methods.
21 | * It also calculates Linux kernel version-dependent alignment amount (in bytes) for use with the O_DIRECT flag,
22 | * when given a string for a file or directory.
23 | */
24 | public class DirectIOLib {
25 | private static final Logger logger = Logger.getLogger(DirectIOLib.class);
26 | public static boolean binit;
27 |
28 | static {
29 | binit = false;
30 | try {
31 | if (!Platform.isLinux()) {
32 | logger.warn("Not running Linux, jaydio support disabled");
33 | } else { // now check to see if we have O_DIRECT...
34 |
35 | final int linuxVersion = 0;
36 | final int majorRev = 1;
37 | final int minorRev = 2;
38 |
39 | List versionNumbers = new ArrayList();
40 | for (String v : System.getProperty("os.version").split("\\.|-")) {
41 | if (v.matches("\\d")) {
42 | versionNumbers.add(Integer.parseInt(v));
43 | }
44 | }
45 |
46 | /* From "man 2 open":
47 | *
48 | * O_DIRECT support was added under Linux in kernel version 2.4.10. Older Linux kernels simply ignore this flag. Some file systems may not implement
49 | * the flag and open() will fail with EINVAL if it is used.
50 | */
51 |
52 | // test to see whether kernel version >= 2.4.10
53 | if (versionNumbers.get(linuxVersion) > 2) {
54 | binit = true;
55 | } else if (versionNumbers.get(linuxVersion) == 2) {
56 | if (versionNumbers.get(majorRev) > 4) {
57 | binit = true;
58 | } else if (versionNumbers.get(majorRev) == 4 && versionNumbers.get(minorRev) >= 10) {
59 | binit = true;
60 | }
61 | }
62 |
63 | if (binit) {
64 | // get access to open(), pread(), etc
65 | Native.register(Platform.C_LIBRARY_NAME);
66 | } else {
67 | logger.warn(String.format("O_DIRECT not supported on your version of Linux: %d.%d.%d", linuxVersion, majorRev, minorRev));
68 | }
69 | }
70 | } catch (Throwable e) {
71 | logger.warn("Unable to register libc at class load time: " + e.getMessage(), e);
72 | }
73 | }
74 |
75 | private int fsBlockSize;
76 | private long fsBlockNotMask;
77 |
78 | public DirectIOLib(int fsBlockSize) {
79 | this.fsBlockSize = fsBlockSize;
80 | this.fsBlockNotMask = ~((long)fsBlockSize - 1);
81 | }
82 |
83 |
84 | /**
85 | * Static method to register JNA hooks for doing direct I/O
86 | *
87 | * @param workingDir
88 | * A directory within the mounted file system on which we'll be working
89 | * Should preferably BE the directory in which we'll be working.
90 | */
91 | public static DirectIOLib getLibForPath(String workingDir) {
92 | int fsBlockSize = initilizeSoftBlockSize(workingDir);
93 | if (fsBlockSize == -1) {
94 | logger.warn("O_DIRECT support non available on your version of Linux (" + System.getProperty("os.version") + "), " +
95 | "please upgrade your kernel in order to use jaydio.");
96 | return null;
97 | }
98 | return new DirectIOLib(fsBlockSize);
99 | }
100 |
101 | /**
102 | * Finds a block size for use with O_DIRECT. Choose it in the most paranoid
103 | * way possible to maximize probability that things work.
104 | *
105 | * @param fileOrDir
106 | * A file or directory within which O_DIRECT access will be performed.
107 | */
108 | private static int initilizeSoftBlockSize(String fileOrDir) {
109 |
110 | int fsBlockSize = -1;
111 |
112 | if (binit) {
113 | // get file system block size for use with workingDir
114 | // see "man 3 posix_memalign" for why we do this
115 | final int _PC_REC_XFER_ALIGN = 0x11;
116 |
117 | fsBlockSize = pathconf(fileOrDir, _PC_REC_XFER_ALIGN);
118 | /* conservative for version >= 2.6
119 | * "man 2 open":
120 | *
121 | * Under Linux 2.6, alignment
122 | * to 512-byte boundaries suffices.
123 | */
124 |
125 | // Since O_DIRECT requires pages to be memory aligned with the file system block size,
126 | // we will do this too in case the page size and the block size are different for
127 | // whatever reason. By taking the least common multiple, everything should be happy:
128 | int pageSize = getpagesize();
129 | fsBlockSize = lcm(fsBlockSize, pageSize);
130 |
131 | // just being completely paranoid:
132 | // (512 is the rule for 2.6+ kernels as mentioned before)
133 | fsBlockSize = lcm(fsBlockSize, 512);
134 |
135 | // lastly, a sanity check
136 | if (fsBlockSize <= 0 || ((fsBlockSize & (fsBlockSize-1)) != 0)) {
137 | logger.warn("file system block size should be a power of two, was found to be " + fsBlockSize);
138 | logger.warn("Disabling O_DIRECT support");
139 | return -1;
140 | }
141 | }
142 |
143 | return fsBlockSize;
144 | }
145 |
146 |
147 | // -- Java interfaces to native methods
148 |
149 | /**
150 | * Interface into native pread function. Always reads an entire buffer,
151 | * unlike {@link #pwrite(int, ByteBuffer, long) pwrite()} which uses buffer state
152 | * to determine how much of buffer to write.
153 | *
154 | * @param fd
155 | * A file discriptor to pass to native pread
156 | *
157 | * @param buf
158 | * The direct buffer into which to record the file read
159 | *
160 | * @param offset
161 | * The file offset at which to read
162 | *
163 | * @return The number of bytes successfully read from the file
164 | *
165 | * @throws IOException
166 | */
167 | public int pread(int fd, ByteBuffer buf, long offset) throws IOException {
168 | buf.clear(); // so that we read an entire buffer
169 | final long address = ((DirectBuffer) buf).address();
170 | Pointer pointer = new Pointer(address);
171 | int n = pread(fd, pointer, new NativeLong(buf.capacity()), new NativeLong(offset)).intValue();
172 | if (n < 0) {
173 | throw new IOException("error reading file at offset " + offset + ": " + getLastError());
174 | }
175 | return n;
176 | }
177 |
178 | /**
179 | * Interface into native pwrite function. Writes bytes corresponding to the nearest file
180 | * system block boundaries between buf.position() and buf.limit().
181 | *
182 | * @param fd
183 | * A file descriptor to pass to native pwrite
184 | *
185 | * @param buf
186 | * The direct buffer from which to write
187 | *
188 | * @param offset
189 | * The file offset at which to write
190 | *
191 | * @return The number of bytes successfully written to the file
192 | *
193 | * @throws IOException
194 | */
195 | public int pwrite(int fd, ByteBuffer buf, long offset) throws IOException {
196 |
197 | // must always write to end of current block
198 | // To handle writes past the logical file size,
199 | // we will later truncate.
200 | final int start = buf.position();
201 | assert start == blockStart(start);
202 | final int toWrite = blockEnd(buf.limit()) - start;
203 |
204 | final long address = ((DirectBuffer) buf).address();
205 | Pointer pointer = new Pointer(address);
206 |
207 | int n = pwrite(fd, pointer.share(start), new NativeLong(toWrite), new NativeLong(offset)).intValue();
208 | if (n < 0) {
209 | throw new IOException("error writing file at offset " + offset + ": " + getLastError());
210 | }
211 | return n;
212 | }
213 |
214 | /**
215 | * Use the open Linux system call and pass in the O_DIRECT flag.
216 | * Currently the only other flags passed in are O_RDONLY if readOnly
217 | * is true, and (if not) O_RDWR and O_CREAT.
218 | *
219 | * @param pathname
220 | * The path to the file to open. If file does not exist and we are opening
221 | * with readOnly, this will throw an error. Otherwise, if it does
222 | * not exist but we have readOnly set to false, create the file.
223 | *
224 | * @param readOnly
225 | * Whether to pass in O_RDONLY
226 | *
227 | * @return An integer file descriptor for the opened file
228 | *
229 | * @throws IOException
230 | */
231 | public int oDirectOpen(String pathname, boolean readOnly) throws IOException {
232 | int flags = OpenFlags.O_DIRECT;
233 | if (readOnly) {
234 | flags |= OpenFlags.O_RDONLY;
235 | } else {
236 | flags |= OpenFlags.O_RDWR | OpenFlags.O_CREAT;
237 | }
238 | int fd = open(pathname, flags, 00644);
239 | if (fd < 0) {
240 | throw new IOException("Error opening " + pathname + ", got " + getLastError());
241 | }
242 | return fd;
243 | }
244 |
245 | /**
246 | * Hooks into errno using Native.getLastError(), and parses it with native strerror function.
247 | *
248 | * @return An error message corresponding to the last errno
249 | */
250 | public static String getLastError() {
251 | return strerror(Native.getLastError());
252 | }
253 |
254 |
255 | // -- alignment logic utility methods
256 |
257 | /**
258 | * @return The soft block size for use with transfer multiples
259 | * and memory alignment multiples
260 | */
261 | public int blockSize() {
262 | return fsBlockSize;
263 | }
264 |
265 | /**
266 | * Returns the default buffer size for file channels doing O_DIRECT
267 | * I/O. By default this is equal to the block size.
268 | *
269 | * @return The default buffer size
270 | */
271 | public int defaultBufferSize() {
272 | return fsBlockSize;
273 | }
274 |
275 | /**
276 | * Given value, find the largest number less than or equal
277 | * to value which is a multiple of the fs block size.
278 | *
279 | * @param value
280 | * @return The largest number less than or equal to value
281 | * which is a multiple of the soft block size
282 | */
283 | public long blockStart(long value) {
284 | return value & fsBlockNotMask;
285 | }
286 |
287 |
288 | /**
289 | * @see #blockStart(long)
290 | */
291 | public int blockStart(int value) {
292 | return (int) (value & fsBlockNotMask);
293 | }
294 |
295 |
296 | /**
297 | * Given value, find the smallest number greater than or equal
298 | * to value which is a multiple of the fs block size.
299 | *
300 | * @param value
301 | * @return The smallest number greater than or equal to value
302 | * which is a multiple of the soft block size
303 | */
304 | public long blockEnd(long value) {
305 | return (value + fsBlockSize- 1) & fsBlockNotMask;
306 | }
307 |
308 |
309 |
310 | /**
311 | * @see #blockEnd(long)
312 | */
313 | public int blockEnd(int value) {
314 | return (int) ((value + fsBlockSize - 1) & fsBlockNotMask);
315 | }
316 |
317 |
318 | /**
319 | * Static variant of {@link #blockEnd(int)}.
320 | * @param blockSize
321 | * @param position
322 | * @return The smallest number greater than or equal to position
323 | * which is a multiple of the blockSize
324 | */
325 | public static long blockEnd(int blockSize, long position) {
326 | long ceil = (position + blockSize - 1)/blockSize;
327 | return ceil*blockSize;
328 | }
329 |
330 |
331 | /**
332 | * Euclid's algo for gcd is more general than we need
333 | * since we only have powers of 2, but w/e
334 | * @param x
335 | * @param y
336 | * @return The least common multiple of x and y
337 | */
338 | public static int lcm(long x, long y) {
339 | // will hold gcd
340 | long g = x;
341 | long yc = y;
342 |
343 | // get the gcd first
344 | while (yc != 0) {
345 | long t = g;
346 | g = yc;
347 | yc = t % yc;
348 | }
349 |
350 | return (int)(x*y/g);
351 | }
352 |
353 |
354 | /**
355 | * Given a pointer-to-pointer memptr, sets the dereferenced value to point to the start
356 | * of an allocated block of size bytes, where the starting address is a multiple of
357 | * alignment. It is guaranteed that the block may be freed by calling @{link {@link #free(Pointer)}
358 | * on the starting address. See "man 3 posix_memalign".
359 | *
360 | * @param memptr The pointer-to-pointer which will point to the address of the allocated aligned block
361 | *
362 | * @param alignment The alignment multiple of the starting address of the allocated block
363 | *
364 | * @param size The number of bytes to allocate
365 | *
366 | * @return 0 on success, one of the C error codes on failure.
367 | */
368 | public static native int posix_memalign(PointerByReference memptr, NativeLong alignment, NativeLong size);
369 |
370 |
371 | /**
372 | * See "man 3 free".
373 | *
374 | * @param ptr The pointer to the hunk of memory which needs freeing
375 | */
376 | public static native void free(Pointer ptr);
377 |
378 |
379 | /**
380 | * See "man 2 close"
381 | *
382 | * @param fd The file descriptor of the file to close
383 | *
384 | * @return 0 on success, -1 on error
385 | */
386 | public native int close(int fd); // musn't forget to do this
387 |
388 | // -- more native function hooks --
389 |
390 | public static native int ftruncate(int fd, long length);
391 |
392 | private static native NativeLong pwrite(int fd, Pointer buf, NativeLong count, NativeLong offset);
393 | private static native NativeLong pread(int fd, Pointer buf, NativeLong count, NativeLong offset);
394 | private static native int open(String pathname, int flags);
395 | private static native int open(String pathname, int flags, int mode);
396 | private static native int getpagesize();
397 | private static native int pathconf(String path, int name);
398 | private static native String strerror(int errnum);
399 |
400 | }
401 |
402 |
--------------------------------------------------------------------------------
/engine_java/src/main/java/moe/cnkirito/directio/DirectIOUtils.java:
--------------------------------------------------------------------------------
1 | package moe.cnkirito.directio;
2 |
3 | import com.sun.jna.NativeLong;
4 | import com.sun.jna.Pointer;
5 | import com.sun.jna.ptr.PointerByReference;
6 |
7 | import java.lang.reflect.Method;
8 | import java.nio.ByteBuffer;
9 | import java.nio.ByteOrder;
10 |
11 | public class DirectIOUtils {
12 | public static final ByteOrder NATIVE_BYTE_ORDER = ByteOrder.nativeOrder();
13 |
14 | /**
15 | * Allocate capacity bytes of native memory for use as a buffer, and
16 | * return a {@link ByteBuffer} which gives an interface to this memory. The
17 | * memory is allocated with
18 | * {@link DirectIOLib#posix_memalign(PointerByReference, NativeLong, NativeLong) DirectIOLib#posix_memalign()}
19 | * to ensure that the buffer can be used with O_DIRECT.
20 | **
21 | * @param capacity The requested number of bytes to allocate
22 | *
23 | * @return A new JnaMemAlignedBuffer of capacity bytes aligned in native memory.
24 | */
25 | public static ByteBuffer allocateForDirectIO(DirectIOLib lib, int capacity) {
26 | if (capacity % lib.blockSize() > 0) {
27 | throw new IllegalArgumentException("Capacity (" + capacity + ") must be a multiple"
28 | + "of the block size (" + lib.blockSize() + ")");
29 | }
30 | NativeLong blockSize = new NativeLong(lib.blockSize());
31 | PointerByReference pointerToPointer = new PointerByReference();
32 |
33 | // align memory for use with O_DIRECT
34 | DirectIOLib.posix_memalign(pointerToPointer, blockSize, new NativeLong(capacity));
35 | return wrapPointer(Pointer.nativeValue(pointerToPointer.getValue()), capacity);
36 | }
37 |
38 | /**
39 | * @param ptr Pointer to wrap.
40 | * @param len Memory location length.
41 | * @return Byte buffer wrapping the given memory.
42 | */
43 | public static ByteBuffer wrapPointer(long ptr, int len) {
44 | try {
45 | ByteBuffer buf = (ByteBuffer)NEW_DIRECT_BUF_MTD.invoke(JAVA_NIO_ACCESS_OBJ, ptr, len, null);
46 |
47 | assert buf.isDirect();
48 | return buf;
49 | }
50 | catch (ReflectiveOperationException e) {
51 | throw new RuntimeException("JavaNioAccess#newDirectByteBuffer() method is unavailable.", e);
52 | }
53 | }
54 |
55 | /** JavaNioAccess object. */
56 | private static final Object JAVA_NIO_ACCESS_OBJ = javaNioAccessObject();
57 |
58 | /** JavaNioAccess#newDirectByteBuffer method. */
59 | private static final Method NEW_DIRECT_BUF_MTD = newDirectBufferMethod();
60 |
61 | /**
62 | * Returns reference to {@code JavaNioAccess.newDirectByteBuffer} method
63 | * from private API for corresponding Java version.
64 | *
65 | * @return Reference to {@code JavaNioAccess.newDirectByteBuffer} method
66 | * @throws RuntimeException If getting access to the private API is failed.
67 | */
68 | private static Method newDirectBufferMethod() {
69 |
70 | try {
71 | Class> cls = JAVA_NIO_ACCESS_OBJ.getClass();
72 |
73 | Method mtd = cls.getMethod("newDirectByteBuffer", long.class, int.class, Object.class);
74 |
75 | mtd.setAccessible(true);
76 |
77 | return mtd;
78 | }
79 | catch (ReflectiveOperationException e) {
80 | throw new RuntimeException(miscPackage() + ".JavaNioAccess#newDirectByteBuffer() method is unavailable.", e);
81 | }
82 | }
83 |
84 | /**
85 | * Returns {@code JavaNioAccess} instance from private API for corresponding Java version.
86 | *
87 | * @return {@code JavaNioAccess} instance for corresponding Java version.
88 | * @throws RuntimeException If getting access to the private API is failed.
89 | */
90 | private static Object javaNioAccessObject() {
91 | String pkgName = miscPackage();
92 |
93 | try {
94 | Class> cls = Class.forName(pkgName + ".misc.SharedSecrets");
95 |
96 | Method mth = cls.getMethod("getJavaNioAccess");
97 |
98 | return mth.invoke(null);
99 | }
100 | catch (ReflectiveOperationException e) {
101 | throw new RuntimeException(pkgName + ".misc.JavaNioAccess class is unavailable.", e);
102 | }
103 | }
104 |
105 | private static String miscPackage() {
106 | // Need return 'jdk.interna' if current Java version >= 9
107 | return "sun";
108 | }
109 | }
110 |
--------------------------------------------------------------------------------
/engine_java/src/main/java/moe/cnkirito/directio/DirectRandomAccessFile.java:
--------------------------------------------------------------------------------
1 | package moe.cnkirito.directio;
2 |
3 | import java.io.*;
4 | import java.nio.ByteBuffer;
5 |
6 | /**
7 | * Class to emulate the behavior of {@link RandomAccessFile}, but using direct I/O.
8 | *
9 | */
10 | public class DirectRandomAccessFile implements Closeable {
11 |
12 | private DirectChannel channel;
13 |
14 |
15 | /**
16 | * @param file The file to open
17 | *
18 | * @param mode Either "rw" or "r", depending on whether this file is read only
19 | *
20 | * @throws IOException
21 | */
22 | public DirectRandomAccessFile(File file, String mode)
23 | throws IOException {
24 |
25 | boolean readOnly = false;
26 | if (mode.equals("r")) {
27 | readOnly = true;
28 | } else if (!mode.equals("rw")) {
29 | throw new IllegalArgumentException("only r and rw modes supported");
30 | }
31 |
32 | if (readOnly && !file.isFile()) {
33 | throw new FileNotFoundException("couldn't find file " + file);
34 | }
35 |
36 | this.channel = DirectChannelImpl.getChannel(file, readOnly);
37 | }
38 |
39 | @Override
40 | public void close() throws IOException {
41 | channel.close();
42 | }
43 |
44 |
45 | public int write(ByteBuffer src, long position) throws IOException {
46 | return channel.write(src, position);
47 | }
48 |
49 | public int read(ByteBuffer dst, long position) throws IOException {
50 | return channel.read(dst, position);
51 | }
52 |
53 | /**
54 | * @return The current position in the file
55 | */
56 | public long getFilePointer() {
57 | return channel.getFD();
58 | }
59 |
60 | /**
61 | * @return The current length of the file
62 | */
63 | public long length() {
64 | return channel.size();
65 | }
66 |
67 | }
--------------------------------------------------------------------------------
/engine_java/src/main/java/moe/cnkirito/directio/OpenFlags.java:
--------------------------------------------------------------------------------
1 | package moe.cnkirito.directio;
2 |
/**
 * Flag values combined by {@link DirectIOLib#oDirectOpen(String, boolean)} and handed
 * to the native open call. Each value is shown with its octal form for comparison
 * with C headers.
 */
public final class OpenFlags {
    /** Octal 00. */
    public static final int O_RDONLY = 0x0;
    /** Octal 01. */
    public static final int O_WRONLY = 0x1;
    /** Octal 02. */
    public static final int O_RDWR = 0x2;
    /** Octal 0100. */
    public static final int O_CREAT = 0x40;
    /** Octal 01000. */
    public static final int O_TRUNC = 0x200;
    /** Octal 040000. */
    public static final int O_DIRECT = 0x4000;
    /** Octal 04000000. */
    public static final int O_SYNC = 0x100000;

    /** Not instantiable: this class only holds constants. */
    private OpenFlags() {}
}
17 |
--------------------------------------------------------------------------------
/engine_java/src/main/java/moe/cnkirito/kiritodb/KiritoDB.java:
--------------------------------------------------------------------------------
1 | package moe.cnkirito.kiritodb;
2 |
3 | import com.alibabacloud.polar_race.engine.common.AbstractVisitor;
4 | import com.alibabacloud.polar_race.engine.common.exceptions.EngineException;
5 | import com.alibabacloud.polar_race.engine.common.exceptions.RetCodeEnum;
6 | import moe.cnkirito.kiritodb.common.Constant;
7 | import moe.cnkirito.kiritodb.common.Util;
8 | import moe.cnkirito.kiritodb.data.CommitLog;
9 | import moe.cnkirito.kiritodb.index.CommitLogIndex;
10 | import moe.cnkirito.kiritodb.partition.HighTenPartitioner;
11 | import moe.cnkirito.kiritodb.partition.Partitionable;
12 | import moe.cnkirito.kiritodb.range.CacheItem;
13 | import moe.cnkirito.kiritodb.range.FetchDataProducer;
14 | import moe.cnkirito.kiritodb.range.RangeTask;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 |
18 | import java.io.IOException;
19 | import java.nio.ByteBuffer;
20 | import java.util.concurrent.CountDownLatch;
21 | import java.util.concurrent.LinkedBlockingQueue;
22 | import java.util.concurrent.atomic.AtomicBoolean;
23 |
24 | /**
25 | * @author kirito.moe@foxmail.com
26 | * Date 2018-10-28
27 | */
28 | public class KiritoDB {
29 |
30 | private static final Logger logger = LoggerFactory.getLogger(KiritoDB.class);
31 | // partition num
32 | private final int partitionNum = Constant.partitionNum;
33 | // key -> partition
34 | private volatile Partitionable partitionable;
35 | // data
36 | public volatile CommitLog[] commitLogs;
37 | // index
38 | private volatile CommitLogIndex[] commitLogIndices;
39 | // true means need to load index into memory, false means no need
40 | private volatile boolean loadFlag = false;
41 |
42 | public KiritoDB() {
43 | partitionable = new HighTenPartitioner();
44 | }
45 |
46 | public void open(String path) throws EngineException {
47 | if (path.endsWith("/")) {
48 | path = path.substring(0, path.length() - 1);
49 | }
50 | commitLogs = new CommitLog[partitionNum];
51 | commitLogIndices = new CommitLogIndex[partitionNum];
52 | try {
53 | for (int i = 0; i < partitionNum; i++) {
54 | commitLogs[i] = new CommitLog();
55 | commitLogs[i].init(path, i);
56 | }
57 | for (int i = 0; i < partitionNum; i++) {
58 | commitLogIndices[i] = new CommitLogIndex();
59 | commitLogIndices[i].init(path, i);
60 | commitLogIndices[i].setCommitLog(commitLogs[i]);
61 | this.loadFlag = commitLogIndices[i].isLoadFlag();
62 | }
63 | if (!loadFlag) {
64 | loadAllIndex();
65 | }
66 | } catch (IOException e) {
67 | throw new EngineException(RetCodeEnum.IO_ERROR, "open exception");
68 | }
69 | }
70 |
71 | public void write(byte[] key, byte[] value) throws EngineException {
72 | int partition = partitionable.getPartition(key);
73 | CommitLog hitCommitLog = commitLogs[partition];
74 | CommitLogIndex hitIndex = commitLogIndices[partition];
75 | synchronized (hitCommitLog) {
76 | hitCommitLog.write(value);
77 | hitIndex.write(key);
78 | }
79 | }
80 |
81 | public byte[] read(byte[] key) throws EngineException {
82 | int partition = partitionable.getPartition(key);
83 | CommitLog hitCommitLog = commitLogs[partition];
84 | CommitLogIndex hitIndex = commitLogIndices[partition];
85 | Long offset = hitIndex.read(key);
86 | if (offset == null) {
87 | throw new EngineException(RetCodeEnum.NOT_FOUND, Util.bytes2Long(key) + " not found");
88 | }
89 | try {
90 | return hitCommitLog.read(offset);
91 | } catch (IOException e) {
92 | throw new EngineException(RetCodeEnum.IO_ERROR, "commit log read exception");
93 | }
94 | }
95 |
96 | // fetch thread flag
97 | private final AtomicBoolean rangFirst = new AtomicBoolean(false);
98 | private static ThreadLocal visitorCallbackValue = ThreadLocal.withInitial(() -> new byte[Constant.VALUE_LENGTH]);
99 | private static ThreadLocal visitorCallbackKey = ThreadLocal.withInitial(() -> new byte[Constant.INDEX_LENGTH]);
100 | private final static int THREAD_NUM = 64;
101 | private LinkedBlockingQueue rangeTaskLinkedBlockingQueue = new LinkedBlockingQueue<>();
102 |
103 | public void range(byte[] lower, byte[] upper, AbstractVisitor visitor) throws EngineException {
104 | // 第一次 range 的时候开启 fetch 线程
105 | if (rangFirst.compareAndSet(false, true)) {
106 | // logger.info("[jvm info] range first now {} ", Util.getFreeMemory());
107 | initPreFetchThreads();
108 | }
109 | RangeTask rangeTask = new RangeTask(visitor, new CountDownLatch(1));
110 | rangeTaskLinkedBlockingQueue.offer(rangeTask);
111 | try {
112 | rangeTask.getCountDownLatch().await();
113 | } catch (InterruptedException e) {
114 | e.printStackTrace();
115 | }
116 | }
117 |
118 | private volatile FetchDataProducer fetchDataProducer;
119 |
120 | private void initPreFetchThreads() {
121 | Thread fetchThread = new Thread(() -> {
122 | fetchDataProducer = new FetchDataProducer(this);
123 | for (int f = 0; f < 2; f++) {
124 | RangeTask[] rangeTasks = new RangeTask[THREAD_NUM];
125 | for (int i = 0; i < THREAD_NUM; i++) {
126 | try {
127 | rangeTasks[i] = rangeTaskLinkedBlockingQueue.take();
128 | } catch (InterruptedException e) {
129 | e.printStackTrace();
130 | }
131 | }
132 | fetchDataProducer.initFetch();
133 | fetchDataProducer.startFetch();
134 | for (int i = 0; i < THREAD_NUM; i++) {
135 | final int rangeIndex = i;
136 | Thread thread = new Thread(() -> {
137 | RangeTask myTask = rangeTasks[rangeIndex];
138 | for (int dbIndex = 0; dbIndex < partitionNum; dbIndex++) {
139 | CacheItem cacheItem;
140 | while (true) {
141 | cacheItem = fetchDataProducer.getCacheItem(dbIndex);
142 | if (cacheItem != null) {
143 | break;
144 | }
145 | sleep1us();
146 | }
147 | while (true) {
148 | if (cacheItem.ready) {
149 | break;
150 | }
151 | sleep1us();
152 | }
153 | byte[] value = visitorCallbackValue.get();
154 | byte[] key = visitorCallbackKey.get();
155 | ByteBuffer valueCache = cacheItem.buffer.slice();
156 | int keySize = commitLogIndices[dbIndex].getMemoryIndex().getSize();
157 | int[] offset = commitLogIndices[dbIndex].getMemoryIndex().getOffset();
158 | long[] keys = commitLogIndices[dbIndex].getMemoryIndex().getKeys();
159 | for (int j = 0; j < keySize; j++) {
160 | valueCache.position(offset[j] * Constant.VALUE_LENGTH);
161 | valueCache.get(value);
162 | Util.long2bytes(key, keys[j]);
163 | rangeTasks[rangeIndex].getAbstractVisitor().visit(key, value);
164 | }
165 | while (true) {
166 | if (cacheItem.allReach) {
167 | break;
168 | }
169 | sleep1us();
170 | }
171 | fetchDataProducer.release(dbIndex);
172 | }
173 | myTask.getCountDownLatch().countDown();
174 | });
175 | thread.setDaemon(true);
176 | thread.start();
177 | }
178 | }
179 | });
180 | fetchThread.setDaemon(true);
181 | fetchThread.start();
182 | }
183 |
184 | private void sleep1us() {
185 | try {
186 | Thread.sleep(0, 1);
187 | } catch (InterruptedException e) {
188 | e.printStackTrace();
189 | }
190 | }
191 |
192 | private void loadAllIndex() {
193 | int loadThreadNum = THREAD_NUM;
194 | CountDownLatch countDownLatch = new CountDownLatch(loadThreadNum);
195 | for (int i = 0; i < loadThreadNum; i++) {
196 | final int index = i;
197 | new Thread(() -> {
198 | for (int partition = 0; partition < partitionNum; partition++) {
199 | if (partition % loadThreadNum == index) {
200 | commitLogIndices[partition].load();
201 | }
202 | }
203 | countDownLatch.countDown();
204 | }).start();
205 | }
206 | try {
207 | countDownLatch.await();
208 | } catch (InterruptedException e) {
209 | logger.error("load index interrupted", e);
210 | }
211 | this.loadFlag = true;
212 | }
213 |
214 | public void close() {
215 | if (commitLogs != null) {
216 | for (CommitLog commitLog : commitLogs) {
217 | try {
218 | commitLog.destroy();
219 | } catch (IOException e) {
220 | logger.error("data destroy error", e);
221 | }
222 | }
223 | }
224 | if (commitLogIndices != null) {
225 | for (CommitLogIndex commitLogIndex : commitLogIndices) {
226 | try {
227 | commitLogIndex.destroy();
228 | } catch (IOException e) {
229 | logger.error("data destroy error", e);
230 | }
231 | }
232 | }
233 | }
234 | }
235 |
--------------------------------------------------------------------------------
/engine_java/src/main/java/moe/cnkirito/kiritodb/common/Constant.java:
--------------------------------------------------------------------------------
1 | package moe.cnkirito.kiritodb.common;
2 |
3 | import moe.cnkirito.directio.DirectIOLib;
4 |
5 | public class Constant {
6 |
7 | public static final String DATA_PREFIX = "/data";
8 | public static final String DATA_SUFFIX = ".polar";
9 | public static final String INDEX_PREFIX = "/index";
10 | public static final String INDEX_SUFFIX = ".polar";
11 | public static final int VALUE_LENGTH = 4 * 1024;
12 | public static final int INDEX_LENGTH = 8;
13 | public static final int _4kb = 4 * 1024;
14 |
15 | public static int expectedNumPerPartition = 64000;
16 | public static int partitionNum = 1 << 10;
17 |
18 | public static DirectIOLib directIOLib = DirectIOLib.getLibForPath("test_directory");
19 |
20 | }
21 |
--------------------------------------------------------------------------------
/engine_java/src/main/java/moe/cnkirito/kiritodb/common/LoopQuerySemaphore.java:
--------------------------------------------------------------------------------
1 | package moe.cnkirito.kiritodb.common;
2 |
/**
 * A binary semaphore that waits by polling instead of parking the thread.
 *
 * <p>It holds at most one permit: any number of {@link #release()} calls in a row leave
 * exactly one permit available. Taking the permit is an atomic compare-and-set, so when
 * several threads acquire concurrently only one of them obtains each released permit.
 *
 * @author daofeng.xjf
 * @date 2018/11/30
 */
public class LoopQuerySemaphore {

    /** {@code true} while the single permit is available. */
    private final java.util.concurrent.atomic.AtomicBoolean permits;

    /**
     * @param permits the permit is initially available when this is greater than zero,
     *                and unavailable otherwise
     */
    public LoopQuerySemaphore(int permits) {
        this.permits = new java.util.concurrent.atomic.AtomicBoolean(permits > 0);
    }

    /**
     * Takes the permit, sleeping briefly between polls while it is unavailable.
     *
     * @throws InterruptedException if the thread is interrupted while sleeping
     */
    public void acquire() throws InterruptedException {
        while (!tryTake()) {
            Thread.sleep(0, 1);
        }
    }

    /**
     * Takes the permit, busy-spinning without sleeping while it is unavailable.
     *
     * @throws InterruptedException never thrown by this implementation; kept in the
     *                              signature so existing callers still compile
     */
    public void acquireNoSleep() throws InterruptedException {
        while (!tryTake()) {
            // busy spin
        }
    }

    /** Makes the permit available. */
    public void release() {
        permits.set(true);
    }

    /**
     * Atomically takes the permit if it is available.
     *
     * @return {@code true} if this call took the permit
     */
    private boolean tryTake() {
        // Plain read first so a waiting thread spins on a read, not on repeated CAS attempts.
        return permits.get() && permits.compareAndSet(true, false);
    }

}
37 |
--------------------------------------------------------------------------------
/engine_java/src/main/java/moe/cnkirito/kiritodb/common/UnsafeUtil.java:
--------------------------------------------------------------------------------
1 | package moe.cnkirito.kiritodb.common;
2 |
3 | import sun.misc.Unsafe;
4 |
5 | import java.lang.reflect.Field;
6 |
/**
 * Exposes the {@link Unsafe} instance, obtained reflectively from the
 * {@code theUnsafe} field of {@code sun.misc.Unsafe}.
 */
public class UnsafeUtil {

    /** The {@link Unsafe} instance, resolved once when this class is initialized. */
    public static final Unsafe UNSAFE = lookupUnsafe();

    /**
     * Reads the static {@code theUnsafe} field via reflection.
     *
     * @return the {@link Unsafe} instance
     * @throws RuntimeException wrapping any exception raised by the reflective lookup
     */
    private static Unsafe lookupUnsafe() {
        try {
            Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
            theUnsafe.setAccessible(true);
            return (Unsafe) theUnsafe.get(null);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

}
--------------------------------------------------------------------------------
/engine_java/src/main/java/moe/cnkirito/kiritodb/common/Util.java:
--------------------------------------------------------------------------------
1 | package moe.cnkirito.kiritodb.common;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.IOException;
5 | import java.io.InputStream;
6 | import java.io.InputStreamReader;
7 | import java.lang.management.ManagementFactory;
8 | import java.lang.reflect.Method;
9 | import java.nio.ByteBuffer;
10 | import java.nio.MappedByteBuffer;
11 | import java.security.AccessController;
12 | import java.security.PrivilegedAction;
13 | import java.text.SimpleDateFormat;
14 | import java.util.Date;
15 |
16 | public class Util {
17 |
18 | public static String getFreeMemory() {
19 | long free = Runtime.getRuntime().freeMemory() / 1024 / 1024;
20 | long total = Runtime.getRuntime().totalMemory() / 1024 / 1024;
21 | long max = Runtime.getRuntime().maxMemory() / 1024 / 1024;
22 | return "free=" + free + "M,total=" + total + "M,max=" + max + "M";
23 | }
24 |
25 | /**
26 | * 当前时间
27 | *
28 | * @return
29 | */
30 | public static String curTime() {
31 | SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//设置日期格式
32 | // new Date()为获取当前系统时间
33 | return df.format(new Date());
34 | }
35 |
36 | /**
37 | * 当前进程
38 | *
39 | * @return
40 | */
41 | public static String pid() {
42 | String name = ManagementFactory.getRuntimeMXBean().getName();
43 | String pid = name.split("@")[0];
44 | return pid;
45 | }
46 |
47 | /**
48 | * 执行shell指令
49 | *
50 | * @param cmd
51 | * @return
52 | * @throws IOException
53 | */
54 | public static String runCmd(String cmd) throws IOException {
55 | Process process = Runtime.getRuntime().exec(cmd);
56 | InputStream is = process.getInputStream();
57 | BufferedReader reader = new BufferedReader(new InputStreamReader(is));
58 | StringBuffer sb = new StringBuffer();
59 | String tmp;
60 | int index = 0;
61 | while ((tmp = reader.readLine()) != null && index < 20) {
62 | sb.append(tmp).append("\n");
63 | ++index;
64 | }
65 | process.destroy();
66 | return sb.toString();
67 | }
68 |
69 | /**
70 | * bytes转long
71 | *
72 | * @param buffer
73 | * @return
74 | */
75 | public static long bytes2Long(byte[] buffer) {
76 | long values = 0;
77 | int len = 8;
78 | for (int i = 0; i < len; ++i) {
79 | values <<= 8;
80 | values |= (buffer[i] & 0xff);
81 | }
82 | return values;
83 | }
84 |
85 | public static void long2bytes(byte[] buffer, long value) {
86 | for (int i = 0; i < 8; ++i) {
87 | int offset = 64 - (i + 1) * 8;
88 | buffer[i] = (byte) ((value >> offset) & 0xff);
89 | }
90 | }
91 |
92 | /**
93 | * long转bytes
94 | *
95 | * @param values
96 | * @return
97 | */
98 | public static byte[] long2bytes(long values) {
99 | byte[] buffer = new byte[8];
100 | for (int i = 0; i < 8; ++i) {
101 | int offset = 64 - (i + 1) * 8;
102 | buffer[i] = (byte) ((values >> offset) & 0xff);
103 | }
104 | return buffer;
105 | }
106 |
107 | /**
108 | * long转bytes
109 | *
110 | * @param values
111 | * @return
112 | */
113 | public static byte[] int2bytes(int values) {
114 | byte[] buffer = new byte[4];
115 | for (int i = 0; i < 4; ++i) {
116 | int offset = 32 - (i + 1) * 8;
117 | buffer[i] = (byte) ((values >> offset) & 0xff);
118 | }
119 | return buffer;
120 | }
121 |
122 | /**
123 | * 模拟随机生成的4kb字节
124 | *
125 | * @param l
126 | * @return
127 | */
128 | public static byte[] _4kb(long l) {
129 | ByteBuffer buffer = ByteBuffer.allocate(4 * 1024);
130 | buffer.putLong(l);
131 | for (int i = 0; i < 4048 - 8; ++i) {
132 | buffer.put((byte) 0);
133 | }
134 | return buffer.array();
135 | }
136 |
137 | public static void clean(MappedByteBuffer mappedByteBuffer) {
138 | ByteBuffer buffer = mappedByteBuffer;
139 | if (buffer == null || !buffer.isDirect() || buffer.capacity() == 0)
140 | return;
141 | invoke(invoke(viewed(buffer), "cleaner"), "clean");
142 | }
143 |
144 | private static Object invoke(final Object target, final String methodName, final Class>... args) {
145 | return AccessController.doPrivileged(new PrivilegedAction