├── .gitignore
├── .travis.yml
├── README.md
├── pom.xml
└── src
    └── main
        ├── java
            └── tk
            │   └── netindev
            │       └── drill
            │           ├── Drill.java
            │           ├── hasher
            │               └── Hasher.java
            │           ├── miner
            │               ├── Job.java
            │               ├── Miner.java
            │               └── Worker.java
            │           └── util
            │               ├── Hashrate.java
            │               ├── Hex.java
            │               └── Misc.java
        ├── jni
            ├── CMakeLists.txt
            ├── algos.hpp
            ├── c_blake256.c
            ├── c_blake256.h
            ├── c_groestl.c
            ├── c_groestl.h
            ├── c_jh.c
            ├── c_jh.h
            ├── c_keccak.c
            ├── c_keccak.h
            ├── c_skein.c
            ├── c_skein.h
            ├── cryptonight.h
            ├── cryptonight_aesni.h
            ├── cryptonight_common.cpp
            ├── groestl_tables.h
            ├── hash.h
            ├── hasher.cpp
            ├── int-util.h
            ├── skein_port.h
            └── soft_aes.hpp
        └── resources
            ├── simplelogger.properties
            ├── unix
                └── x64
                │   └── libcryptonight.so
            └── win
                └── x64
                    └── cryptonight.dll


/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /bin
3 | .classpath
4 | .project
5 | /.settings
6 | /.idea
7 | *.DS_Store
8 | dependency-reduced-pom.xml


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | jdk:
3 |   - oraclejdk8 
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # drill [![Build Status](https://travis-ci.org/netindev/drill.svg?branch=master)](https://travis-ci.org/netindev/drill)
 2 | Monero miner written in Java using JNI bindings.
 3 | 
 4 | ## Download
 5 | * Binary releases: https://github.com/netindev/drill/releases
 6 | * Git tree: https://github.com/netindev/drill.git
 7 | 
 8 | ## Usage
 9 | Usage: ```java -jar drill-x.x.x.jar -thread 2 -host localhost -port 3333 -user 4AignrnSVPiXUwk3nKBsTWVi4PCvAKPsrJKSpqinK55bQPFXHTsbYbe5FtUmxjJTbcATQ233gkntYA51fd6Hmur5F3v2o1G -pass x```
10 | 
11 | ### Options
12 | | Arg | Description | Required |
13 | | --- | --- | --- |
14 | | -host | Pool host to connect to | Yes |
15 | | -user | Username to log in with; your wallet address can also be used | Yes |
16 | | -port | Pool host port | Yes |
17 | | -pass | Password to login | Optional |
18 | | -thread | Thread count | Optional |
19 | | -help | Prints the help | Optional |
20 | 
21 | ## Build
22 | CMake:
23 | * Install [CMake](https://cmake.org/download/)
24 | * Go to: `..\drill\src\main\jni` and execute `cmake .`
25 | 
26 | Java:
27 | * Install [Maven](https://maven.apache.org/download.html)
28 | * Go to: `..\drill` and execute `mvn clean install`
29 | 
30 | ## Contacts
31 | * [email](mailto:contact@netindev.tk)
32 | * [twitter](https://twitter.com/netindev)


--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
 1 | <project xmlns="http://maven.apache.org/POM/4.0.0"
 2 | 	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 3 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 4 | 	<modelVersion>4.0.0</modelVersion>
 5 | 	<groupId>tk.netindev.drill</groupId>
 6 | 	<artifactId>drill</artifactId>
 7 | 	<version>0.1.0-SNAPSHOT</version>
 8 | 	<dependencies>
 9 | 		<dependency>
10 | 			<groupId>commons-cli</groupId>
11 | 			<artifactId>commons-cli</artifactId>
12 | 			<version>1.4</version>
13 | 		</dependency>
14 | 		<dependency>
15 | 			<groupId>org.slf4j</groupId>
16 | 			<artifactId>slf4j-simple</artifactId>
17 | 			<version>1.7.21</version>
18 | 		</dependency>
19 | 		<dependency>
20 | 			<groupId>org.slf4j</groupId>
21 | 			<artifactId>slf4j-api</artifactId>
22 | 			<version>1.7.5</version>
23 | 		</dependency>
24 | 		<dependency>
25 | 			<groupId>com.eclipsesource.minimal-json</groupId>
26 | 			<artifactId>minimal-json</artifactId>
27 | 			<version>0.9.2</version>
28 | 		</dependency>
29 | 	</dependencies>
30 | 	<build>
31 | 		<outputDirectory>target/classes</outputDirectory>
32 | 		<plugins>
33 | 			<plugin>
34 | 				<groupId>org.apache.maven.plugins</groupId>
35 | 				<artifactId>maven-jar-plugin</artifactId>
36 | 				<configuration>
37 | 					<archive>
38 | 						<manifest>
39 | 							<addClasspath>true</addClasspath>
40 | 							<mainClass>tk.netindev.drill.Drill</mainClass>
41 | 						</manifest>
42 | 					</archive>
43 | 				</configuration>
44 | 			</plugin>
45 | 			<plugin>
46 | 				<groupId>org.apache.maven.plugins</groupId>
47 | 				<artifactId>maven-compiler-plugin</artifactId>
48 | 				<version>3.8.0</version>
49 | 				<configuration>
50 | 					<source>1.7</source>
51 | 					<target>1.7</target>
52 | 				</configuration>
53 | 			</plugin>
54 | 			<plugin>
55 | 				<groupId>org.apache.maven.plugins</groupId>
56 | 				<artifactId>maven-shade-plugin</artifactId>
57 | 				<version>3.2.0</version>
58 | 				<executions>
59 | 					<execution>
60 | 						<phase>package</phase>
61 | 						<goals>
62 | 							<goal>shade</goal>
63 | 						</goals>
64 | 					</execution>
65 | 				</executions>
66 | 			</plugin>
67 | 		</plugins>
68 | 	</build>
69 | </project>


--------------------------------------------------------------------------------
/src/main/java/tk/netindev/drill/Drill.java:
--------------------------------------------------------------------------------
 1 | package tk.netindev.drill;
 2 | 
 3 | import org.apache.commons.cli.CommandLine;
 4 | import org.apache.commons.cli.DefaultParser;
 5 | import org.apache.commons.cli.Option;
 6 | import org.apache.commons.cli.Options;
 7 | import org.apache.commons.cli.ParseException;
 8 | import org.slf4j.Logger;
 9 | import org.slf4j.LoggerFactory;
10 | 
11 | import tk.netindev.drill.miner.Miner;
12 | import tk.netindev.drill.util.Misc;
13 | 
14 | /**
15 |  *
16 |  * @author netindev
17 |  *
18 |  */
19 | public class Drill {
20 | 
21 |    private static final Logger logger = LoggerFactory
22 |          .getLogger(Drill.class.getName());
23 |    public static final double PACKAGE_VERSION = 0.10D;
24 | 
25 |    public static void main(String[] args) {
26 |       if (args.length == 0) {
27 |          logger.error(
28 |                "Invalid arguments, please add to the arguments \"-help\".");
29 |          return;
30 |       }
31 |       try {
32 |          System.out.println("Drill cryptonight miner, written by netindev, V: "
33 |                + PACKAGE_VERSION);
34 |          final String system = System.getProperty("os.name").toLowerCase();
35 |          if (!(system.indexOf("win") >= 0 || system.indexOf("nix") >= 0
36 |                || system.indexOf("nux") >= 0 || system.indexOf("aix") >= 0)
37 |                || !System.getProperty("sun.arch.data.model").equals("64")) {
38 |             logger.error("Unfortunately, " + system
39 |                   + " isn't supported at this time.");
40 |             return;
41 |          }
42 |          parseArgs(args);
43 |       } catch (final Throwable e) {
44 |          logger.error(e.getMessage());
45 |       }
46 |    }
47 | 
48 |    private static void parseArgs(String[] args) {
49 |       final Options options = new Options();
50 |       options.addOption(Option.builder("host").hasArg().required().build());
51 |       options.addOption(Option.builder("user").hasArg().required().build());
52 |       options.addOption(Option.builder("port").hasArg().required().build());
53 | 
54 |       options.addOption(Option.builder("pass").hasArg().build());
55 |       options.addOption(Option.builder("thread").hasArg().build());
56 | 
57 |       options.addOption(Option.builder("help").build());
58 |       try {
59 |          final CommandLine parse = new DefaultParser().parse(options, args);
60 |          if (parse.hasOption("help")) {
61 |             logger.info("Arguments with * are optional.");
62 |             logger.info(
63 |                   "java -jar drill.jar -host \"localhost\" -user \"netindev.8700k\" -port \"1000\" -pass* \"12345\" -thread* \"7\"");
64 |             return;
65 |          }
66 |          final String host = parse.getOptionValue("host"),
67 |                user = parse.getOptionValue("user"),
68 |                port = parse.getOptionValue("port");
69 |          final String pass = parse.hasOption("pass")
70 |                ? parse.getOptionValue("pass")
71 |                : "",
72 |                thread = parse.hasOption("thread")
73 |                      ? parse.getOptionValue("thread")
74 |                      : String.valueOf(
75 |                            Runtime.getRuntime().availableProcessors() - 1);
76 |          if (!Misc.isInteger(port)) {
77 |             logger.error("The port isn't an integer");
78 |             return;
79 |          } else if (!Misc.isInteger(thread)) {
80 |             logger.error("The thread isn't an integer");
81 |             return;
82 |          }
83 |          new Miner(host, user, pass, Integer.parseInt(port),
84 |                Integer.parseInt(thread)).start();
85 |       } catch (final ParseException e) {
86 |          logger.error(e.getMessage());
87 |          logger.error("Correct use: java -jar scuti-lite.jar --help");
88 |       } catch (final Exception e) {
89 |          logger.error(e.getMessage());
90 |       }
91 |    }
92 | 
93 | }
94 | 


--------------------------------------------------------------------------------
/src/main/java/tk/netindev/drill/hasher/Hasher.java:
--------------------------------------------------------------------------------
 1 | package tk.netindev.drill.hasher;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import org.slf4j.Logger;
 6 | import org.slf4j.LoggerFactory;
 7 | 
 8 | import tk.netindev.drill.util.Misc;
 9 | 
10 | /**
11 |  *
12 |  * @author netindev
13 |  *
14 |  */
15 | public class Hasher {
16 | 
17 |    private static final Logger logger = LoggerFactory
18 |          .getLogger(Hasher.class.getName());
19 | 
20 |    /**
21 |     * Native hash function provided by the bundled cryptonight library.
22 |     *
23 |     * @param input  bytes to hash
24 |     * @param output buffer that receives the hash (the worker passes 32 bytes)
25 |     */
26 |    public static native void slowHash(byte[] input, byte[] output);
27 | 
28 |    // Selects the bundled native library for the running OS and loads it.
29 |    static {
30 |       final String system = System.getProperty("os.name").toLowerCase();
31 |       final String library;
32 |       if (system.indexOf("win") >= 0) {
33 |          library = "/win/x64/cryptonight.dll";
34 |       } else if (system.indexOf("nix") >= 0 || system.indexOf("nux") >= 0
35 |             || system.indexOf("aix") >= 0) {
36 |          library = "/unix/x64/libcryptonight.so";
37 |       } else {
38 |          library = null;
39 |       }
40 |       if (library == null) {
41 |          // Do not attempt to load: a null resource name would make class
42 |          // initialisation fail with a NullPointerException.
43 |          logger.error("Couldn't find a dynamic-link library for your system.");
44 |       } else {
45 |          try {
46 |             Misc.loadLibrary(library);
47 |          } catch (final IOException e) {
48 |             logger.error(e.getMessage());
49 |          }
50 |       }
51 |    }
52 | 
53 | }
54 | 


--------------------------------------------------------------------------------
/src/main/java/tk/netindev/drill/miner/Job.java:
--------------------------------------------------------------------------------
 1 | package tk.netindev.drill.miner;
 2 | 
 3 | /**
 4 |  *
 5 |  * @author netindev
 6 |  *
 7 |  */
 8 | public class Job {
 9 | 
10 |    /** Identifier stored by {@link #setId(String)}. */
11 |    private String id;
12 |    /** Job identifier stored by {@link #setJobId(String)}. */
13 |    private String jobId;
14 |    /** Blob bytes; the array is stored and returned without copying. */
15 |    private byte[] blob;
16 |    /** Target value stored by {@link #setTarget(int)}. */
17 |    private int target;
18 | 
19 |    /** @return the id, or {@code null} when none has been set */
20 |    public String getId() {
21 |       return id;
22 |    }
23 | 
24 |    /** @return the job id, or {@code null} when none has been set */
25 |    public String getJobId() {
26 |       return jobId;
27 |    }
28 | 
29 |    /** @return the blob array itself (not a copy), or {@code null} */
30 |    public byte[] getBlob() {
31 |       return blob;
32 |    }
33 | 
34 |    /** @return the target, {@code 0} when none has been set */
35 |    public int getTarget() {
36 |       return target;
37 |    }
38 | 
39 |    /** @param id the new id */
40 |    public void setId(String id) {
41 |       this.id = id;
42 |    }
43 | 
44 |    /** @param jobId the new job id */
45 |    public void setJobId(String jobId) {
46 |       this.jobId = jobId;
47 |    }
48 | 
49 |    /** @param blob the new blob; the reference is kept as is */
50 |    public void setBlob(byte[] blob) {
51 |       this.blob = blob;
52 |    }
53 | 
54 |    /** @param target the new target */
55 |    public void setTarget(int target) {
56 |       this.target = target;
57 |    }
58 | 
59 | }


--------------------------------------------------------------------------------
/src/main/java/tk/netindev/drill/miner/Miner.java:
--------------------------------------------------------------------------------
  1 | package tk.netindev.drill.miner;
  2 | 
  3 | import java.io.IOException;
  4 | import java.io.PrintWriter;
  5 | import java.net.Socket;
  6 | import java.net.UnknownHostException;
  7 | import java.util.HashSet;
  8 | import java.util.Scanner;
  9 | import java.util.Set;
 10 | import java.util.concurrent.atomic.AtomicBoolean;
 11 | 
 12 | import org.slf4j.Logger;
 13 | import org.slf4j.LoggerFactory;
 14 | 
 15 | import com.eclipsesource.json.JsonObject;
 16 | import com.eclipsesource.json.JsonObject.Member;
 17 | 
 18 | import tk.netindev.drill.Drill;
 19 | import tk.netindev.drill.util.Hashrate;
 20 | import tk.netindev.drill.util.Hex;
 21 | 
 22 | /**
 23 |  *
 24 |  * @author netindev
 25 |  *
 26 |  */
 27 | public class Miner {
 28 | 
 29 |    private static final Logger logger = LoggerFactory
 30 |          .getLogger(Miner.class.getName());
 31 | 
 32 |    private final String host, user, pass;
 33 |    private final int port, thread;
 34 | 
 35 |    private Socket socket;
 36 |    private PrintWriter printWriter;
 37 |    private Scanner scanner;
 38 | 
 39 |    protected final Hashrate hashrate = new Hashrate();
 40 | 
 41 |    private final Set<Worker> set = new HashSet<>();
 42 | 
 43 |    public Miner(String host, String user, String pass, int port, int thread) {
 44 |       this.host = host;
 45 |       this.user = user;
 46 |       this.pass = pass;
 47 |       this.port = port;
 48 |       this.thread = thread;
 49 |    }
 50 | 
 51 |    private boolean connect() {
 52 |       try {
 53 |          this.socket = new Socket(this.host, this.port);
 54 |          this.printWriter = new PrintWriter(this.socket.getOutputStream());
 55 |          this.scanner = new Scanner(this.socket.getInputStream());
 56 |          this.socket.setTcpNoDelay(true);
 57 |          this.socket.setSoTimeout(1000 * 150 /* 2:30 mins */);
 58 |          final JsonObject params = new JsonObject(), doc = new JsonObject();
 59 |          params.add("login", this.user);
 60 |          params.add("pass", this.pass);
 61 |          params.add("agent", "drill/" + Drill.PACKAGE_VERSION);
 62 | 
 63 |          doc.add("jsonrpc", "2.0");
 64 |          doc.add("id", 1);
 65 |          doc.add("method", "login");
 66 |          doc.add("params", params);
 67 |          this.printWriter.print(doc.toString() + "\n");
 68 |          this.printWriter.flush();
 69 |          return true;
 70 |       } catch (final UnknownHostException e) {
 71 |          logger.error("Can't connect to: " + e.getMessage());
 72 |          return false;
 73 |       } catch (final IOException e) {
 74 |          logger.error(e.getMessage());
 75 |          return false;
 76 |       }
 77 |    }
 78 | 
 79 |    public void start() throws Exception {
 80 |       if (this.connect()) {
 81 |          logger.info("Connected to: " + this.host + ":" + this.port);
 82 |          while (this.scanner.hasNextLine()) {
 83 |             try {
 84 |                final String string = this.scanner.nextLine();
 85 |                final Job job = this.parseJob(string);
 86 |                if (job != null) {
 87 |                   logger.info("New job received, diff: "
 88 |                         + (Integer.MAX_VALUE / job.getTarget()) * 2);
 89 |                   this.work(job);
 90 |                   logger.info("Hashrate: "
 91 |                         + String.format("%.2f", this.getHashrate()) + " h/s");
 92 |                }
 93 |             } catch (final Exception e) {
 94 |                logger.error(e.getMessage());
 95 |                break;
 96 |             }
 97 |          }
 98 |          for (final Thread thread : this.set) {
 99 |             thread.interrupt();
100 |          }
101 |          this.set.clear();
102 |          logger.error("Disconnected from the pool");
103 |          this.reconnect();
104 |       } else {
105 |          logger.error("Couldn't connect to the pool");
106 |          this.reconnect();
107 |       }
108 |    }
109 | 
110 |    private Job parseJob(String string) {
111 |       final Job job = new Job();
112 |       final AtomicBoolean status = new AtomicBoolean(false),
113 |             info = new AtomicBoolean(false);
114 |       for (final Member member : JsonObject.readFrom(string)) {
115 |          if (member.getName().equals("result")) {
116 |             for (final Member resultTable : member.getValue().asObject()) {
117 |                if (resultTable.getName().equals("id")) {
118 |                   job.setId(resultTable.getValue().asString());
119 |                } else if (resultTable.getName().equals("job")) {
120 |                   for (final Member jobTable : resultTable.getValue()
121 |                         .asObject()) {
122 |                      if (jobTable.getName().equals("blob")) {
123 |                         job.setBlob(
124 |                               Hex.unhexlify(jobTable.getValue().asString()));
125 |                      } else if (jobTable.getName().equals("job_id")) {
126 |                         job.setJobId(jobTable.getValue().asString());
127 |                      } else if (jobTable.getName().equals("target")) {
128 |                         final byte[] target = Hex
129 |                               .unhexlify(jobTable.getValue().asString());
130 |                         job.setTarget(
131 |                               (((target[3] << 24) | ((target[2] & 255) << 16))
132 |                                     | ((target[1] & 255) << 8))
133 |                                     | (target[0] & 255));
134 |                      }
135 |                   }
136 |                   status.set(true);
137 |                } else if (resultTable.getName().equals("status")) {
138 |                   if (resultTable.getValue().asString().equals("OK")
139 |                         && !status.get()) {
140 |                      logger.info("Result accepted by the pool!");
141 |                      info.set(true);
142 |                   }
143 |                }
144 |             }
145 |          } else if (member.getName().equals("error")) {
146 |             if (!member.getValue().isNull()) {
147 |                for (final Member errorTable : member.getValue().asObject()) {
148 |                   if (errorTable.getName().equals("message")) {
149 |                      if (errorTable.getValue().asString()
150 |                            .equals("Unauthenticated")) {
151 |                         throw new RuntimeException("Unauthenticated");
152 |                      }
153 |                      logger.error(errorTable.getValue().asString());
154 |                      info.set(true);
155 |                   }
156 |                }
157 |             }
158 |          } else if (member.getName().equals("params")) {
159 |             for (final Member paramTable : member.getValue().asObject()) {
160 |                if (paramTable.getName().equals("id")) {
161 |                   job.setId(paramTable.getValue().asString());
162 |                } else if (paramTable.getName().equals("blob")) {
163 |                   job.setBlob(Hex.unhexlify(paramTable.getValue().asString()));
164 |                } else if (paramTable.getName().equals("job_id")) {
165 |                   job.setJobId(paramTable.getValue().asString());
166 |                } else if (paramTable.getName().equals("target")) {
167 |                   final byte[] target = Hex
168 |                         .unhexlify(paramTable.getValue().asString());
169 |                   job.setTarget((((target[3] << 24) | ((target[2] & 255) << 16))
170 |                         | ((target[1] & 255) << 8)) | (target[0] & 255));
171 |                }
172 |             }
173 |          }
174 |       }
175 |       return info.get() ? null : job;
176 |    }
177 | 
178 |    protected void send(Job job, byte[] nonce, byte[] result) {
179 |       final JsonObject params = new JsonObject(), doc = new JsonObject();
180 |       params.add("id", job.getId());
181 |       params.add("job_id", job.getJobId());
182 |       params.add("nonce", Hex.hexlify(nonce).toLowerCase());
183 |       params.add("result", Hex.hexlify(result).toLowerCase());
184 | 
185 |       doc.add("id", 1);
186 |       doc.add("jsonrpc", "2.0");
187 |       doc.add("method", "submit");
188 |       doc.add("params", params);
189 |       this.printWriter.print(doc.toString() + "\n");
190 |       this.printWriter.flush();
191 |    }
192 | 
193 |    private void work(Job job) {
194 |       // I didn't find a better way than this.
195 |       for (final Thread thread : this.set) {
196 |          thread.interrupt();
197 |       }
198 |       this.set.clear();
199 | 
200 |       for (int i = 0; i < this.thread; i++) {
201 |          final Worker worker = new Worker(this, job, 100000 * i);
202 |          worker.start();
203 |          this.set.add(worker);
204 |       }
205 |    }
206 | 
207 |    private float getHashrate() {
208 |       if (this.hashrate.size() < 10) {
209 |          return 0.0F;
210 |       } else {
211 |          final long runningTime = System.currentTimeMillis()
212 |                - this.hashrate.element();
213 |          if (runningTime < 10) {
214 |             return 0.0F;
215 |          } else {
216 |             return (float) (this.hashrate.size() / (runningTime * 0.001D));
217 |          }
218 |       }
219 |    }
220 | 
221 |    private void reconnect() {
222 |       try {
223 |          logger.info("Reconnecting in 30 seconds");
224 |          Thread.sleep(1000L * 30L);
225 |          while (!this.connect()) {
226 |             logger.error("Couldn't connect, trying to reconnect in 30 seconds");
227 |             Thread.sleep(1000L * 30L);
228 |          }
229 |          this.start();
230 |       } catch (final Exception e) {
231 |          e.printStackTrace();
232 |       }
233 |    }
234 | 
235 | }
236 | 


--------------------------------------------------------------------------------
/src/main/java/tk/netindev/drill/miner/Worker.java:
--------------------------------------------------------------------------------
 1 | package tk.netindev.drill.miner;
 2 | 
 3 | import tk.netindev.drill.hasher.Hasher;
 4 | 
 5 | /**
 6 |  *
 7 |  * @author netindev
 8 |  *
 9 |  */
10 | public class Worker extends Thread {
11 | 
12 |    private final Miner miner;
13 |    private final Job job;
14 |    private final int nonce;
15 | 
16 |    /**
17 |     * @param miner owner that receives found shares and hashrate samples
18 |     * @param job   job to work on
19 |     * @param nonce first nonce this worker tries
20 |     */
21 |    public Worker(Miner miner, Job job, int nonce) {
22 |       this.miner = miner;
23 |       this.job = job;
24 |       this.nonce = nonce;
25 |    }
26 | 
27 |    /**
28 |     * Hashes the job blob with increasing nonces until interrupted and
29 |     * submits every hash whose top 32 bits do not exceed the job target.
30 |     */
31 |    @Override
32 |    public void run() {
33 |       int nonce = this.nonce;
34 |       final byte[] hash = new byte[32];
35 |       final byte[] array = new byte[4];
36 |       // Private copy: every worker of a job receives the same Job, so
37 |       // writing nonces into the shared array would race between threads.
38 |       final byte[] blob = this.job.getBlob().clone();
39 |       // Widen to long so target and hash compare as unsigned 32-bit values.
40 |       final long target = this.job.getTarget() & 0xFFFFFFFFL;
41 |       while (!interrupted()) {
42 |          blob[39] = (byte) nonce;
43 |          blob[40] = (byte) (nonce >> 8);
44 |          blob[41] = (byte) (nonce >> 16);
45 |          blob[42] = (byte) (nonce >> 24);
46 |          Hasher.slowHash(blob, hash);
47 |          final long value = ((hash[31] & 255L) << 24)
48 |                | ((hash[30] & 255L) << 16) | ((hash[29] & 255L) << 8)
49 |                | (hash[28] & 255L);
50 |          if (value <= target) {
51 |             array[0] = (byte) nonce;
52 |             array[1] = (byte) (nonce >> 8);
53 |             array[2] = (byte) (nonce >> 16);
54 |             array[3] = (byte) (nonce >> 24);
55 |             this.miner.send(this.job, array, hash);
56 |          }
57 |          // Trim and append under one lock: the deque is shared by all
58 |          // workers and is not safe for concurrent modification.
59 |          synchronized (this.miner.hashrate) {
60 |             while (this.miner.hashrate.size() > 99) {
61 |                this.miner.hashrate.pop();
62 |             }
63 |             this.miner.hashrate.add(System.currentTimeMillis());
64 |          }
65 |          nonce++;
66 |       }
67 |    }
68 | 
69 | }


--------------------------------------------------------------------------------
/src/main/java/tk/netindev/drill/util/Hashrate.java:
--------------------------------------------------------------------------------
 1 | package tk.netindev.drill.util;
 2 | 
 3 | import java.util.ArrayDeque;
 4 | 
 5 | /**
 6 |  * Deque of {@code Long} values that adds no behaviour of its own.
 7 |  * @author netindev
 8 |  *
 9 |  */
10 | public class Hashrate extends ArrayDeque<Long> {
11 |    // Worker appends System.currentTimeMillis() values; Miner reads them.
12 |    private static final long serialVersionUID = -1940501882802741874L;
13 | 
14 | }
15 | 


--------------------------------------------------------------------------------
/src/main/java/tk/netindev/drill/util/Hex.java:
--------------------------------------------------------------------------------
 1 | package tk.netindev.drill.util;
 2 | 
 3 | /**
 4 |  *
 5 |  * @author netindev
 6 |  *
 7 |  */
 8 | public class Hex {
 9 | 
10 |    private static final char[] hexArray = "0123456789ABCDEF".toCharArray();
11 | 
12 |    /**
13 |     * Converts a single hexadecimal digit to its numeric value.
14 |     *
15 |     * @param c one of 0-9, a-f or A-F
16 |     * @return the value of the digit, between 0 and 15
17 |     * @throws RuntimeException if c is not a hexadecimal digit
18 |     */
19 |    public static int fromHexChar(char c) {
20 |       // Each range returns directly; without that, decimal digits fell
21 |       // through into the lowercase branch and produced a wrong value.
22 |       if ((c >= '0') && (c <= '9')) {
23 |          return c - '0';
24 |       } else if ((c >= 'A') && (c <= 'F')) {
25 |          return c - 'A' + 10;
26 |       } else if ((c >= 'a') && (c <= 'f')) {
27 |          return c - 'a' + 10;
28 |       }
29 |       throw new RuntimeException("Non-hexadecimal digit found");
30 |    }
31 | 
32 |    /**
33 |     * Encodes bytes as an upper-case hexadecimal string.
34 |     *
35 |     * @param bytes bytes to encode
36 |     * @return two hexadecimal characters per input byte
37 |     */
38 |    public static String hexlify(byte[] bytes) {
39 |       final char[] hexChars = new char[bytes.length * 2];
40 |       for (int j = 0; j < bytes.length; j++) {
41 |          final int v = bytes[j] & 0xFF;
42 |          hexChars[j * 2] = hexArray[v >>> 4];
43 |          hexChars[j * 2 + 1] = hexArray[v & 0x0F];
44 |       }
45 |       return new String(hexChars);
46 |    }
47 | 
48 |    /**
49 |     * Decodes a hexadecimal string (upper or lower case) into bytes.
50 |     *
51 |     * @param string hexadecimal text with an even number of characters
52 |     * @return the decoded bytes
53 |     * @throws RuntimeException if the length is odd or a character is not a
54 |     *         hexadecimal digit
55 |     */
56 |    public static byte[] unhexlify(String string) {
57 |       final int length = string.length();
58 |       if (length % 2 != 0) {
59 |          throw new RuntimeException("Odd-length string");
60 |       }
61 |       final byte[] bytes = new byte[length / 2];
62 |       for (int i = 0; i < length; i += 2) {
63 |          final int top = fromHexChar(string.charAt(i));
64 |          final int bot = fromHexChar(string.charAt(i + 1));
65 |          bytes[i / 2] = (byte) ((top << 4) | bot);
66 |       }
67 |       return bytes;
68 |    }
69 | 
70 | }
71 | 


--------------------------------------------------------------------------------
/src/main/java/tk/netindev/drill/util/Misc.java:
--------------------------------------------------------------------------------
 1 | package tk.netindev.drill.util;
 2 | 
 3 | import java.io.File;
 4 | import java.io.FileOutputStream;
 5 | import java.io.IOException;
 6 | import java.io.InputStream;
 7 | 
 8 | /**
 9 |  *
10 |  * @author netindev
11 |  *
12 |  */
13 | public class Misc {
14 | 
15 |    /**
16 |     * @param string text to check, may be {@code null}
17 |     * @return {@code true} when {@link Integer#parseInt(String)} accepts it
18 |     */
19 |    public static boolean isInteger(String string) {
20 |       try {
21 |          Integer.parseInt(string);
22 |          return true;
23 |       } catch (final NumberFormatException e) {
24 |          return false;
25 |       }
26 |    }
27 | 
28 |    /**
29 |     * Copies a classpath resource to a temporary file and loads it as a
30 |     * native library.
31 |     *
32 |     * @param name absolute resource path of the library
33 |     * @throws IOException if name is {@code null}, the resource does not
34 |     *         exist, or it cannot be copied
35 |     */
36 |    public static void loadLibrary(String name) throws IOException {
37 |       if (name == null) {
38 |          throw new IOException("No native library specified");
39 |       }
40 |       final File temp;
41 |       try (InputStream inputStream = Misc.class.getResourceAsStream(name)) {
42 |          if (inputStream == null) {
43 |             throw new IOException("Native library not found: " + name);
44 |          }
45 |          // Use only the file name: the resource path contains separators,
46 |          // which are not a valid temp-file prefix on every runtime.
47 |          temp = File.createTempFile("drill-", "-" + new File(name).getName());
48 |          temp.deleteOnExit();
49 |          try (FileOutputStream outputStream = new FileOutputStream(temp)) {
50 |             final byte[] buffer = new byte[1024];
51 |             int read;
52 |             while ((read = inputStream.read(buffer)) != -1) {
53 |                outputStream.write(buffer, 0, read);
54 |             }
55 |          }
56 |       }
57 |       System.load(temp.getAbsolutePath());
58 |    }
59 | 
60 | }
61 | 


--------------------------------------------------------------------------------
/src/main/jni/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.6)
 2 | project (cryptonight)
 3 | # Headers passed to add_library() below together with the sources.
 4 | set (HEADER_FILES
 5 | 	algos.hpp
 6 | 	c_blake256.h
 7 | 	c_groestl.h
 8 |         c_jh.h
 9 |         c_keccak.h
10 |         c_skein.h
11 |         cryptonight.h
12 |         cryptonight_aesni.h
13 |         groestl_tables.h
14 |         hash.h
15 |         int-util.h
16 |         skein_port.h
17 |         soft_aes.hpp
18 | )
19 | # C and C++ translation units compiled into the library.
20 | set (SOURCE_FILES
21 | 	c_blake256.c
22 |         c_groestl.c
23 | 	c_jh.c
24 |         c_keccak.c
25 |         c_skein.c
26 |         cryptonight_common.cpp
27 |         hasher.cpp
28 | )
29 | # Flags added to every compile command; NOTE(review): -msse4.1/-maes look like GCC/Clang switches, confirm for MSVC.
30 | add_definitions(-D_CRT_SECURE_NO_WARNINGS -msse4.1 -maes)
31 | # Shared library named "cryptonight".
32 | add_library(cryptonight SHARED
33 | 	${HEADER_FILES}
34 | ${SOURCE_FILES})


--------------------------------------------------------------------------------
/src/main/jni/algos.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <stddef.h>
  3 | #include <inttypes.h>
  4 | #include <type_traits>
  5 | 
  6 | // "Lite" parameter set (aeon settings): 1 MiB memory size
  7 | constexpr size_t CRYPTONIGHT_LITE_MEMORY = 1 * 1024 * 1024;
  8 | constexpr uint32_t CRYPTONIGHT_LITE_MASK = 0xFFFF0;
  9 | constexpr uint32_t CRYPTONIGHT_LITE_ITER = 0x40000;
 10 | // Default parameter set: 2 MiB memory size
 11 | constexpr size_t CRYPTONIGHT_MEMORY = 2 * 1024 * 1024;
 12 | constexpr uint32_t CRYPTONIGHT_MASK = 0x1FFFF0;
 13 | constexpr uint32_t CRYPTONIGHT_ITER = 0x80000;
 14 | // "Heavy" parameter set: 4 MiB memory size
 15 | constexpr size_t CRYPTONIGHT_HEAVY_MEMORY = 4 * 1024 * 1024;
 16 | constexpr uint32_t CRYPTONIGHT_HEAVY_MASK = 0x3FFFF0;
 17 | constexpr uint32_t CRYPTONIGHT_HEAVY_ITER = 0x40000;
 18 | // Iteration counts for the masari and superfast variants
 19 | constexpr uint32_t CRYPTONIGHT_MASARI_ITER = 0x40000;
 20 | 
 21 | constexpr uint32_t CRYPTONIGHT_SUPERFAST_ITER = 0x20000;
 22 | 
 23 | enum algo // algorithm identifiers; the values are also used as template arguments of cn_select_memory/cn_select_mask
 24 | {
 25 | 	invalid_algo = 0,
 26 | 	cryptonight = 1,
 27 | 	cryptonight_lite = 2,
 28 | 	cryptonight_monero = 3,
 29 | 	cryptonight_heavy = 4,
 30 | 	cryptonight_aeon = 5,
 31 | 	cryptonight_ipbc = 6, // equal to cryptonight_aeon with a small tweak in the miner code
 32 | 	cryptonight_stellite = 7, // equal to cryptonight_monero but with one tiny change
 33 | 	cryptonight_masari = 8, // equal to cryptonight_monero but with fewer iterations, used by masari
 34 | 	cryptonight_haven = 9, // equal to cryptonight_heavy with a small tweak
 35 | 	cryptonight_bittube2 = 10, // derived from cryptonight_heavy with own aes-round implementation and minor other tweaks
 36 | 	cryptonight_monero_v8 = 11,
 37 | 	cryptonight_superfast = 12
 38 | };
 39 | 
 40 | template<int i>
 41 | inline constexpr size_t cn_select_memory() { return 0; } // fallback for values without a specialisation
 42 | // Compile-time memory size per algorithm, one specialisation per enumerator, in enum order.
 43 | template<>
 44 | inline constexpr size_t cn_select_memory<cryptonight>() { return CRYPTONIGHT_MEMORY; }
 45 | 
 46 | template<>
 47 | inline constexpr size_t cn_select_memory<cryptonight_lite>() { return CRYPTONIGHT_LITE_MEMORY; }
 48 | 
 49 | template<>
 50 | inline constexpr size_t cn_select_memory<cryptonight_monero>() { return CRYPTONIGHT_MEMORY; }
 51 | 
 52 | template<>
 53 | inline constexpr size_t cn_select_memory<cryptonight_heavy>() { return CRYPTONIGHT_HEAVY_MEMORY; }
 54 | 
 55 | template<>
 56 | inline constexpr size_t cn_select_memory<cryptonight_aeon>() { return CRYPTONIGHT_LITE_MEMORY; }
 57 | 
 58 | template<>
 59 | inline constexpr size_t cn_select_memory<cryptonight_ipbc>() { return CRYPTONIGHT_LITE_MEMORY; }
 60 | 
 61 | template<>
 62 | inline constexpr size_t cn_select_memory<cryptonight_stellite>() { return CRYPTONIGHT_MEMORY; }
 63 | 
 64 | template<>
 65 | inline constexpr size_t cn_select_memory<cryptonight_masari>() { return CRYPTONIGHT_MEMORY; }
 66 | 
 67 | template<>
 68 | inline constexpr size_t cn_select_memory<cryptonight_haven>() { return CRYPTONIGHT_HEAVY_MEMORY; }
 69 | 
 70 | template<>
 71 | inline constexpr size_t cn_select_memory<cryptonight_bittube2>() { return CRYPTONIGHT_HEAVY_MEMORY; }
 72 | 
 73 | template<>
 74 | inline constexpr size_t cn_select_memory<cryptonight_monero_v8>() { return CRYPTONIGHT_MEMORY; }
 75 | 
 76 | template<>
 77 | inline constexpr size_t cn_select_memory<cryptonight_superfast>() { return CRYPTONIGHT_MEMORY; }
 78 | 
 79 | inline size_t cn_select_memory(algo algo)
 80 | {
 81 | 	switch (algo)
 82 | 	{
 83 | 	case cryptonight_stellite:
 84 | 	case cryptonight_monero:
 85 | 	case cryptonight_monero_v8:
 86 | 	case cryptonight_masari:
 87 | 	case cryptonight:
 88 | 	case cryptonight_superfast:
 89 | 		return CRYPTONIGHT_MEMORY;
 90 | 	case cryptonight_ipbc:
 91 | 	case cryptonight_aeon:
 92 | 	case cryptonight_lite:
 93 | 		return CRYPTONIGHT_LITE_MEMORY;
 94 | 	case cryptonight_bittube2:
 95 | 	case cryptonight_haven:
 96 | 	case cryptonight_heavy:
 97 | 		return CRYPTONIGHT_HEAVY_MEMORY;
 98 | 	default:
 99 | 		return 0;
100 | 	}
101 | }
102 | 
103 | template<int i>
104 | inline constexpr uint32_t cn_select_mask() { return 0; }
105 | 
106 | template<>
107 | inline constexpr uint32_t cn_select_mask<1>() { return CRYPTONIGHT_MASK; }
108 | 
109 | template<>
110 | inline constexpr uint32_t cn_select_mask<2>() { return CRYPTONIGHT_LITE_MASK; }
111 | 
112 | template<>
113 | inline constexpr uint32_t cn_select_mask<cryptonight_monero>() { return CRYPTONIGHT_MASK; }
114 | 
115 | template<>
116 | inline constexpr uint32_t cn_select_mask<cryptonight_monero_v8>() { return CRYPTONIGHT_MASK; }
117 | 
118 | template<>
119 | inline constexpr uint32_t cn_select_mask<cryptonight_heavy>() { return CRYPTONIGHT_HEAVY_MASK; }
120 | 
121 | template<>
122 | inline constexpr uint32_t cn_select_mask<cryptonight_aeon>() { return CRYPTONIGHT_LITE_MASK; }
123 | 
124 | template<>
125 | inline constexpr uint32_t cn_select_mask<cryptonight_ipbc>() { return CRYPTONIGHT_LITE_MASK; }
126 | 
127 | template<>
128 | inline constexpr uint32_t cn_select_mask<cryptonight_stellite>() { return CRYPTONIGHT_MASK; }
129 | 
130 | template<>
131 | inline constexpr uint32_t cn_select_mask<cryptonight_masari>() { return CRYPTONIGHT_MASK; }
132 | 
133 | template<>
134 | inline constexpr uint32_t cn_select_mask<cryptonight_haven>() { return CRYPTONIGHT_HEAVY_MASK; }
135 | 
136 | template<>
137 | inline constexpr uint32_t cn_select_mask<cryptonight_bittube2>() { return CRYPTONIGHT_HEAVY_MASK; }
138 | 
139 | template<>
140 | inline constexpr uint32_t cn_select_mask<cryptonight_superfast>() { return CRYPTONIGHT_MASK; }
141 | 
142 | inline size_t cn_select_mask(algo algo)
143 | {
144 | 	switch (algo)
145 | 	{
146 | 	case cryptonight_stellite:
147 | 	case cryptonight_monero:
148 | 	case cryptonight_monero_v8:
149 | 	case cryptonight_masari:
150 | 	case cryptonight:
151 | 	case cryptonight_superfast:
152 | 		return CRYPTONIGHT_MASK;
153 | 	case cryptonight_ipbc:
154 | 	case cryptonight_aeon:
155 | 	case cryptonight_lite:
156 | 		return CRYPTONIGHT_LITE_MASK;
157 | 	case cryptonight_bittube2:
158 | 	case cryptonight_haven:
159 | 	case cryptonight_heavy:
160 | 		return CRYPTONIGHT_HEAVY_MASK;
161 | 	default:
162 | 		return 0;
163 | 	}
164 | }
165 | 
166 | template<algo ALGO>
167 | inline constexpr uint32_t cn_select_iter() { return 0; }
168 | 
169 | template<>
170 | inline constexpr uint32_t cn_select_iter<cryptonight>() { return CRYPTONIGHT_ITER; }
171 | 
172 | template<>
173 | inline constexpr uint32_t cn_select_iter<cryptonight_lite>() { return CRYPTONIGHT_LITE_ITER; }
174 | 
175 | template<>
176 | inline constexpr uint32_t cn_select_iter<cryptonight_monero>() { return CRYPTONIGHT_ITER; }
177 | 
178 | template<>
179 | inline constexpr uint32_t cn_select_iter<cryptonight_monero_v8>() { return CRYPTONIGHT_ITER; }
180 | 
181 | template<>
182 | inline constexpr uint32_t cn_select_iter<cryptonight_heavy>() { return CRYPTONIGHT_HEAVY_ITER; }
183 | 
184 | template<>
185 | inline constexpr uint32_t cn_select_iter<cryptonight_aeon>() { return CRYPTONIGHT_LITE_ITER; }
186 | 
187 | template<>
188 | inline constexpr uint32_t cn_select_iter<cryptonight_ipbc>() { return CRYPTONIGHT_LITE_ITER; }
189 | 
190 | template<>
191 | inline constexpr uint32_t cn_select_iter<cryptonight_stellite>() { return CRYPTONIGHT_ITER; }
192 | 
193 | template<>
194 | inline constexpr uint32_t cn_select_iter<cryptonight_masari>() { return CRYPTONIGHT_MASARI_ITER; }
195 | 
196 | template<>
197 | inline constexpr uint32_t cn_select_iter<cryptonight_haven>() { return CRYPTONIGHT_HEAVY_ITER; }
198 | 
199 | template<>
200 | inline constexpr uint32_t cn_select_iter<cryptonight_bittube2>() { return CRYPTONIGHT_HEAVY_ITER; }
201 | 
202 | template<>
203 | inline constexpr uint32_t cn_select_iter<cryptonight_superfast>() { return CRYPTONIGHT_SUPERFAST_ITER; }
204 | 
205 | inline size_t cn_select_iter(algo algo)
206 | {
207 | 	switch (algo)
208 | 	{
209 | 	case cryptonight_stellite:
210 | 	case cryptonight_monero:
211 | 	case cryptonight_monero_v8:
212 | 	case cryptonight:
213 | 		return CRYPTONIGHT_ITER;
214 | 	case cryptonight_ipbc:
215 | 	case cryptonight_aeon:
216 | 	case cryptonight_lite:
217 | 		return CRYPTONIGHT_LITE_ITER;
218 | 	case cryptonight_bittube2:
219 | 	case cryptonight_haven:
220 | 	case cryptonight_heavy:
221 | 		return CRYPTONIGHT_HEAVY_ITER;
222 | 	case cryptonight_masari:
223 | 		return CRYPTONIGHT_MASARI_ITER;
224 | 	case cryptonight_superfast:
225 | 		return CRYPTONIGHT_SUPERFAST_ITER;
226 | 	default:
227 | 		return 0;
228 | 	}
229 | }
230 | 


--------------------------------------------------------------------------------
/src/main/jni/c_blake256.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * The blake256_* and blake224_* functions are largely copied from
  3 |  * blake256_light.c and blake224_light.c from the BLAKE website:
  4 |  *
  5 |  *     http://131002.net/blake/
  6 |  *
  7 |  * The hmac_* functions implement HMAC-BLAKE-256 and HMAC-BLAKE-224.
  8 |  * HMAC is specified by RFC 2104.
  9 |  */
 10 | 
 11 | #include <string.h>
 12 | #include <stdio.h>
 13 | #include <stdint.h>
 14 | #include "c_blake256.h"
 15 | 
/* Read the four bytes at p as one 32-bit word, most significant byte first. */
#define U8TO32(p) \
	(((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) |    \
	 ((uint32_t)((p)[2]) <<  8) | ((uint32_t)((p)[3])      ))
/* Store the 32-bit value v into the four bytes at p, most significant byte
 * first. NOTE: this expands to four separate statements and evaluates p and
 * v several times, so use it only as a stand-alone statement with
 * side-effect-free arguments (never as the sole body of an unbraced if). */
#define U32TO8(p, v) \
	(p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
	(p)[2] = (uint8_t)((v) >>  8); (p)[3] = (uint8_t)((v)      );

/* Index schedule used by the G macro in blake256_compress: one row of 16
 * indices per round, 14 rows in total. Rows 10-13 repeat rows 0-3. */
const uint8_t sigma[][16] = {
	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
	{14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
	{11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
	{ 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8},
	{ 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13},
	{ 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9},
	{12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11},
	{13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10},
	{ 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5},
	{10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0},
	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
	{14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
	{11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
	{ 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8}
};

/* The 16 word constants mixed with the message words inside G. The first
 * eight values are also written out literally in blake256_compress when the
 * working vector v[8..15] is initialised. */
const uint32_t cst[16] = {
	0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
	0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
	0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
	0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
};

/* 64 bytes of padding source: a single 0x80 byte followed by 63 zero bytes.
 * blake256_final_h feeds `padding` (starting with the 0x80 marker) or
 * `padding + 1` (zeros only) into blake256_update. */
static const uint8_t padding[] = {
	0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
 51 | 
 52 | 
/*
 * Compress one 64-byte block into the chaining value S->h.
 *
 * S     - hash state; h is updated in place, s, t and nullt are only read.
 * block - 64 bytes of input, interpreted as 16 big-endian 32-bit words.
 *
 * The helper macros ROT and G are defined inside the function body and are
 * not #undef'd, so they stay visible for the rest of the translation unit.
 */
void blake256_compress(state *S, const uint8_t *block) {
	uint32_t v[16], m[16], i;

/* ROT rotates a 32-bit value right by n bits. G is the quarter-round that
 * mixes v[a], v[b], v[c], v[d] using the message/constant pair selected by
 * sigma[i][e] and sigma[i][e+1]; it relies on the loop variable i. */
#define ROT(x,n) (((x)<<(32-n))|((x)>>(n)))
#define G(a,b,c,d,e)                                      \
	v[a] += (m[sigma[i][e]] ^ cst[sigma[i][e+1]]) + v[b]; \
	v[d] = ROT(v[d] ^ v[a],16);                           \
	v[c] += v[d];                                         \
	v[b] = ROT(v[b] ^ v[c],12);                           \
	v[a] += (m[sigma[i][e+1]] ^ cst[sigma[i][e]])+v[b];   \
	v[d] = ROT(v[d] ^ v[a], 8);                           \
	v[c] += v[d];                                         \
	v[b] = ROT(v[b] ^ v[c], 7);

	/* load the message words and initialise the 16-word working vector:
	 * v[0..7] = chaining value, v[8..11] = salt ^ constants,
	 * v[12..15] = constants (same values as cst[4..7]) */
	for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4);
	for (i = 0; i < 8;  ++i) v[i] = S->h[i];
	v[ 8] = S->s[0] ^ 0x243F6A88;
	v[ 9] = S->s[1] ^ 0x85A308D3;
	v[10] = S->s[2] ^ 0x13198A2E;
	v[11] = S->s[3] ^ 0x03707344;
	v[12] = 0xA4093822;
	v[13] = 0x299F31D0;
	v[14] = 0x082EFA98;
	v[15] = 0xEC4E6C89;

	/* mix in the bit counter unless the caller flagged this block via nullt
	 * (blake256_final_h sets nullt for a block holding no message bits) */
	if (S->nullt == 0) {
		v[12] ^= S->t[0];
		v[13] ^= S->t[0];
		v[14] ^= S->t[1];
		v[15] ^= S->t[1];
	}

	/* 14 rounds; each applies G to the four columns, then the four diagonals */
	for (i = 0; i < 14; ++i) {
		G(0, 4,  8, 12,  0);
		G(1, 5,  9, 13,  2);
		G(2, 6, 10, 14,  4);
		G(3, 7, 11, 15,  6);
		G(3, 4,  9, 14, 14);
		G(2, 7,  8, 13, 12);
		G(0, 5, 10, 15,  8);
		G(1, 6, 11, 12, 10);
	}

	/* fold both halves of v and the salt back into the chaining value */
	for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i];
	for (i = 0; i < 8;  ++i) S->h[i] ^= S->s[i % 4];
}
 99 | 
100 | void blake256_init(state *S) {
101 | 	S->h[0] = 0x6A09E667;
102 | 	S->h[1] = 0xBB67AE85;
103 | 	S->h[2] = 0x3C6EF372;
104 | 	S->h[3] = 0xA54FF53A;
105 | 	S->h[4] = 0x510E527F;
106 | 	S->h[5] = 0x9B05688C;
107 | 	S->h[6] = 0x1F83D9AB;
108 | 	S->h[7] = 0x5BE0CD19;
109 | 	S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
110 | 	S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
111 | }
112 | 
113 | void blake224_init(state *S) {
114 | 	S->h[0] = 0xC1059ED8;
115 | 	S->h[1] = 0x367CD507;
116 | 	S->h[2] = 0x3070DD17;
117 | 	S->h[3] = 0xF70E5939;
118 | 	S->h[4] = 0xFFC00B31;
119 | 	S->h[5] = 0x68581511;
120 | 	S->h[6] = 0x64F98FA7;
121 | 	S->h[7] = 0xBEFA4FA4;
122 | 	S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
123 | 	S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
124 | }
125 | 
126 | // datalen = number of bits
127 | void blake256_update(state *S, const uint8_t *data, uint32_t datalen) {
128 | 	int left = S->buflen >> 3;
129 | 	int fill = 64 - left;
130 | 
131 | 	if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) {
132 | 		memcpy((void *) (S->buf + left), (void *) data, fill);
133 | 		S->t[0] += 512;
134 | 		if (S->t[0] == 0) S->t[1]++;
135 | 		blake256_compress(S, S->buf);
136 | 		data += fill;
137 | 		datalen -= (fill << 3);
138 | 		left = 0;
139 | 	}
140 | 
141 | 	while (datalen >= 512) {
142 | 		S->t[0] += 512;
143 | 		if (S->t[0] == 0) S->t[1]++;
144 | 		blake256_compress(S, data);
145 | 		data += 64;
146 | 		datalen -= 512;
147 | 	}
148 | 
149 | 	if (datalen > 0) {
150 | 		memcpy((void *) (S->buf + left), (void *) data, datalen >> 3);
151 | 		S->buflen = (left << 3) + datalen;
152 | 	} else {
153 | 		S->buflen = 0;
154 | 	}
155 | }
156 | 
// BLAKE-224 update: forwards unchanged to blake256_update.
// datalen = number of bits
void blake224_update(state *S, const uint8_t *data, uint32_t datalen) {
	blake256_update(S, data, datalen);
}
161 | 
/*
 * Shared finalisation for BLAKE-256 and BLAKE-224: pad the buffered data,
 * append the 64-bit message length and write the chaining value to digest.
 *
 * S      - state holding the data absorbed so far (modified).
 * digest - output buffer; 32 bytes are always written (all eight words of
 *          S->h), also when called through blake224_final.
 * pa     - the single padding byte used when exactly one byte of padding
 *          fits before the length field (S->buflen == 440).
 * pb     - the last padding byte before the length field in all other cases.
 *
 * Padding is pushed through blake256_update, which adds 512 to the bit
 * counter for every block it compresses. The `S->t[0] -= ...` statements
 * subtract the padding bits beforehand so that the counter seen by the
 * compression function covers message bits only.
 */
void blake256_final_h(state *S, uint8_t *digest, uint8_t pa, uint8_t pb) {
	uint8_t msglen[8];
	/* total message length in bits as a 64-bit value (hi:lo) */
	uint32_t lo = S->t[0] + S->buflen, hi = S->t[1];
	if (lo < (unsigned) S->buflen) hi++;   /* carry from the low word */
	U32TO8(msglen + 0, hi);
	U32TO8(msglen + 4, lo);

	if (S->buflen == 440) { /* one padding byte */
		S->t[0] -= 8;
		blake256_update(S, &pa, 8);
	} else {
		if (S->buflen < 440) { /* enough space to fill the block  */
			/* a final block without any message bits skips the counter */
			if (S->buflen == 0) S->nullt = 1;
			S->t[0] -= 440 - S->buflen;
			blake256_update(S, padding, 440 - S->buflen);
		} else { /* need 2 compressions */
			S->t[0] -= 512 - S->buflen;
			blake256_update(S, padding, 512 - S->buflen);
			S->t[0] -= 440;
			/* padding + 1 skips the 0x80 marker: zeros only */
			blake256_update(S, padding + 1, 440);
			S->nullt = 1;
		}
		blake256_update(S, &pb, 8);
		S->t[0] -= 8;
	}
	/* the 64-bit length completes the block and triggers the compression */
	S->t[0] -= 64;
	blake256_update(S, msglen, 64);

	/* serialise the chaining value, most significant byte first */
	U32TO8(digest +  0, S->h[0]);
	U32TO8(digest +  4, S->h[1]);
	U32TO8(digest +  8, S->h[2]);
	U32TO8(digest + 12, S->h[3]);
	U32TO8(digest + 16, S->h[4]);
	U32TO8(digest + 20, S->h[5]);
	U32TO8(digest + 24, S->h[6]);
	U32TO8(digest + 28, S->h[7]);
}
199 | 
/* Finish a BLAKE-256 computation and write 32 bytes to digest.
 * Uses the padding bytes 0x81 (single-byte case) and 0x01. */
void blake256_final(state *S, uint8_t *digest) {
	blake256_final_h(S, digest, 0x81, 0x01);
}
203 | 
/* Finish a BLAKE-224 computation using the padding bytes 0x80 (single-byte
 * case) and 0x00. NOTE: blake256_final_h always writes 32 bytes, so digest
 * must have room for 32 bytes even though only 28 belong to the result. */
void blake224_final(state *S, uint8_t *digest) {
	blake256_final_h(S, digest, 0x80, 0x00);
}
207 | 
// One-shot BLAKE-256: hashes inlen bytes from `in` and writes 32 bytes to `out`.
// inlen = number of bytes
// NOTE: the length is converted to bits in 32-bit arithmetic (inlen * 8),
// which wraps around for inputs of 2^29 bytes or more.
void blake256_hash(uint8_t *out, const uint8_t *in, uint32_t inlen) {
	state S;
	blake256_init(&S);
	blake256_update(&S, in, inlen * 8);
	blake256_final(&S, out);
}
215 | 
// One-shot BLAKE-224: hashes inlen bytes from `in`.
// inlen = number of bytes
// NOTE: blake224_final writes 32 bytes, so `out` must hold 32 bytes.
// NOTE: the length is converted to bits in 32-bit arithmetic (inlen * 8),
// which wraps around for inputs of 2^29 bytes or more.
void blake224_hash(uint8_t *out, const uint8_t *in, uint32_t inlen) {
	state S;
	blake224_init(&S);
	blake224_update(&S, in, inlen * 8);
	blake224_final(&S, out);
}
223 | 
224 | // keylen = number of bytes
225 | void hmac_blake256_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
226 | 	const uint8_t *key = _key;
227 | 	uint8_t keyhash[32];
228 | 	uint8_t pad[64];
229 | 	uint64_t i;
230 | 
231 | 	if (keylen > 64) {
232 | 		blake256_hash(keyhash, key, keylen);
233 | 		key = keyhash;
234 | 		keylen = 32;
235 | 	}
236 | 
237 | 	blake256_init(&S->inner);
238 | 	memset(pad, 0x36, 64);
239 | 	for (i = 0; i < keylen; ++i) {
240 | 		pad[i] ^= key[i];
241 | 	}
242 | 	blake256_update(&S->inner, pad, 512);
243 | 
244 | 	blake256_init(&S->outer);
245 | 	memset(pad, 0x5c, 64);
246 | 	for (i = 0; i < keylen; ++i) {
247 | 		pad[i] ^= key[i];
248 | 	}
249 | 	blake256_update(&S->outer, pad, 512);
250 | 
251 | 	memset(keyhash, 0, 32);
252 | }
253 | 
254 | // keylen = number of bytes
255 | void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
256 | 	const uint8_t *key = _key;
257 | 	uint8_t keyhash[32];
258 | 	uint8_t pad[64];
259 | 	uint64_t i;
260 | 
261 | 	if (keylen > 64) {
262 | 		blake256_hash(keyhash, key, keylen);
263 | 		key = keyhash;
264 | 		keylen = 28;
265 | 	}
266 | 
267 | 	blake224_init(&S->inner);
268 | 	memset(pad, 0x36, 64);
269 | 	for (i = 0; i < keylen; ++i) {
270 | 		pad[i] ^= key[i];
271 | 	}
272 | 	blake224_update(&S->inner, pad, 512);
273 | 
274 | 	blake224_init(&S->outer);
275 | 	memset(pad, 0x5c, 64);
276 | 	for (i = 0; i < keylen; ++i) {
277 | 		pad[i] ^= key[i];
278 | 	}
279 | 	blake224_update(&S->outer, pad, 512);
280 | 
281 | 	memset(keyhash, 0, 32);
282 | }
283 | 
// Feed message data into an HMAC-BLAKE-256 computation; only the inner hash
// consumes the message, the outer hash is completed in hmac_blake256_final.
// datalen = number of bits
void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint32_t datalen) {
  // update the inner state
  blake256_update(&S->inner, data, datalen);
}
289 | 
// Feed message data into an HMAC-BLAKE-224 computation; only the inner hash
// consumes the message, the outer hash is completed in hmac_blake224_final.
// datalen = number of bits
void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint32_t datalen) {
  // update the inner state
  blake224_update(&S->inner, data, datalen);
}
295 | 
/* Finish an HMAC-BLAKE-256 computation: finalise the inner hash, feed its
 * 256-bit digest into the outer hash and write the outer digest (32 bytes)
 * to `digest`. The intermediate digest is cleared afterwards. */
void hmac_blake256_final(hmac_state *S, uint8_t *digest) {
	uint8_t ihash[32];
	blake256_final(&S->inner, ihash);
	blake256_update(&S->outer, ihash, 256);
	blake256_final(&S->outer, digest);
	memset(ihash, 0, 32);
}
303 | 
/* Finish an HMAC-BLAKE-224 computation: finalise the inner hash, feed the
 * first 224 bits of its digest into the outer hash and finalise the outer
 * hash into `digest`. NOTE: blake224_final writes 32 bytes, so `digest`
 * must hold 32 bytes. The intermediate digest is cleared afterwards. */
void hmac_blake224_final(hmac_state *S, uint8_t *digest) {
	uint8_t ihash[32];
	blake224_final(&S->inner, ihash);
	blake224_update(&S->outer, ihash, 224);
	blake224_final(&S->outer, digest);
	memset(ihash, 0, 32);
}
311 | 
// One-shot HMAC-BLAKE-256 over `in`, keyed with `key`; writes 32 bytes to `out`.
// keylen = number of bytes; inlen = number of bytes
// NOTE: inlen * 8 is computed in 32-bit arithmetic and wraps around for
// inputs of 2^29 bytes or more.
void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint32_t inlen) {
	hmac_state S;
	hmac_blake256_init(&S, key, keylen);
	hmac_blake256_update(&S, in, inlen * 8);
	hmac_blake256_final(&S, out);
}
319 | 
// One-shot HMAC-BLAKE-224 over `in`, keyed with `key`.
// keylen = number of bytes; inlen = number of bytes
// NOTE: the final step writes 32 bytes, so `out` must hold 32 bytes.
// NOTE: inlen * 8 is computed in 32-bit arithmetic and wraps around for
// inputs of 2^29 bytes or more.
void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint32_t inlen) {
	hmac_state S;
	hmac_blake224_init(&S, key, keylen);
	hmac_blake224_update(&S, in, inlen * 8);
	hmac_blake224_final(&S, out);
}
327 | 


--------------------------------------------------------------------------------
/src/main/jni/c_blake256.h:
--------------------------------------------------------------------------------
 1 | #ifndef _BLAKE256_H_
 2 | #define _BLAKE256_H_
 3 | 
 4 | #include <stdint.h>
 5 | 
/* Hash state shared by BLAKE-256 and BLAKE-224.
 *   h      - chaining value (eight 32-bit words)
 *   s      - salt words; the init functions set them to zero
 *   t      - bit counter: t[0] is the low word, t[1] the high word
 *   buflen - number of BITS currently held in buf
 *   nullt  - when non-zero, the compression function skips the counter
 *   buf    - buffer for one 64-byte block */
typedef struct {
  uint32_t h[8], s[4], t[2];
  int buflen, nullt;
  uint8_t buf[64];
} state;

/* HMAC state: the inner hash consumes the message, the outer hash consumes
 * the digest of the inner one. */
typedef struct {
  state inner;
  state outer;
} hmac_state;

/* Reset a state for a new computation. */
void blake256_init(state *);
void blake224_init(state *);

/* Absorb data; the length argument is in BITS. */
void blake256_update(state *, const uint8_t *, uint32_t);
void blake224_update(state *, const uint8_t *, uint32_t);

/* Pad, finish and write the digest. Both variants write 32 bytes. */
void blake256_final(state *, uint8_t *);
void blake224_final(state *, uint8_t *);

/* One-shot hashing: (out, in, length of in in BYTES). */
void blake256_hash(uint8_t *, const uint8_t *, uint32_t);
void blake224_hash(uint8_t *, const uint8_t *, uint32_t);

/* HMAC functions: */

/* (state, key, key length in BYTES) */
void hmac_blake256_init(hmac_state *, const uint8_t *, uint64_t);
void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t);

/* (state, data, data length in BITS) */
void hmac_blake256_update(hmac_state *, const uint8_t *, uint32_t);
void hmac_blake224_update(hmac_state *, const uint8_t *, uint32_t);

/* (state, digest out) */
void hmac_blake256_final(hmac_state *, uint8_t *);
void hmac_blake224_final(hmac_state *, uint8_t *);

/* One-shot HMAC: (out, key, key length in BYTES, in, length of in in BYTES). */
void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint32_t);
void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint32_t);
42 | 
43 | #endif /* _BLAKE256_H_ */
44 | 


--------------------------------------------------------------------------------
/src/main/jni/c_groestl.c:
--------------------------------------------------------------------------------
  1 | /* hash.c     April 2012
  2 |  * Groestl ANSI C code optimised for 32-bit machines
  3 |  * Author: Thomas Krinninger
  4 |  *
  5 |  *  This work is based on the implementation of
  6 |  *          Soeren S. Thomsen and Krystian Matusiewicz
  7 |  *
  8 |  *
  9 |  */
 10 | 
 11 | #include "c_groestl.h"
 12 | #include "groestl_tables.h"
 13 | 
/* Permutation type tags and shift/index tables. None of the four
 * definitions below is referenced by the code in this file; the arrays have
 * external linkage, so other translation units may still use them. */
#define P_TYPE 0
#define Q_TYPE 1

const uint8_t shift_Values[2][8] = {{0,1,2,3,4,5,6,7},{1,3,5,7,0,2,4,6}};

const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6};


/* Rotate the pair of 32-bit words (v1, v2) by amount_bytes bytes:
 *   new v1 = (v1 << 8*amount) | (v2 >> 8*(4-amount))
 *   new v2 = (v2 << 8*amount) | (v1 >> 8*(4-amount))
 * temp_var is scratch storage. amount_bytes is used with 1, 2 and 3 here;
 * 0 or 4 would shift by 32 bits, which is undefined for 32-bit operands. */
#define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
															v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
															v1 = temp_var;}


/* Compute one output column: writes the two 32-bit words y[i] and y[i+1].
 * x is a byte array; c0..c7 select the 32-bit word of x from which byte 0,
 * 1, 2, 3, 0, 1, 2, 3 (in that order) is taken. Every selected byte indexes
 * the table T (two 32-bit words per entry; presumably declared in
 * groestl_tables.h - confirm), the looked-up pair is rotated by 0-3 bytes
 * and XORed into the accumulators tu/tl. For c4..c7 the two halves are
 * XORed crosswise (first word into tl, second into tu).
 * tv1, tv2, tu, tl and t are caller-provided scratch variables. */
#define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t)				\
   tu = T[2*(uint32_t)x[4*c0+0]];			    \
   tl = T[2*(uint32_t)x[4*c0+0]+1];		    \
   tv1 = T[2*(uint32_t)x[4*c1+1]];			\
   tv2 = T[2*(uint32_t)x[4*c1+1]+1];			\
   ROTATE_COLUMN_DOWN(tv1,tv2,1,t)	\
   tu ^= tv1;						\
   tl ^= tv2;						\
   tv1 = T[2*(uint32_t)x[4*c2+2]];			\
   tv2 = T[2*(uint32_t)x[4*c2+2]+1];			\
   ROTATE_COLUMN_DOWN(tv1,tv2,2,t)	\
   tu ^= tv1;						\
   tl ^= tv2;   					\
   tv1 = T[2*(uint32_t)x[4*c3+3]];			\
   tv2 = T[2*(uint32_t)x[4*c3+3]+1];			\
   ROTATE_COLUMN_DOWN(tv1,tv2,3,t)	\
   tu ^= tv1;						\
   tl ^= tv2;						\
   tl ^= T[2*(uint32_t)x[4*c4+0]];			\
   tu ^= T[2*(uint32_t)x[4*c4+0]+1];			\
   tv1 = T[2*(uint32_t)x[4*c5+1]];			\
   tv2 = T[2*(uint32_t)x[4*c5+1]+1];			\
   ROTATE_COLUMN_DOWN(tv1,tv2,1,t)	\
   tl ^= tv1;						\
   tu ^= tv2;						\
   tv1 = T[2*(uint32_t)x[4*c6+2]];			\
   tv2 = T[2*(uint32_t)x[4*c6+2]+1];			\
   ROTATE_COLUMN_DOWN(tv1,tv2,2,t)	\
   tl ^= tv1;						\
   tu ^= tv2;   					\
   tv1 = T[2*(uint32_t)x[4*c7+3]];			\
   tv2 = T[2*(uint32_t)x[4*c7+3]+1];			\
   ROTATE_COLUMN_DOWN(tv1,tv2,3,t)	\
   tl ^= tv1;						\
   tu ^= tv2;						\
   y[i] = tu;						\
   y[i+1] = tl;
 64 | 
 65 | 
/* compute one round of P (short variants)
 *
 * x - 64-byte input state; MODIFIED in place: the round constant is XORed
 *     into the even-numbered 32-bit words before the columns are computed.
 * y - output state (16 words); must not overlap x.
 * r - round value XORed into each constant; callers in this file pass the
 *     round number 0..9 in the low byte. */
static void RND512P(uint8_t *x, uint32_t *y, uint32_t r) {
  uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
  uint32_t* x32 = (uint32_t*)x;
  /* add round constants: word 2*j receives (j << 4) ^ r */
  x32[ 0] ^= 0x00000000^r;
  x32[ 2] ^= 0x00000010^r;
  x32[ 4] ^= 0x00000020^r;
  x32[ 6] ^= 0x00000030^r;
  x32[ 8] ^= 0x00000040^r;
  x32[10] ^= 0x00000050^r;
  x32[12] ^= 0x00000060^r;
  x32[14] ^= 0x00000070^r;
  /* one COLUMN per output word pair; the eight indices select the source
     words of x for that column */
  COLUMN(x,y, 0,  0,  2,  4,  6,  9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y, 2,  2,  4,  6,  8, 11, 13, 15,  1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y, 4,  4,  6,  8, 10, 13, 15,  1,  3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y, 6,  6,  8, 10, 12, 15,  1,  3,  5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y, 8,  8, 10, 12, 14,  1,  3,  5,  7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y,10, 10, 12, 14,  0,  3,  5,  7,  9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y,12, 12, 14,  0,  2,  5,  7,  9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y,14, 14,  0,  2,  4,  7,  9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
}
 87 | 
/* compute one round of Q (short variants)
 *
 * x - 64-byte input state; MODIFIED in place: even-numbered 32-bit words
 *     are complemented and odd-numbered words are XORed with a constant
 *     before the columns are computed.
 * y - output state (16 words); must not overlap x.
 * r - round value XORed into each constant; callers in this file pass the
 *     round number 0..9 in the top byte (round << 24). */
static void RND512Q(uint8_t *x, uint32_t *y, uint32_t r) {
  uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
  uint32_t* x32 = (uint32_t*)x;
  /* add round constants: the top nibble of the odd-word constant counts
     down from 0xf to 0x8 */
  x32[ 0] = ~x32[ 0];
  x32[ 1] ^= 0xffffffff^r;
  x32[ 2] = ~x32[ 2];
  x32[ 3] ^= 0xefffffff^r;
  x32[ 4] = ~x32[ 4];
  x32[ 5] ^= 0xdfffffff^r;
  x32[ 6] = ~x32[ 6];
  x32[ 7] ^= 0xcfffffff^r;
  x32[ 8] = ~x32[ 8];
  x32[ 9] ^= 0xbfffffff^r;
  x32[10] = ~x32[10];
  x32[11] ^= 0xafffffff^r;
  x32[12] = ~x32[12];
  x32[13] ^= 0x9fffffff^r;
  x32[14] = ~x32[14];
  x32[15] ^= 0x8fffffff^r;
  /* one COLUMN per output word pair; the eight indices select the source
     words of x for that column */
  COLUMN(x,y, 0,  2,  6, 10, 14,  1,  5,  9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y, 2,  4,  8, 12,  0,  3,  7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y, 4,  6, 10, 14,  2,  5,  9, 13,  1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y, 6,  8, 12,  0,  4,  7, 11, 15,  3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y, 8, 10, 14,  2,  6,  9, 13,  1,  5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y,10, 12,  0,  4,  8, 11, 15,  3,  7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y,12, 14,  2,  6, 10, 13,  1,  5,  9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
  COLUMN(x,y,14,  0,  4,  8, 12, 15,  3,  7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
}
117 | 
118 | /* compute compression function (short variants) */
119 | static void F512(uint32_t *h, const uint32_t *m) {
120 |   int i;
121 |   uint32_t Ptmp[2*COLS512];
122 |   uint32_t Qtmp[2*COLS512];
123 |   uint32_t y[2*COLS512];
124 |   uint32_t z[2*COLS512];
125 | 
126 |   for (i = 0; i < 2*COLS512; i++) {
127 | 	z[i] = m[i];
128 | 	Ptmp[i] = h[i]^m[i];
129 |   }
130 | 
131 |   /* compute Q(m) */
132 |   RND512Q((uint8_t*)z, y, 0x00000000);
133 |   RND512Q((uint8_t*)y, z, 0x01000000);
134 |   RND512Q((uint8_t*)z, y, 0x02000000);
135 |   RND512Q((uint8_t*)y, z, 0x03000000);
136 |   RND512Q((uint8_t*)z, y, 0x04000000);
137 |   RND512Q((uint8_t*)y, z, 0x05000000);
138 |   RND512Q((uint8_t*)z, y, 0x06000000);
139 |   RND512Q((uint8_t*)y, z, 0x07000000);
140 |   RND512Q((uint8_t*)z, y, 0x08000000);
141 |   RND512Q((uint8_t*)y, Qtmp, 0x09000000);
142 | 
143 |   /* compute P(h+m) */
144 |   RND512P((uint8_t*)Ptmp, y, 0x00000000);
145 |   RND512P((uint8_t*)y, z, 0x00000001);
146 |   RND512P((uint8_t*)z, y, 0x00000002);
147 |   RND512P((uint8_t*)y, z, 0x00000003);
148 |   RND512P((uint8_t*)z, y, 0x00000004);
149 |   RND512P((uint8_t*)y, z, 0x00000005);
150 |   RND512P((uint8_t*)z, y, 0x00000006);
151 |   RND512P((uint8_t*)y, z, 0x00000007);
152 |   RND512P((uint8_t*)z, y, 0x00000008);
153 |   RND512P((uint8_t*)y, Ptmp, 0x00000009);
154 | 
155 |   /* compute P(h+m) + Q(m) + h */
156 |   for (i = 0; i < 2*COLS512; i++) {
157 | 	h[i] ^= Ptmp[i]^Qtmp[i];
158 |   }
159 | }
160 | 
161 | 
162 | /* digest up to msglen bytes of input (full blocks only) */
163 | static void Transform(groestlHashState *ctx,
164 | 	       const uint8_t *input,
165 | 	       int msglen) {
166 | 
167 |   /* digest message, one block at a time */
168 |   for (; msglen >= SIZE512;
169 | 	   msglen -= SIZE512, input += SIZE512) {
170 | 	F512(ctx->chaining,(uint32_t*)input);
171 | 
172 | 	/* increment block counter */
173 | 	ctx->block_counter1++;
174 | 	if (ctx->block_counter1 == 0) ctx->block_counter2++;
175 |   }
176 | }
177 | 
178 | /* given state h, do h <- P(h)+h */
179 | static void OutputTransformation(groestlHashState *ctx) {
180 |   int j;
181 |   uint32_t temp[2*COLS512];
182 |   uint32_t y[2*COLS512];
183 |   uint32_t z[2*COLS512];
184 | 
185 | 
186 | 
187 | 	for (j = 0; j < 2*COLS512; j++) {
188 | 	  temp[j] = ctx->chaining[j];
189 | 	}
190 | 	RND512P((uint8_t*)temp, y, 0x00000000);
191 | 	RND512P((uint8_t*)y, z, 0x00000001);
192 | 	RND512P((uint8_t*)z, y, 0x00000002);
193 | 	RND512P((uint8_t*)y, z, 0x00000003);
194 | 	RND512P((uint8_t*)z, y, 0x00000004);
195 | 	RND512P((uint8_t*)y, z, 0x00000005);
196 | 	RND512P((uint8_t*)z, y, 0x00000006);
197 | 	RND512P((uint8_t*)y, z, 0x00000007);
198 | 	RND512P((uint8_t*)z, y, 0x00000008);
199 | 	RND512P((uint8_t*)y, temp, 0x00000009);
200 | 	for (j = 0; j < 2*COLS512; j++) {
201 | 	  ctx->chaining[j] ^= temp[j];
202 | 	}
203 | }
204 | 
205 | /* initialise context */
206 | static void Init(groestlHashState* ctx) {
207 |   int i = 0;
208 |   /* allocate memory for state and data buffer */
209 | 
210 |   for(;i<(SIZE512/sizeof(uint32_t));i++)
211 |   {
212 | 	ctx->chaining[i] = 0;
213 |   }
214 | 
215 |   /* set initial value */
216 |   ctx->chaining[2*COLS512-1] = u32BIG((uint32_t)HASH_BIT_LEN);
217 | 
218 |   /* set other variables */
219 |   ctx->buf_ptr = 0;
220 |   ctx->block_counter1 = 0;
221 |   ctx->block_counter2 = 0;
222 |   ctx->bits_in_last_byte = 0;
223 | }
224 | 
/* update state with databitlen bits of input
 *
 * ctx        - hash context set up by Init.
 * input      - message bytes.
 * databitlen - message length in BITS. The byte count is narrowed to int.
 *
 * Whole SIZE512-byte blocks are compressed immediately; leftover bytes are
 * kept in ctx->buffer. If databitlen is not a multiple of 8, the partial
 * last byte is stored as well and the number of valid bits is recorded in
 * ctx->bits_in_last_byte. */
static void Update(groestlHashState* ctx,
		  const BitSequence* input,
		  DataLength databitlen) {
  int index = 0;                      /* next unread input byte */
  int msglen = (int)(databitlen/8);   /* whole bytes of input */
  int rem = (int)(databitlen%8);      /* bits in a trailing partial byte */

  /* if the buffer contains data that has not yet been digested, first
	 add data to buffer until full */
  if (ctx->buf_ptr) {
	while (ctx->buf_ptr < SIZE512 && index < msglen) {
	  ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
	}
	if (ctx->buf_ptr < SIZE512) {
	  /* buffer still not full, return */
	  if (rem) {
	ctx->bits_in_last_byte = rem;
	ctx->buffer[(int)ctx->buf_ptr++] = input[index];
	  }
	  return;
	}

	/* digest buffer */
	ctx->buf_ptr = 0;
	Transform(ctx, ctx->buffer, SIZE512);
  }

  /* digest bulk of message */
  Transform(ctx, input+index, msglen-index);
  /* skip the whole blocks that Transform has just consumed */
  index += ((msglen-index)/SIZE512)*SIZE512;

  /* store remaining data in buffer */
  while (index < msglen) {
	ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
  }

  /* if non-integral number of bytes have been supplied, store
	 remaining bits in last byte, together with information about
	 number of bits */
  if (rem) {
	ctx->bits_in_last_byte = rem;
	ctx->buffer[(int)ctx->buf_ptr++] = input[index];
  }
}
270 | 
271 | #define BILB ctx->bits_in_last_byte
272 | 
273 | /* finalise: process remaining data (including padding), perform
274 |    output transformation, and write hash result to 'output' */
275 | static void Final(groestlHashState* ctx,
276 | 		 BitSequence* output) {
277 |   int i, j = 0, hashbytelen = HASH_BIT_LEN/8;
278 |   uint8_t *s = (BitSequence*)ctx->chaining;
279 | 
280 |   /* pad with '1'-bit and first few '0'-bits */
281 |   if (BILB) {
282 | 	ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
283 | 	ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
284 | 	BILB = 0;
285 |   }
286 |   else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
287 | 
288 |   /* pad with '0'-bits */
289 |   if (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
290 | 	/* padding requires two blocks */
291 | 	while (ctx->buf_ptr < SIZE512) {
292 | 	  ctx->buffer[(int)ctx->buf_ptr++] = 0;
293 | 	}
294 | 	/* digest first padding block */
295 | 	Transform(ctx, ctx->buffer, SIZE512);
296 | 	ctx->buf_ptr = 0;
297 |   }
298 |   while (ctx->buf_ptr < SIZE512-LENGTHFIELDLEN) {
299 | 	ctx->buffer[(int)ctx->buf_ptr++] = 0;
300 |   }
301 | 
302 |   /* length padding */
303 |   ctx->block_counter1++;
304 |   if (ctx->block_counter1 == 0) ctx->block_counter2++;
305 |   ctx->buf_ptr = SIZE512;
306 | 
307 |   while (ctx->buf_ptr > SIZE512-(int)sizeof(uint32_t)) {
308 | 	ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1;
309 | 	ctx->block_counter1 >>= 8;
310 |   }
311 |   while (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
312 | 	ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2;
313 | 	ctx->block_counter2 >>= 8;
314 |   }
315 |   /* digest final padding block */
316 |   Transform(ctx, ctx->buffer, SIZE512);
317 |   /* perform output transformation */
318 |   OutputTransformation(ctx);
319 | 
320 |   /* store hash result in output */
321 |   for (i = SIZE512-hashbytelen; i < SIZE512; i++,j++) {
322 | 	output[j] = s[i];
323 |   }
324 | 
325 |   /* zeroise relevant variables and deallocate memory */
326 |   for (i = 0; i < COLS512; i++) {
327 | 	ctx->chaining[i] = 0;
328 |   }
329 |   for (i = 0; i < SIZE512; i++) {
330 | 	ctx->buffer[i] = 0;
331 |   }
332 | }
333 | 
/* hash bit sequence
 *
 * data       - message bytes.
 * databitlen - message length in BITS.
 * hashval    - receives the HASH_BIT_LEN/8-byte digest.
 *
 * One-shot driver: runs Init, Update and Final on a context that lives on
 * the stack. */
void groestl(const BitSequence* data,
		DataLength databitlen,
		BitSequence* hashval) {

  groestlHashState context;

  /* initialise */
	Init(&context);


  /* process message */
  Update(&context, data, databitlen);

  /* finalise */
  Final(&context, hashval);
}
351 | /*
352 | static int crypto_hash(unsigned char *out,
353 | 		const unsigned char *in,
354 | 		unsigned long long len)
355 | {
356 |   groestl(in, 8*len, out);
357 |   return 0;
358 | }
359 | 
360 | */
361 | 


--------------------------------------------------------------------------------
/src/main/jni/c_groestl.h:
--------------------------------------------------------------------------------
 1 | #ifndef __hash_h
 2 | #define __hash_h
 3 | /*
 4 | #include "crypto_uint8.h"
 5 | #include "crypto_uint32.h"
 6 | #include "crypto_uint64.h"
 7 | #include "crypto_hash.h"
 8 | 
 9 | typedef crypto_uint8 uint8_t;
10 | typedef crypto_uint32 uint32_t;
11 | typedef crypto_uint64 uint64_t;
12 | */
13 | #include <stdint.h>
14 | 
15 | #include "hash.h"
16 | 
/* some sizes (number of bytes) */
#define ROWS 8
#define LENGTHFIELDLEN ROWS
#define COLS512 8

#define SIZE512 (ROWS*COLS512)

#define ROUNDS512 10
#define HASH_BIT_LEN 256

/* li_32(h): build an unsigned 32-bit hex literal from the bare hex digits h,
   e.g. li_32(00FF00FF) -> 0x00FF00FFu. Defined before its first use below. */
#define li_32(h) 0x##h##u

/* Rotate the 32-bit value v left by n bits. n must be in 1..31: with n == 0
   or n == 32 one of the shifts would be by the full width of a 32-bit operand. */
#define ROTL32(v, n) ((((v)<<(n))|((v)>>(32-(n))))&li_32(ffffffff))

/* Extract byte n (0 = least significant) of the 32-bit value var.
   n is parenthesised so that expression arguments such as i+1 select
   byte (i+1) instead of shifting by 8*i+1 bits. */
#define EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*(n))))

/* Reverse the byte order of the 32-bit value a. */
#define u32BIG(a)				\
  ((ROTL32(a,8) & li_32(00FF00FF)) |		\
   (ROTL32(a,24) & li_32(FF00FF00)))
35 | 
36 | 
/* NIST API begin */
/* Working state of one Groestl hash computation. */
typedef struct {
  uint32_t chaining[SIZE512/sizeof(uint32_t)];            /* actual state */
  /* Block counter split in two 32-bit halves: Final() increments
     block_counter1 and carries into block_counter2 when it wraps to 0,
     then writes both into the last LENGTHFIELDLEN bytes of the buffer. */
  uint32_t block_counter1,
  block_counter2;         /* message block counter(s) */
  BitSequence buffer[SIZE512];      /* data buffer */
  int buf_ptr;              /* data buffer pointer (index into buffer) */
  int bits_in_last_byte;    /* no. of message bits in last byte of
                               data buffer */
} groestlHashState;

/* The incremental Init/Update/Final entry points are not exported from this
   header; only the one-shot groestl() below is declared. */
/*void Init(hashState*);
void Update(hashState*, const BitSequence*, DataLength);
void Final(hashState*, BitSequence*); */
/* One-shot hash: (message, message length, output buffer).
   NOTE(review): the length is presumably in bits and the digest
   HASH_BIT_LEN/8 bytes long -- confirm against c_groestl.c. */
void groestl(const BitSequence*, DataLength, BitSequence*);
/* NIST API end   */
53 | 
54 | /*
55 | int crypto_hash(unsigned char *out,
56 |                 const unsigned char *in,
57 |                 unsigned long long len);
58 | */
59 | 
60 | #endif /* __hash_h */
61 | 


--------------------------------------------------------------------------------
/src/main/jni/c_jh.c:
--------------------------------------------------------------------------------
  1 | /*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
  2 | 
  3 |    --------------------------------
  4 |    Performance
  5 | 
  6 |    Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
  7 |    Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
  8 |    Speed for long message:
  9 |    1) 45.8 cycles/byte   compiler: Intel C++ Compiler 11.1   compilation option: icc -O2
 10 |    2) 56.8 cycles/byte   compiler: gcc 4.4.3                 compilation option: gcc -O3
 11 | 
 12 |    --------------------------------
 13 |    Last Modified: January 16, 2011
 14 | */
 15 | 
 16 | #include "c_jh.h"
 17 | 
 18 | #include <stdint.h>
 19 | #include <string.h>
 20 | 
 21 | /*typedef unsigned long long uint64;*/
 22 | typedef uint64_t uint64;
 23 | 
/*define data alignment for different C compilers*/
/*GCC-compatible compilers use the aligned attribute; every other compiler is
  given the __declspec(align(16)) spelling.*/
#if defined(__GNUC__)
	  #define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
#else
	  #define DATA_ALIGN16(x) __declspec(align(16)) x
#endif


/*Working state of one JH hash computation (private to this file).*/
typedef struct {
	int hashbitlen;	   	              /*the message digest size*/
	unsigned long long databitlen;    /*the message size in bits*/
	unsigned long long datasize_in_buffer;      /*the size of the message remained in buffer; assumed to be multiple of 8bits except for the last partial block at the end of the message*/
	DATA_ALIGN16(uint64 x[8][2]);     /*the 1024-bit state, ( x[i][0] || x[i][1] ) is the ith row of the state in the pseudocode*/
	unsigned char buffer[64];         /*the 512-bit message block to be hashed;*/
} hashState;
 39 | 
 40 | 
 41 | /*The initial hash value H(0)*/
 42 | const unsigned char JH224_H0[128]={0x2d,0xfe,0xdd,0x62,0xf9,0x9a,0x98,0xac,0xae,0x7c,0xac,0xd6,0x19,0xd6,0x34,0xe7,0xa4,0x83,0x10,0x5,0xbc,0x30,0x12,0x16,0xb8,0x60,0x38,0xc6,0xc9,0x66,0x14,0x94,0x66,0xd9,0x89,0x9f,0x25,0x80,0x70,0x6f,0xce,0x9e,0xa3,0x1b,0x1d,0x9b,0x1a,0xdc,0x11,0xe8,0x32,0x5f,0x7b,0x36,0x6e,0x10,0xf9,0x94,0x85,0x7f,0x2,0xfa,0x6,0xc1,0x1b,0x4f,0x1b,0x5c,0xd8,0xc8,0x40,0xb3,0x97,0xf6,0xa1,0x7f,0x6e,0x73,0x80,0x99,0xdc,0xdf,0x93,0xa5,0xad,0xea,0xa3,0xd3,0xa4,0x31,0xe8,0xde,0xc9,0x53,0x9a,0x68,0x22,0xb4,0xa9,0x8a,0xec,0x86,0xa1,0xe4,0xd5,0x74,0xac,0x95,0x9c,0xe5,0x6c,0xf0,0x15,0x96,0xd,0xea,0xb5,0xab,0x2b,0xbf,0x96,0x11,0xdc,0xf0,0xdd,0x64,0xea,0x6e};
 43 | const unsigned char JH256_H0[128]={0xeb,0x98,0xa3,0x41,0x2c,0x20,0xd3,0xeb,0x92,0xcd,0xbe,0x7b,0x9c,0xb2,0x45,0xc1,0x1c,0x93,0x51,0x91,0x60,0xd4,0xc7,0xfa,0x26,0x0,0x82,0xd6,0x7e,0x50,0x8a,0x3,0xa4,0x23,0x9e,0x26,0x77,0x26,0xb9,0x45,0xe0,0xfb,0x1a,0x48,0xd4,0x1a,0x94,0x77,0xcd,0xb5,0xab,0x26,0x2,0x6b,0x17,0x7a,0x56,0xf0,0x24,0x42,0xf,0xff,0x2f,0xa8,0x71,0xa3,0x96,0x89,0x7f,0x2e,0x4d,0x75,0x1d,0x14,0x49,0x8,0xf7,0x7d,0xe2,0x62,0x27,0x76,0x95,0xf7,0x76,0x24,0x8f,0x94,0x87,0xd5,0xb6,0x57,0x47,0x80,0x29,0x6c,0x5c,0x5e,0x27,0x2d,0xac,0x8e,0xd,0x6c,0x51,0x84,0x50,0xc6,0x57,0x5,0x7a,0xf,0x7b,0xe4,0xd3,0x67,0x70,0x24,0x12,0xea,0x89,0xe3,0xab,0x13,0xd3,0x1c,0xd7,0x69};
 44 | const unsigned char JH384_H0[128]={0x48,0x1e,0x3b,0xc6,0xd8,0x13,0x39,0x8a,0x6d,0x3b,0x5e,0x89,0x4a,0xde,0x87,0x9b,0x63,0xfa,0xea,0x68,0xd4,0x80,0xad,0x2e,0x33,0x2c,0xcb,0x21,0x48,0xf,0x82,0x67,0x98,0xae,0xc8,0x4d,0x90,0x82,0xb9,0x28,0xd4,0x55,0xea,0x30,0x41,0x11,0x42,0x49,0x36,0xf5,0x55,0xb2,0x92,0x48,0x47,0xec,0xc7,0x25,0xa,0x93,0xba,0xf4,0x3c,0xe1,0x56,0x9b,0x7f,0x8a,0x27,0xdb,0x45,0x4c,0x9e,0xfc,0xbd,0x49,0x63,0x97,0xaf,0xe,0x58,0x9f,0xc2,0x7d,0x26,0xaa,0x80,0xcd,0x80,0xc0,0x8b,0x8c,0x9d,0xeb,0x2e,0xda,0x8a,0x79,0x81,0xe8,0xf8,0xd5,0x37,0x3a,0xf4,0x39,0x67,0xad,0xdd,0xd1,0x7a,0x71,0xa9,0xb4,0xd3,0xbd,0xa4,0x75,0xd3,0x94,0x97,0x6c,0x3f,0xba,0x98,0x42,0x73,0x7f};
 45 | const unsigned char JH512_H0[128]={0x6f,0xd1,0x4b,0x96,0x3e,0x0,0xaa,0x17,0x63,0x6a,0x2e,0x5,0x7a,0x15,0xd5,0x43,0x8a,0x22,0x5e,0x8d,0xc,0x97,0xef,0xb,0xe9,0x34,0x12,0x59,0xf2,0xb3,0xc3,0x61,0x89,0x1d,0xa0,0xc1,0x53,0x6f,0x80,0x1e,0x2a,0xa9,0x5,0x6b,0xea,0x2b,0x6d,0x80,0x58,0x8e,0xcc,0xdb,0x20,0x75,0xba,0xa6,0xa9,0xf,0x3a,0x76,0xba,0xf8,0x3b,0xf7,0x1,0x69,0xe6,0x5,0x41,0xe3,0x4a,0x69,0x46,0xb5,0x8a,0x8e,0x2e,0x6f,0xe6,0x5a,0x10,0x47,0xa7,0xd0,0xc1,0x84,0x3c,0x24,0x3b,0x6e,0x71,0xb1,0x2d,0x5a,0xc1,0x99,0xcf,0x57,0xf6,0xec,0x9d,0xb1,0xf8,0x56,0xa7,0x6,0x88,0x7c,0x57,0x16,0xb1,0x56,0xe3,0xc2,0xfc,0xdf,0xe6,0x85,0x17,0xfb,0x54,0x5a,0x46,0x78,0xcc,0x8c,0xdd,0x4b};
 46 | 
 47 | /*42 round constants, each round constant is 32-byte (256-bit)*/
 48 | const unsigned char E8_bitslice_roundconstant[42][32]={
 49 | {0x72,0xd5,0xde,0xa2,0xdf,0x15,0xf8,0x67,0x7b,0x84,0x15,0xa,0xb7,0x23,0x15,0x57,0x81,0xab,0xd6,0x90,0x4d,0x5a,0x87,0xf6,0x4e,0x9f,0x4f,0xc5,0xc3,0xd1,0x2b,0x40},
 50 | {0xea,0x98,0x3a,0xe0,0x5c,0x45,0xfa,0x9c,0x3,0xc5,0xd2,0x99,0x66,0xb2,0x99,0x9a,0x66,0x2,0x96,0xb4,0xf2,0xbb,0x53,0x8a,0xb5,0x56,0x14,0x1a,0x88,0xdb,0xa2,0x31},
 51 | {0x3,0xa3,0x5a,0x5c,0x9a,0x19,0xe,0xdb,0x40,0x3f,0xb2,0xa,0x87,0xc1,0x44,0x10,0x1c,0x5,0x19,0x80,0x84,0x9e,0x95,0x1d,0x6f,0x33,0xeb,0xad,0x5e,0xe7,0xcd,0xdc},
 52 | {0x10,0xba,0x13,0x92,0x2,0xbf,0x6b,0x41,0xdc,0x78,0x65,0x15,0xf7,0xbb,0x27,0xd0,0xa,0x2c,0x81,0x39,0x37,0xaa,0x78,0x50,0x3f,0x1a,0xbf,0xd2,0x41,0x0,0x91,0xd3},
 53 | {0x42,0x2d,0x5a,0xd,0xf6,0xcc,0x7e,0x90,0xdd,0x62,0x9f,0x9c,0x92,0xc0,0x97,0xce,0x18,0x5c,0xa7,0xb,0xc7,0x2b,0x44,0xac,0xd1,0xdf,0x65,0xd6,0x63,0xc6,0xfc,0x23},
 54 | {0x97,0x6e,0x6c,0x3,0x9e,0xe0,0xb8,0x1a,0x21,0x5,0x45,0x7e,0x44,0x6c,0xec,0xa8,0xee,0xf1,0x3,0xbb,0x5d,0x8e,0x61,0xfa,0xfd,0x96,0x97,0xb2,0x94,0x83,0x81,0x97},
 55 | {0x4a,0x8e,0x85,0x37,0xdb,0x3,0x30,0x2f,0x2a,0x67,0x8d,0x2d,0xfb,0x9f,0x6a,0x95,0x8a,0xfe,0x73,0x81,0xf8,0xb8,0x69,0x6c,0x8a,0xc7,0x72,0x46,0xc0,0x7f,0x42,0x14},
 56 | {0xc5,0xf4,0x15,0x8f,0xbd,0xc7,0x5e,0xc4,0x75,0x44,0x6f,0xa7,0x8f,0x11,0xbb,0x80,0x52,0xde,0x75,0xb7,0xae,0xe4,0x88,0xbc,0x82,0xb8,0x0,0x1e,0x98,0xa6,0xa3,0xf4},
 57 | {0x8e,0xf4,0x8f,0x33,0xa9,0xa3,0x63,0x15,0xaa,0x5f,0x56,0x24,0xd5,0xb7,0xf9,0x89,0xb6,0xf1,0xed,0x20,0x7c,0x5a,0xe0,0xfd,0x36,0xca,0xe9,0x5a,0x6,0x42,0x2c,0x36},
 58 | {0xce,0x29,0x35,0x43,0x4e,0xfe,0x98,0x3d,0x53,0x3a,0xf9,0x74,0x73,0x9a,0x4b,0xa7,0xd0,0xf5,0x1f,0x59,0x6f,0x4e,0x81,0x86,0xe,0x9d,0xad,0x81,0xaf,0xd8,0x5a,0x9f},
 59 | {0xa7,0x5,0x6,0x67,0xee,0x34,0x62,0x6a,0x8b,0xb,0x28,0xbe,0x6e,0xb9,0x17,0x27,0x47,0x74,0x7,0x26,0xc6,0x80,0x10,0x3f,0xe0,0xa0,0x7e,0x6f,0xc6,0x7e,0x48,0x7b},
 60 | {0xd,0x55,0xa,0xa5,0x4a,0xf8,0xa4,0xc0,0x91,0xe3,0xe7,0x9f,0x97,0x8e,0xf1,0x9e,0x86,0x76,0x72,0x81,0x50,0x60,0x8d,0xd4,0x7e,0x9e,0x5a,0x41,0xf3,0xe5,0xb0,0x62},
 61 | {0xfc,0x9f,0x1f,0xec,0x40,0x54,0x20,0x7a,0xe3,0xe4,0x1a,0x0,0xce,0xf4,0xc9,0x84,0x4f,0xd7,0x94,0xf5,0x9d,0xfa,0x95,0xd8,0x55,0x2e,0x7e,0x11,0x24,0xc3,0x54,0xa5},
 62 | {0x5b,0xdf,0x72,0x28,0xbd,0xfe,0x6e,0x28,0x78,0xf5,0x7f,0xe2,0xf,0xa5,0xc4,0xb2,0x5,0x89,0x7c,0xef,0xee,0x49,0xd3,0x2e,0x44,0x7e,0x93,0x85,0xeb,0x28,0x59,0x7f},
 63 | {0x70,0x5f,0x69,0x37,0xb3,0x24,0x31,0x4a,0x5e,0x86,0x28,0xf1,0x1d,0xd6,0xe4,0x65,0xc7,0x1b,0x77,0x4,0x51,0xb9,0x20,0xe7,0x74,0xfe,0x43,0xe8,0x23,0xd4,0x87,0x8a},
 64 | {0x7d,0x29,0xe8,0xa3,0x92,0x76,0x94,0xf2,0xdd,0xcb,0x7a,0x9,0x9b,0x30,0xd9,0xc1,0x1d,0x1b,0x30,0xfb,0x5b,0xdc,0x1b,0xe0,0xda,0x24,0x49,0x4f,0xf2,0x9c,0x82,0xbf},
 65 | {0xa4,0xe7,0xba,0x31,0xb4,0x70,0xbf,0xff,0xd,0x32,0x44,0x5,0xde,0xf8,0xbc,0x48,0x3b,0xae,0xfc,0x32,0x53,0xbb,0xd3,0x39,0x45,0x9f,0xc3,0xc1,0xe0,0x29,0x8b,0xa0},
 66 | {0xe5,0xc9,0x5,0xfd,0xf7,0xae,0x9,0xf,0x94,0x70,0x34,0x12,0x42,0x90,0xf1,0x34,0xa2,0x71,0xb7,0x1,0xe3,0x44,0xed,0x95,0xe9,0x3b,0x8e,0x36,0x4f,0x2f,0x98,0x4a},
 67 | {0x88,0x40,0x1d,0x63,0xa0,0x6c,0xf6,0x15,0x47,0xc1,0x44,0x4b,0x87,0x52,0xaf,0xff,0x7e,0xbb,0x4a,0xf1,0xe2,0xa,0xc6,0x30,0x46,0x70,0xb6,0xc5,0xcc,0x6e,0x8c,0xe6},
 68 | {0xa4,0xd5,0xa4,0x56,0xbd,0x4f,0xca,0x0,0xda,0x9d,0x84,0x4b,0xc8,0x3e,0x18,0xae,0x73,0x57,0xce,0x45,0x30,0x64,0xd1,0xad,0xe8,0xa6,0xce,0x68,0x14,0x5c,0x25,0x67},
 69 | {0xa3,0xda,0x8c,0xf2,0xcb,0xe,0xe1,0x16,0x33,0xe9,0x6,0x58,0x9a,0x94,0x99,0x9a,0x1f,0x60,0xb2,0x20,0xc2,0x6f,0x84,0x7b,0xd1,0xce,0xac,0x7f,0xa0,0xd1,0x85,0x18},
 70 | {0x32,0x59,0x5b,0xa1,0x8d,0xdd,0x19,0xd3,0x50,0x9a,0x1c,0xc0,0xaa,0xa5,0xb4,0x46,0x9f,0x3d,0x63,0x67,0xe4,0x4,0x6b,0xba,0xf6,0xca,0x19,0xab,0xb,0x56,0xee,0x7e},
 71 | {0x1f,0xb1,0x79,0xea,0xa9,0x28,0x21,0x74,0xe9,0xbd,0xf7,0x35,0x3b,0x36,0x51,0xee,0x1d,0x57,0xac,0x5a,0x75,0x50,0xd3,0x76,0x3a,0x46,0xc2,0xfe,0xa3,0x7d,0x70,0x1},
 72 | {0xf7,0x35,0xc1,0xaf,0x98,0xa4,0xd8,0x42,0x78,0xed,0xec,0x20,0x9e,0x6b,0x67,0x79,0x41,0x83,0x63,0x15,0xea,0x3a,0xdb,0xa8,0xfa,0xc3,0x3b,0x4d,0x32,0x83,0x2c,0x83},
 73 | {0xa7,0x40,0x3b,0x1f,0x1c,0x27,0x47,0xf3,0x59,0x40,0xf0,0x34,0xb7,0x2d,0x76,0x9a,0xe7,0x3e,0x4e,0x6c,0xd2,0x21,0x4f,0xfd,0xb8,0xfd,0x8d,0x39,0xdc,0x57,0x59,0xef},
 74 | {0x8d,0x9b,0xc,0x49,0x2b,0x49,0xeb,0xda,0x5b,0xa2,0xd7,0x49,0x68,0xf3,0x70,0xd,0x7d,0x3b,0xae,0xd0,0x7a,0x8d,0x55,0x84,0xf5,0xa5,0xe9,0xf0,0xe4,0xf8,0x8e,0x65},
 75 | {0xa0,0xb8,0xa2,0xf4,0x36,0x10,0x3b,0x53,0xc,0xa8,0x7,0x9e,0x75,0x3e,0xec,0x5a,0x91,0x68,0x94,0x92,0x56,0xe8,0x88,0x4f,0x5b,0xb0,0x5c,0x55,0xf8,0xba,0xbc,0x4c},
 76 | {0xe3,0xbb,0x3b,0x99,0xf3,0x87,0x94,0x7b,0x75,0xda,0xf4,0xd6,0x72,0x6b,0x1c,0x5d,0x64,0xae,0xac,0x28,0xdc,0x34,0xb3,0x6d,0x6c,0x34,0xa5,0x50,0xb8,0x28,0xdb,0x71},
 77 | {0xf8,0x61,0xe2,0xf2,0x10,0x8d,0x51,0x2a,0xe3,0xdb,0x64,0x33,0x59,0xdd,0x75,0xfc,0x1c,0xac,0xbc,0xf1,0x43,0xce,0x3f,0xa2,0x67,0xbb,0xd1,0x3c,0x2,0xe8,0x43,0xb0},
 78 | {0x33,0xa,0x5b,0xca,0x88,0x29,0xa1,0x75,0x7f,0x34,0x19,0x4d,0xb4,0x16,0x53,0x5c,0x92,0x3b,0x94,0xc3,0xe,0x79,0x4d,0x1e,0x79,0x74,0x75,0xd7,0xb6,0xee,0xaf,0x3f},
 79 | {0xea,0xa8,0xd4,0xf7,0xbe,0x1a,0x39,0x21,0x5c,0xf4,0x7e,0x9,0x4c,0x23,0x27,0x51,0x26,0xa3,0x24,0x53,0xba,0x32,0x3c,0xd2,0x44,0xa3,0x17,0x4a,0x6d,0xa6,0xd5,0xad},
 80 | {0xb5,0x1d,0x3e,0xa6,0xaf,0xf2,0xc9,0x8,0x83,0x59,0x3d,0x98,0x91,0x6b,0x3c,0x56,0x4c,0xf8,0x7c,0xa1,0x72,0x86,0x60,0x4d,0x46,0xe2,0x3e,0xcc,0x8,0x6e,0xc7,0xf6},
 81 | {0x2f,0x98,0x33,0xb3,0xb1,0xbc,0x76,0x5e,0x2b,0xd6,0x66,0xa5,0xef,0xc4,0xe6,0x2a,0x6,0xf4,0xb6,0xe8,0xbe,0xc1,0xd4,0x36,0x74,0xee,0x82,0x15,0xbc,0xef,0x21,0x63},
 82 | {0xfd,0xc1,0x4e,0xd,0xf4,0x53,0xc9,0x69,0xa7,0x7d,0x5a,0xc4,0x6,0x58,0x58,0x26,0x7e,0xc1,0x14,0x16,0x6,0xe0,0xfa,0x16,0x7e,0x90,0xaf,0x3d,0x28,0x63,0x9d,0x3f},
 83 | {0xd2,0xc9,0xf2,0xe3,0x0,0x9b,0xd2,0xc,0x5f,0xaa,0xce,0x30,0xb7,0xd4,0xc,0x30,0x74,0x2a,0x51,0x16,0xf2,0xe0,0x32,0x98,0xd,0xeb,0x30,0xd8,0xe3,0xce,0xf8,0x9a},
 84 | {0x4b,0xc5,0x9e,0x7b,0xb5,0xf1,0x79,0x92,0xff,0x51,0xe6,0x6e,0x4,0x86,0x68,0xd3,0x9b,0x23,0x4d,0x57,0xe6,0x96,0x67,0x31,0xcc,0xe6,0xa6,0xf3,0x17,0xa,0x75,0x5},
 85 | {0xb1,0x76,0x81,0xd9,0x13,0x32,0x6c,0xce,0x3c,0x17,0x52,0x84,0xf8,0x5,0xa2,0x62,0xf4,0x2b,0xcb,0xb3,0x78,0x47,0x15,0x47,0xff,0x46,0x54,0x82,0x23,0x93,0x6a,0x48},
 86 | {0x38,0xdf,0x58,0x7,0x4e,0x5e,0x65,0x65,0xf2,0xfc,0x7c,0x89,0xfc,0x86,0x50,0x8e,0x31,0x70,0x2e,0x44,0xd0,0xb,0xca,0x86,0xf0,0x40,0x9,0xa2,0x30,0x78,0x47,0x4e},
 87 | {0x65,0xa0,0xee,0x39,0xd1,0xf7,0x38,0x83,0xf7,0x5e,0xe9,0x37,0xe4,0x2c,0x3a,0xbd,0x21,0x97,0xb2,0x26,0x1,0x13,0xf8,0x6f,0xa3,0x44,0xed,0xd1,0xef,0x9f,0xde,0xe7},
 88 | {0x8b,0xa0,0xdf,0x15,0x76,0x25,0x92,0xd9,0x3c,0x85,0xf7,0xf6,0x12,0xdc,0x42,0xbe,0xd8,0xa7,0xec,0x7c,0xab,0x27,0xb0,0x7e,0x53,0x8d,0x7d,0xda,0xaa,0x3e,0xa8,0xde},
 89 | {0xaa,0x25,0xce,0x93,0xbd,0x2,0x69,0xd8,0x5a,0xf6,0x43,0xfd,0x1a,0x73,0x8,0xf9,0xc0,0x5f,0xef,0xda,0x17,0x4a,0x19,0xa5,0x97,0x4d,0x66,0x33,0x4c,0xfd,0x21,0x6a},
 90 | {0x35,0xb4,0x98,0x31,0xdb,0x41,0x15,0x70,0xea,0x1e,0xf,0xbb,0xed,0xcd,0x54,0x9b,0x9a,0xd0,0x63,0xa1,0x51,0x97,0x40,0x72,0xf6,0x75,0x9d,0xbf,0x91,0x47,0x6f,0xe2}};
 91 | 
 92 | 
 93 | static void E8(hashState *state);  /*The bijective function E8, in bitslice form*/
 94 | static void F8(hashState *state);  /*The compression function F8 */
 95 | 
 96 | /*The API functions*/
 97 | static HashReturn Init(hashState *state, int hashbitlen);
 98 | static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
 99 | static HashReturn Final(hashState *state, BitSequence *hashval);
100 | HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval);
101 | 
/*NOTE: every macro below expands to one or more complete statements, each
  already terminated by ';'. Use them only where a statement sequence is
  valid (e.g. inside a braced block), never as the unbraced body of an if/else.
  The SWAPn macros evaluate their argument more than once.*/

/*swapping bit 2i with bit 2i+1 of 64-bit x*/
#define SWAP1(x)   (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1));
/*swapping bits 4i||4i+1 with bits 4i+2||4i+3 of 64-bit x*/
#define SWAP2(x)   (x) = ((((x) & 0x3333333333333333ULL) << 2) | (((x) & 0xccccccccccccccccULL) >> 2));
/*swapping bits 8i||8i+1||8i+2||8i+3 with bits 8i+4||8i+5||8i+6||8i+7 of 64-bit x*/
#define SWAP4(x)   (x) = ((((x) & 0x0f0f0f0f0f0f0f0fULL) << 4) | (((x) & 0xf0f0f0f0f0f0f0f0ULL) >> 4));
/*swapping bits 16i||16i+1||......||16i+7  with bits 16i+8||16i+9||......||16i+15 of 64-bit x*/
#define SWAP8(x)   (x) = ((((x) & 0x00ff00ff00ff00ffULL) << 8) | (((x) & 0xff00ff00ff00ff00ULL) >> 8));
/*swapping bits 32i||32i+1||......||32i+15 with bits 32i+16||32i+17||......||32i+31 of 64-bit x*/
#define SWAP16(x)  (x) = ((((x) & 0x0000ffff0000ffffULL) << 16) | (((x) & 0xffff0000ffff0000ULL) >> 16));
/*swapping bits 64i||64i+1||......||64i+31 with bits 64i+32||64i+33||......||64i+63 of 64-bit x*/
#define SWAP32(x)  (x) = (((x) << 32) | ((x) >> 32));

/*The MDS transform*/
/*Updates all eight operands in place; the statement order matters because
  later lines read values written by earlier ones (e.g. m0 ^= m5 uses the
  already-updated m5).*/
#define L(m0,m1,m2,m3,m4,m5,m6,m7) \
	  (m4) ^= (m1);                \
	  (m5) ^= (m2);                \
	  (m6) ^= (m0) ^ (m3);         \
	  (m7) ^= (m0);                \
	  (m0) ^= (m5);                \
	  (m1) ^= (m6);                \
	  (m2) ^= (m4) ^ (m7);         \
	  (m3) ^= (m4);

/*Two Sboxes are computed in parallel, each Sbox implements S0 and S1, selected by a constant bit*/
/*The reason to compute two Sboxes in parallel is to try to fully utilize the parallel processing power*/
/*m0..m3 form the first Sbox input with constant word cc0, m4..m7 the second
  with constant word cc1. The expansion assigns to temp0 and temp1, which must
  be declared by the caller; all eight m operands are modified in place.*/
#define SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1)   \
	  m3  = ~(m3);                  \
	  m7  = ~(m7);                  \
	  m0 ^= ((~(m2)) & (cc0));      \
	  m4 ^= ((~(m6)) & (cc1));      \
	  temp0 = (cc0) ^ ((m0) & (m1));\
	  temp1 = (cc1) ^ ((m4) & (m5));\
	  m0 ^= ((m2) & (m3));          \
	  m4 ^= ((m6) & (m7));          \
	  m3 ^= ((~(m1)) & (m2));       \
	  m7 ^= ((~(m5)) & (m6));       \
	  m1 ^= ((m0) & (m2));          \
	  m5 ^= ((m4) & (m6));          \
	  m2 ^= ((m0) & (~(m3)));       \
	  m6 ^= ((m4) & (~(m7)));       \
	  m0 ^= ((m1) | (m3));          \
	  m4 ^= ((m5) | (m7));          \
	  m3 ^= ((m1) & (m2));          \
	  m7 ^= ((m5) & (m6));          \
	  m1 ^= (temp0 & (m0));         \
	  m5 ^= (temp1 & (m4));         \
	  m2 ^= temp0;                  \
	  m6 ^= temp1;
151 | 
152 | /*The bijective function E8, in bitslice form*/
153 | static void E8(hashState *state)
154 | {
155 | 	  uint64 i,roundnumber,temp0,temp1;
156 | 
157 | 	  for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) {
158 | 			/*round 7*roundnumber+0: Sbox, MDS and Swapping layers*/
159 | 			for (i = 0; i < 2; i++) {
160 | 				  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i+2] );
161 | 				  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
162 | 				  SWAP1(state->x[1][i]); SWAP1(state->x[3][i]); SWAP1(state->x[5][i]); SWAP1(state->x[7][i]);
163 | 			}
164 | 
165 | 			/*round 7*roundnumber+1: Sbox, MDS and Swapping layers*/
166 | 			for (i = 0; i < 2; i++) {
167 | 				  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i+2] );
168 | 				  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
169 | 				  SWAP2(state->x[1][i]); SWAP2(state->x[3][i]); SWAP2(state->x[5][i]); SWAP2(state->x[7][i]);
170 | 			}
171 | 
172 | 			/*round 7*roundnumber+2: Sbox, MDS and Swapping layers*/
173 | 			for (i = 0; i < 2; i++) {
174 | 				  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i+2] );
175 | 				  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
176 | 				  SWAP4(state->x[1][i]); SWAP4(state->x[3][i]); SWAP4(state->x[5][i]); SWAP4(state->x[7][i]);
177 | 			}
178 | 
179 | 			/*round 7*roundnumber+3: Sbox, MDS and Swapping layers*/
180 | 			for (i = 0; i < 2; i++) {
181 | 				  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i+2] );
182 | 				  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
183 | 				  SWAP8(state->x[1][i]); SWAP8(state->x[3][i]); SWAP8(state->x[5][i]); SWAP8(state->x[7][i]);
184 | 			}
185 | 
186 | 			/*round 7*roundnumber+4: Sbox, MDS and Swapping layers*/
187 | 			for (i = 0; i < 2; i++) {
188 | 				  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i+2] );
189 | 				  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
190 | 				  SWAP16(state->x[1][i]); SWAP16(state->x[3][i]); SWAP16(state->x[5][i]); SWAP16(state->x[7][i]);
191 | 			}
192 | 
193 | 			/*round 7*roundnumber+5: Sbox, MDS and Swapping layers*/
194 | 			for (i = 0; i < 2; i++) {
195 | 				  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i+2] );
196 | 				  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
197 | 				  SWAP32(state->x[1][i]); SWAP32(state->x[3][i]); SWAP32(state->x[5][i]); SWAP32(state->x[7][i]);
198 | 			}
199 | 
200 | 			/*round 7*roundnumber+6: Sbox and MDS layers*/
201 | 			for (i = 0; i < 2; i++) {
202 | 				  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i+2] );
203 | 				  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
204 | 			}
205 | 			/*round 7*roundnumber+6: swapping layer*/
206 | 			for (i = 1; i < 8; i = i+2) {
207 | 				  temp0 = state->x[i][0]; state->x[i][0] = state->x[i][1]; state->x[i][1] = temp0;
208 | 			}
209 | 	  }
210 | 
211 | }
212 | 
213 | /*The compression function F8 */
214 | static void F8(hashState *state)
215 | {
216 | 	  uint64  i;
217 | 
218 | 	  /*xor the 512-bit message with the fist half of the 1024-bit hash state*/
219 | 	  for (i = 0; i < 8; i++)  state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i];
220 | 
221 | 	  /*the bijective function E8 */
222 | 	  E8(state);
223 | 
224 | 	  /*xor the 512-bit message with the second half of the 1024-bit hash state*/
225 | 	  for (i = 0; i < 8; i++)  state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64*)state->buffer)[i];
226 | }
227 | 
228 | /*before hashing a message, initialize the hash state as H0 */
229 | static HashReturn Init(hashState *state, int hashbitlen)
230 | {
231 | 	  state->databitlen = 0;
232 | 	  state->datasize_in_buffer = 0;
233 | 
234 | 	  /*initialize the initial hash value of JH*/
235 | 	  state->hashbitlen = hashbitlen;
236 | 
237 | 	  /*load the initial hash value into state*/
238 | 	  switch (hashbitlen)
239 | 	  {
240 | 			case 224: memcpy(state->x,JH224_H0,128); break;
241 | 			case 256: memcpy(state->x,JH256_H0,128); break;
242 | 			case 384: memcpy(state->x,JH384_H0,128); break;
243 | 			case 512: memcpy(state->x,JH512_H0,128); break;
244 | 	  }
245 | 
246 | 	  return(SUCCESS);
247 | }
248 | 
249 | 
250 | /*hash each 512-bit message block, except the last partial block*/
251 | static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
252 | {
253 | 	  DataLength index; /*the starting address of the data to be compressed*/
254 | 
255 | 	  state->databitlen += databitlen;
256 | 	  index = 0;
257 | 
258 | 	  /*if there is remaining data in the buffer, fill it to a full message block first*/
259 | 	  /*we assume that the size of the data in the buffer is the multiple of 8 bits if it is not at the end of a message*/
260 | 
261 | 	  /*There is data in the buffer, but the incoming data is insufficient for a full block*/
262 | 	  if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512)  ) {
263 | 			if ( (databitlen & 7) == 0 ) {
264 | 				 memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)) ;
265 | 		    }
266 | 			else memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)+1) ;
267 | 			state->datasize_in_buffer += databitlen;
268 | 			databitlen = 0;
269 | 	  }
270 | 
271 | 	  /*There is data in the buffer, and the incoming data is sufficient for a full block*/
272 | 	  if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512)  ) {
273 | 	        memcpy( state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3) ) ;
274 | 	        index = 64-(state->datasize_in_buffer >> 3);
275 | 	        databitlen = databitlen - (512 - state->datasize_in_buffer);
276 | 	        F8(state);
277 | 	        state->datasize_in_buffer = 0;
278 | 	  }
279 | 
280 | 	  /*hash the remaining full message blocks*/
281 | 	  for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) {
282 | 			memcpy(state->buffer, data+index, 64);
283 | 			F8(state);
284 | 	  }
285 | 
286 | 	  /*store the partial block into buffer, assume that -- if part of the last byte is not part of the message, then that part consists of 0 bits*/
287 | 	  if ( databitlen > 0) {
288 | 			if ((databitlen & 7) == 0)
289 | 				  memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3);
290 | 			else
291 | 				  memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1);
292 | 			state->datasize_in_buffer = databitlen;
293 | 	  }
294 | 
295 | 	  return(SUCCESS);
296 | }
297 | 
298 | /*pad the message, process the padded block(s), truncate the hash value H to obtain the message digest*/
299 | static HashReturn Final(hashState *state, BitSequence *hashval)
300 | {
301 | 	  unsigned int i;
302 | 
303 | 	  if ( (state->databitlen & 0x1ff) == 0 ) {
304 | 			/*pad the message when databitlen is multiple of 512 bits, then process the padded block*/
305 | 			memset(state->buffer, 0, 64);
306 | 			state->buffer[0]  = 0x80;
307 | 			state->buffer[63] = state->databitlen & 0xff;
308 | 			state->buffer[62] = (state->databitlen >> 8)  & 0xff;
309 | 			state->buffer[61] = (state->databitlen >> 16) & 0xff;
310 | 			state->buffer[60] = (state->databitlen >> 24) & 0xff;
311 | 			state->buffer[59] = (state->databitlen >> 32) & 0xff;
312 | 			state->buffer[58] = (state->databitlen >> 40) & 0xff;
313 | 			state->buffer[57] = (state->databitlen >> 48) & 0xff;
314 | 			state->buffer[56] = (state->databitlen >> 56) & 0xff;
315 | 			F8(state);
316 | 	  }
317 | 	  else {
318 | 		    /*set the rest of the bytes in the buffer to 0*/
319 | 			if ( (state->datasize_in_buffer & 7) == 0)
320 | 				  for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++)  state->buffer[i] = 0;
321 | 			else
322 | 				  for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++)  state->buffer[i] = 0;
323 | 
324 | 			/*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/
325 | 			state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7));
326 | 
327 | 			F8(state);
328 | 			memset(state->buffer, 0, 64);
329 | 			state->buffer[63] = state->databitlen & 0xff;
330 | 			state->buffer[62] = (state->databitlen >> 8) & 0xff;
331 | 			state->buffer[61] = (state->databitlen >> 16) & 0xff;
332 | 			state->buffer[60] = (state->databitlen >> 24) & 0xff;
333 | 			state->buffer[59] = (state->databitlen >> 32) & 0xff;
334 | 			state->buffer[58] = (state->databitlen >> 40) & 0xff;
335 | 			state->buffer[57] = (state->databitlen >> 48) & 0xff;
336 | 			state->buffer[56] = (state->databitlen >> 56) & 0xff;
337 | 			F8(state);
338 | 	  }
339 | 
340 | 	  /*truncating the final hash value to generate the message digest*/
341 | 	  switch(state->hashbitlen) {
342 | 			case 224: memcpy(hashval,(unsigned char*)state->x+64+36,28);  break;
343 | 			case 256: memcpy(hashval,(unsigned char*)state->x+64+32,32);  break;
344 | 			case 384: memcpy(hashval,(unsigned char*)state->x+64+16,48);  break;
345 | 			case 512: memcpy(hashval,(unsigned char*)state->x+64,64);     break;
346 | 	  }
347 | 
348 | 	  return(SUCCESS);
349 | }
350 | 
351 | /* hash a message,
352 |    three inputs: message digest size in bits (hashbitlen); message (data); message length in bits (databitlen)
353 |    one output:   message digest (hashval)
354 | */
355 | HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval)
356 | {
357 | 	  hashState state;
358 | 
359 | 	  if ( hashbitlen == 224 || hashbitlen == 256 || hashbitlen == 384 || hashbitlen == 512 ) {
360 | 			Init(&state, hashbitlen);
361 | 			Update(&state, data, databitlen);
362 | 			Final(&state, hashval);
363 | 			return SUCCESS;
364 | 	  }
365 | 	  else
366 | 			return(BAD_HASHLEN);
367 | }
368 | 


--------------------------------------------------------------------------------
/src/main/jni/c_jh.h:
--------------------------------------------------------------------------------
 1 | /*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C
 2 | 
 3 |    --------------------------------
 4 |    Performance
 5 | 
 6 |    Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz)
 7 |    Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic)
 8 |    Speed for long message:
 9 |    1) 45.8 cycles/byte   compiler: Intel C++ Compiler 11.1   compilation option: icc -O2
10 |    2) 56.8 cycles/byte   compiler: gcc 4.4.3                 compilation option: gcc -O3
11 | 
12 |    --------------------------------
13 |    Last Modified: January 16, 2011
14 | */
15 | #pragma once
16 | 
17 | #include "hash.h"
18 | 
/* One-shot JH hash of data (databitlen bits) into hashval.
   hashbitlen selects the digest size and must be 224, 256, 384 or 512;
   any other value makes the function return BAD_HASHLEN without writing
   hashval. On success hashbitlen/8 bytes are written and SUCCESS is returned. */
HashReturn jh_hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
20 | 


--------------------------------------------------------------------------------
/src/main/jni/c_keccak.c:
--------------------------------------------------------------------------------
  1 | // keccak.c
  2 | // 19-Nov-11  Markku-Juhani O. Saarinen <mjos@iki.fi>
  3 | // A baseline Keccak (3rd round) implementation.
  4 | 
  5 | #include <stdint.h>
  6 | #include <memory.h>
  7 | 
  8 | #define HASH_DATA_AREA 136
  9 | #define KECCAK_ROUNDS 24
 10 | 
 11 | #ifndef ROTL64
 12 | #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
 13 | #endif
 14 | 
 15 | const uint64_t keccakf_rndc[24] =
 16 | {
 17 | 	0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
 18 | 	0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
 19 | 	0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
 20 | 	0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
 21 | 	0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
 22 | 	0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
 23 | 	0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
 24 | 	0x8000000000008080, 0x0000000080000001, 0x8000000080008008
 25 | };
 26 | 
 27 | // update the state with given number of rounds
 28 | 
 29 | void keccakf(uint64_t st[25], int rounds)
 30 | {
 31 | 	int i, j, round;
 32 | 	uint64_t t, bc[5];
 33 | 
 34 | 	for (round = 0; round < rounds; ++round) {
 35 | 
 36 | 		// Theta
 37 | 		bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
 38 | 		bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
 39 | 		bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
 40 | 		bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
 41 | 		bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
 42 | 
 43 | 		for (i = 0; i < 5; ++i) {
 44 | 			t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
 45 | 			st[i] ^= t;
 46 | 			st[i + 5] ^= t;
 47 | 			st[i + 10] ^= t;
 48 | 			st[i + 15] ^= t;
 49 | 			st[i + 20] ^= t;
 50 | 		}
 51 | 
 52 | 		// Rho Pi
 53 | 		t = st[1];
 54 | 		st[1] = ROTL64(st[6], 44);
 55 | 		st[6] = ROTL64(st[9], 20);
 56 | 		st[9] = ROTL64(st[22], 61);
 57 | 		st[22] = ROTL64(st[14], 39);
 58 | 		st[14] = ROTL64(st[20], 18);
 59 | 		st[20] = ROTL64(st[2], 62);
 60 | 		st[2] = ROTL64(st[12], 43);
 61 | 		st[12] = ROTL64(st[13], 25);
 62 | 		st[13] = ROTL64(st[19], 8);
 63 | 		st[19] = ROTL64(st[23], 56);
 64 | 		st[23] = ROTL64(st[15], 41);
 65 | 		st[15] = ROTL64(st[4], 27);
 66 | 		st[4] = ROTL64(st[24], 14);
 67 | 		st[24] = ROTL64(st[21], 2);
 68 | 		st[21] = ROTL64(st[8], 55);
 69 | 		st[8] = ROTL64(st[16], 45);
 70 | 		st[16] = ROTL64(st[5], 36);
 71 | 		st[5] = ROTL64(st[3], 28);
 72 | 		st[3] = ROTL64(st[18], 21);
 73 | 		st[18] = ROTL64(st[17], 15);
 74 | 		st[17] = ROTL64(st[11], 10);
 75 | 		st[11] = ROTL64(st[7], 6);
 76 | 		st[7] = ROTL64(st[10], 3);
 77 | 		st[10] = ROTL64(t, 1);
 78 | 
 79 | 		//  Chi
 80 | 		// unrolled loop, where only last iteration is different
 81 | 		j = 0;
 82 | 		bc[0] = st[j];
 83 | 		bc[1] = st[j + 1];
 84 | 
 85 | 		st[j] ^= (~st[j + 1]) & st[j + 2];
 86 | 		st[j + 1] ^= (~st[j + 2]) & st[j + 3];
 87 | 		st[j + 2] ^= (~st[j + 3]) & st[j + 4];
 88 | 		st[j + 3] ^= (~st[j + 4]) & bc[0];
 89 | 		st[j + 4] ^= (~bc[0]) & bc[1];
 90 | 
 91 | 		j = 5;
 92 | 		bc[0] = st[j];
 93 | 		bc[1] = st[j + 1];
 94 | 
 95 | 		st[j] ^= (~st[j + 1]) & st[j + 2];
 96 | 		st[j + 1] ^= (~st[j + 2]) & st[j + 3];
 97 | 		st[j + 2] ^= (~st[j + 3]) & st[j + 4];
 98 | 		st[j + 3] ^= (~st[j + 4]) & bc[0];
 99 | 		st[j + 4] ^= (~bc[0]) & bc[1];
100 | 
101 | 		j = 10;
102 | 		bc[0] = st[j];
103 | 		bc[1] = st[j + 1];
104 | 
105 | 		st[j] ^= (~st[j + 1]) & st[j + 2];
106 | 		st[j + 1] ^= (~st[j + 2]) & st[j + 3];
107 | 		st[j + 2] ^= (~st[j + 3]) & st[j + 4];
108 | 		st[j + 3] ^= (~st[j + 4]) & bc[0];
109 | 		st[j + 4] ^= (~bc[0]) & bc[1];
110 | 
111 | 		j = 15;
112 | 		bc[0] = st[j];
113 | 		bc[1] = st[j + 1];
114 | 
115 | 		st[j] ^= (~st[j + 1]) & st[j + 2];
116 | 		st[j + 1] ^= (~st[j + 2]) & st[j + 3];
117 | 		st[j + 2] ^= (~st[j + 3]) & st[j + 4];
118 | 		st[j + 3] ^= (~st[j + 4]) & bc[0];
119 | 		st[j + 4] ^= (~bc[0]) & bc[1];
120 | 
121 | 		j = 20;
122 | 		bc[0] = st[j];
123 | 		bc[1] = st[j + 1];
124 | 		bc[2] = st[j + 2];
125 | 		bc[3] = st[j + 3];
126 | 		bc[4] = st[j + 4];
127 | 
128 | 		st[j] ^= (~bc[1]) & bc[2];
129 | 		st[j + 1] ^= (~bc[2]) & bc[3];
130 | 		st[j + 2] ^= (~bc[3]) & bc[4];
131 | 		st[j + 3] ^= (~bc[4]) & bc[0];
132 | 		st[j + 4] ^= (~bc[0]) & bc[1];
133 | 
134 | 		//  Iota
135 | 		st[0] ^= keccakf_rndc[round];
136 | 	}
137 | }
138 | 
139 | // compute a keccak hash (md) of given byte length from "in"
140 | typedef uint64_t state_t[25];
141 | 
142 | void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen)
143 | {
144 | 	state_t st;
145 | 	uint8_t temp[144];
146 | 	int i, rsiz, rsizw;
147 | 
148 | 	rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
149 | 	rsizw = rsiz / 8;
150 | 
151 | 	memset(st, 0, sizeof(st));
152 | 
153 | 	for (; inlen >= rsiz; inlen -= rsiz, in += rsiz) {
154 | 		for (i = 0; i < rsizw; i++)
155 | 			st[i] ^= ((uint64_t *)in)[i];
156 | 		keccakf(st, KECCAK_ROUNDS);
157 | 	}
158 | 
159 | 	// last block and padding
160 | 	memcpy(temp, in, inlen);
161 | 	temp[inlen++] = 1;
162 | 	memset(temp + inlen, 0, rsiz - inlen);
163 | 	temp[rsiz - 1] |= 0x80;
164 | 
165 | 	for (i = 0; i < rsizw; i++)
166 | 		st[i] ^= ((uint64_t *)temp)[i];
167 | 
168 | 	keccakf(st, KECCAK_ROUNDS);
169 | 
170 | 	memcpy(md, st, mdlen);
171 | }
172 | 
173 | void keccak1600(const uint8_t *in, int inlen, uint8_t *md)
174 | {
175 | 	keccak(in, inlen, md, sizeof(state_t));
176 | }
177 | 


--------------------------------------------------------------------------------
/src/main/jni/c_keccak.h:
--------------------------------------------------------------------------------
 1 | // keccak.h
 2 | // 19-Nov-11  Markku-Juhani O. Saarinen <mjos@iki.fi>
 3 | 
 4 | #ifndef KECCAK_H
 5 | #define KECCAK_H
 6 | 
 7 | #include <stdint.h>
 8 | #include <string.h>
 9 | 
10 | #ifndef KECCAK_ROUNDS
11 | #define KECCAK_ROUNDS 24
12 | #endif
13 | 
14 | #ifndef ROTL64
15 | #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
16 | #endif
17 | 
18 | // compute a keccak hash (md) of given byte length from "in"
19 | int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen);
20 | 
21 | // update the state
22 | void keccakf(uint64_t st[25], int norounds);
23 | 
24 | void keccak1600(const uint8_t *in, int inlen, uint8_t *md);
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/src/main/jni/c_skein.h:
--------------------------------------------------------------------------------
#ifndef _SKEIN_H_
#define _SKEIN_H_     1
/**************************************************************************
**
** Interface declarations and internal definitions for Skein hashing.
**
** Source code author: Doug Whiting, 2008.
**
** This algorithm and source code is released to the public domain.
**
***************************************************************************
**
** The following compile-time switches may be defined to control some
** tradeoffs between speed, code size, error checking, and security.
**
** The "default" note explains what happens when the switch is not defined.
**
**  SKEIN_DEBUG            -- make callouts from inside Skein code
**                            to examine/display intermediate values.
**                            [default: no callouts (no overhead)]
**
**  SKEIN_ERR_CHECK        -- how error checking is handled inside Skein
**                            code. If not defined, most error checking
**                            is disabled (for performance). Otherwise,
**                            the switch value is interpreted as:
**                                0: use assert()      to flag errors
**                                1: return SKEIN_FAIL to flag errors
**
***************************************************************************/
#include "skein_port.h"                      /* get platform-specific definitions */

/* Status codes returned by the Skein entry point declared below. */
typedef enum
{
  SKEIN_SUCCESS         =      0,          /* return codes from Skein calls */
  SKEIN_FAIL            =      1,
  SKEIN_BAD_HASHLEN     =      2
}
SkeinHashReturn;

typedef uint32_t SkeinDataLength;                /* bit count  type */
typedef u08b_t   SkeinBitSequence;               /* bit stream type */

/* "all-in-one" call
 *
 * NOTE(review): the implementation is not part of this header. Judging by
 * the parameter and type names, hashbitlen and databitlen are bit counts and
 * hashval receives the digest -- confirm against the Skein source file.
 */
SkeinHashReturn skein_hash(int hashbitlen,   const SkeinBitSequence *data,
                           SkeinDataLength databitlen, SkeinBitSequence *hashval);

#endif  /* ifndef _SKEIN_H_ */
48 | 


--------------------------------------------------------------------------------
/src/main/jni/cryptonight.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CRYPTONIGHT_H_INCLUDED
 2 | #define __CRYPTONIGHT_H_INCLUDED
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include <stddef.h>
 9 | #include <inttypes.h>
10 | 
11 | typedef struct {
12 | 	uint8_t hash_state[224]; // Need only 200, explicit align
13 | 	uint8_t* long_state;
14 | 	uint8_t ctx_info[24]; //Use some of the extra memory for flags
15 | } cryptonight_ctx;
16 | 
17 | typedef struct {
18 | 	const char* warning;
19 | } alloc_msg;
20 | 
21 | size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg);
22 | cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg);
23 | void cryptonight_free_ctx(cryptonight_ctx* ctx);
24 | 
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 | 
29 | #endif
30 | 


--------------------------------------------------------------------------------
/src/main/jni/cryptonight_aesni.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |   * This program is free software: you can redistribute it and/or modify
  3 |   * it under the terms of the GNU General Public License as published by
  4 |   * the Free Software Foundation, either version 3 of the License, or
  5 |   * any later version.
  6 |   *
  7 |   * This program is distributed in the hope that it will be useful,
  8 |   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 10 |   * GNU General Public License for more details.
 11 |   *
 12 |   * You should have received a copy of the GNU General Public License
 13 |   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |   *
 15 |   */
 16 | #pragma once
 17 | 
 18 | #include "cryptonight.h"
 19 | #include "algos.hpp"
 20 | #include <memory.h>
 21 | #include <stdio.h>
 22 | #include <cfenv>
 23 | #include <utility>
 24 | 
 25 | #ifdef __GNUC__
 26 | #include <x86intrin.h>
 27 | static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
 28 | {
 29 | 	unsigned __int128 r = (unsigned __int128)a * (unsigned __int128)b;
 30 | 	*hi = r >> 64;
 31 | 	return (uint64_t)r;
 32 | }
 33 | 
 34 | #else
 35 | #include <intrin.h>
 36 | #endif // __GNUC__
 37 | 
 38 | #if !defined(_LP64) && !defined(_WIN64)
 39 | #error You are trying to do a 32-bit build. This will all end in tears. I know it.
 40 | #endif
 41 | 
 42 | #include "soft_aes.hpp"
 43 | 
// C-linkage symbols used by this header.
// keccak and keccakf are the routines defined in c_keccak.c.
// NOTE(review): extra_hashes is defined elsewhere; from its type it is a table
// of four hash functions taking (input, length, output) -- confirm which
// algorithm sits at which index.
extern "C"
{
	void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen);
	void keccakf(uint64_t st[25], int rounds);
	extern void(*const extra_hashes[4])(const void *, uint32_t, char *);
}
 50 | 
 51 | // This will shift and xor tmp1 into itself as 4 32-bit vals such as
 52 | // sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
 53 | static inline __m128i sl_xor(__m128i tmp1)
 54 | {
 55 | 	__m128i tmp4;
 56 | 	tmp4 = _mm_slli_si128(tmp1, 0x04);
 57 | 	tmp1 = _mm_xor_si128(tmp1, tmp4);
 58 | 	tmp4 = _mm_slli_si128(tmp4, 0x04);
 59 | 	tmp1 = _mm_xor_si128(tmp1, tmp4);
 60 | 	tmp4 = _mm_slli_si128(tmp4, 0x04);
 61 | 	tmp1 = _mm_xor_si128(tmp1, tmp4);
 62 | 	return tmp1;
 63 | }
 64 | 
 65 | template<uint8_t rcon>
 66 | static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2)
 67 | {
 68 | 	__m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon);
 69 | 	xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
 70 | 	*xout0 = sl_xor(*xout0);
 71 | 	*xout0 = _mm_xor_si128(*xout0, xout1);
 72 | 	xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);
 73 | 	xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
 74 | 	*xout2 = sl_xor(*xout2);
 75 | 	*xout2 = _mm_xor_si128(*xout2, xout1);
 76 | }
 77 | 
 78 | static inline void soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2, uint8_t rcon)
 79 | {
 80 | 	__m128i xout1 = soft_aeskeygenassist(*xout2, rcon);
 81 | 	xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
 82 | 	*xout0 = sl_xor(*xout0);
 83 | 	*xout0 = _mm_xor_si128(*xout0, xout1);
 84 | 	xout1 = soft_aeskeygenassist(*xout0, 0x00);
 85 | 	xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
 86 | 	*xout2 = sl_xor(*xout2);
 87 | 	*xout2 = _mm_xor_si128(*xout2, xout1);
 88 | }
 89 | 
 90 | template<bool SOFT_AES>
 91 | static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3,
 92 | 	__m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
 93 | {
 94 | 	__m128i xout0, xout2;
 95 | 
 96 | 	xout0 = _mm_load_si128(memory);
 97 | 	xout2 = _mm_load_si128(memory + 1);
 98 | 	*k0 = xout0;
 99 | 	*k1 = xout2;
100 | 
101 | 	if (SOFT_AES)
102 | 		soft_aes_genkey_sub(&xout0, &xout2, 0x01);
103 | 	else
104 | 		aes_genkey_sub<0x01>(&xout0, &xout2);
105 | 	*k2 = xout0;
106 | 	*k3 = xout2;
107 | 
108 | 	if (SOFT_AES)
109 | 		soft_aes_genkey_sub(&xout0, &xout2, 0x02);
110 | 	else
111 | 		aes_genkey_sub<0x02>(&xout0, &xout2);
112 | 	*k4 = xout0;
113 | 	*k5 = xout2;
114 | 
115 | 	if (SOFT_AES)
116 | 		soft_aes_genkey_sub(&xout0, &xout2, 0x04);
117 | 	else
118 | 		aes_genkey_sub<0x04>(&xout0, &xout2);
119 | 	*k6 = xout0;
120 | 	*k7 = xout2;
121 | 
122 | 	if (SOFT_AES)
123 | 		soft_aes_genkey_sub(&xout0, &xout2, 0x08);
124 | 	else
125 | 		aes_genkey_sub<0x08>(&xout0, &xout2);
126 | 	*k8 = xout0;
127 | 	*k9 = xout2;
128 | }
129 | 
130 | static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
131 | {
132 | 	*x0 = _mm_aesenc_si128(*x0, key);
133 | 	*x1 = _mm_aesenc_si128(*x1, key);
134 | 	*x2 = _mm_aesenc_si128(*x2, key);
135 | 	*x3 = _mm_aesenc_si128(*x3, key);
136 | 	*x4 = _mm_aesenc_si128(*x4, key);
137 | 	*x5 = _mm_aesenc_si128(*x5, key);
138 | 	*x6 = _mm_aesenc_si128(*x6, key);
139 | 	*x7 = _mm_aesenc_si128(*x7, key);
140 | }
141 | 
142 | static inline void soft_aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
143 | {
144 | 	*x0 = soft_aesenc(*x0, key);
145 | 	*x1 = soft_aesenc(*x1, key);
146 | 	*x2 = soft_aesenc(*x2, key);
147 | 	*x3 = soft_aesenc(*x3, key);
148 | 	*x4 = soft_aesenc(*x4, key);
149 | 	*x5 = soft_aesenc(*x5, key);
150 | 	*x6 = soft_aesenc(*x6, key);
151 | 	*x7 = soft_aesenc(*x7, key);
152 | }
153 | 
// XOR every block with its successor, in place: x0 ^= x1, x1 ^= x2, ...,
// x6 ^= x7, and finally x7 ^= the original value of x0.
inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7)
{
	__m128i* const ring[8] = { &x0, &x1, &x2, &x3, &x4, &x5, &x6, &x7 };
	const __m128i original_first = x0;

	// each step reads a successor that has not been modified yet
	for (int pos = 0; pos < 7; ++pos)
		*ring[pos] = _mm_xor_si128(*ring[pos], *ring[pos + 1]);

	x7 = _mm_xor_si128(x7, original_first);
}
166 | 
167 | template<size_t MEM, bool SOFT_AES, bool PREFETCH, algo ALGO>
168 | void cn_explode_scratchpad(const __m128i* input, __m128i* output)
169 | {
170 | 	// This is more than we have registers, compiler will assign 2 keys on the stack
171 | 	__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
172 | 	__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
173 | 
174 | 	aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
175 | 
176 | 	xin0 = _mm_load_si128(input + 4);
177 | 	xin1 = _mm_load_si128(input + 5);
178 | 	xin2 = _mm_load_si128(input + 6);
179 | 	xin3 = _mm_load_si128(input + 7);
180 | 	xin4 = _mm_load_si128(input + 8);
181 | 	xin5 = _mm_load_si128(input + 9);
182 | 	xin6 = _mm_load_si128(input + 10);
183 | 	xin7 = _mm_load_si128(input + 11);
184 | 
185 | 	if (ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2 || ALGO == cryptonight_superfast)
186 | 	{
187 | 		for (size_t i = 0; i < 16; i++)
188 | 		{
189 | 			if (SOFT_AES)
190 | 			{
191 | 				soft_aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
192 | 				soft_aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
193 | 				soft_aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
194 | 				soft_aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
195 | 				soft_aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
196 | 				soft_aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
197 | 				soft_aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
198 | 				soft_aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
199 | 				soft_aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
200 | 				soft_aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
201 | 			}
202 | 			else
203 | 			{
204 | 				aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
205 | 				aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
206 | 				aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
207 | 				aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
208 | 				aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
209 | 				aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
210 | 				aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
211 | 				aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
212 | 				aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
213 | 				aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
214 | 			}
215 | 			mix_and_propagate(xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
216 | 		}
217 | 	}
218 | 
219 | 	for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
220 | 	{
221 | 		if (SOFT_AES)
222 | 		{
223 | 			soft_aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
224 | 			soft_aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
225 | 			soft_aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
226 | 			soft_aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
227 | 			soft_aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
228 | 			soft_aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
229 | 			soft_aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
230 | 			soft_aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
231 | 			soft_aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
232 | 			soft_aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
233 | 		}
234 | 		else
235 | 		{
236 | 			aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
237 | 			aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
238 | 			aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
239 | 			aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
240 | 			aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
241 | 			aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
242 | 			aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
243 | 			aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
244 | 			aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
245 | 			aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
246 | 		}
247 | 
248 | 		_mm_store_si128(output + i + 0, xin0);
249 | 		_mm_store_si128(output + i + 1, xin1);
250 | 		_mm_store_si128(output + i + 2, xin2);
251 | 		_mm_store_si128(output + i + 3, xin3);
252 | 
253 | 		if (PREFETCH)
254 | 			_mm_prefetch((const char*)output + i + 0, _MM_HINT_T2);
255 | 
256 | 		_mm_store_si128(output + i + 4, xin4);
257 | 		_mm_store_si128(output + i + 5, xin5);
258 | 		_mm_store_si128(output + i + 6, xin6);
259 | 		_mm_store_si128(output + i + 7, xin7);
260 | 
261 | 		if (PREFETCH)
262 | 			_mm_prefetch((const char*)output + i + 4, _MM_HINT_T2);
263 | 	}
264 | }
265 | 
266 | template<size_t MEM, bool SOFT_AES, bool PREFETCH, algo ALGO>
267 | void cn_implode_scratchpad(const __m128i* input, __m128i* output)
268 | {
269 | 	// This is more than we have registers, compiler will assign 2 keys on the stack
270 | 	__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
271 | 	__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
272 | 
273 | 	aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
274 | 
275 | 	xout0 = _mm_load_si128(output + 4);
276 | 	xout1 = _mm_load_si128(output + 5);
277 | 	xout2 = _mm_load_si128(output + 6);
278 | 	xout3 = _mm_load_si128(output + 7);
279 | 	xout4 = _mm_load_si128(output + 8);
280 | 	xout5 = _mm_load_si128(output + 9);
281 | 	xout6 = _mm_load_si128(output + 10);
282 | 	xout7 = _mm_load_si128(output + 11);
283 | 
284 | 	for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
285 | 	{
286 | 		if (PREFETCH)
287 | 			_mm_prefetch((const char*)input + i + 0, _MM_HINT_NTA);
288 | 
289 | 		xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
290 | 		xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
291 | 		xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
292 | 		xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
293 | 
294 | 		if (PREFETCH)
295 | 			_mm_prefetch((const char*)input + i + 4, _MM_HINT_NTA);
296 | 
297 | 		xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
298 | 		xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
299 | 		xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
300 | 		xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
301 | 
302 | 		if (SOFT_AES)
303 | 		{
304 | 			soft_aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
305 | 			soft_aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
306 | 			soft_aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
307 | 			soft_aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
308 | 			soft_aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
309 | 			soft_aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
310 | 			soft_aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
311 | 			soft_aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
312 | 			soft_aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
313 | 			soft_aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
314 | 		}
315 | 		else
316 | 		{
317 | 			aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
318 | 			aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
319 | 			aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
320 | 			aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
321 | 			aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
322 | 			aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
323 | 			aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
324 | 			aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
325 | 			aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
326 | 			aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
327 | 		}
328 | 
329 | 		if (ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2 || ALGO == cryptonight_superfast)
330 | 			mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
331 | 	}
332 | 
333 | 	if (ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2 || ALGO == cryptonight_superfast)
334 | 	{
335 | 		for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
336 | 		{
337 | 			if (PREFETCH)
338 | 				_mm_prefetch((const char*)input + i + 0, _MM_HINT_NTA);
339 | 
340 | 			xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
341 | 			xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
342 | 			xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
343 | 			xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
344 | 
345 | 			if (PREFETCH)
346 | 				_mm_prefetch((const char*)input + i + 4, _MM_HINT_NTA);
347 | 
348 | 			xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
349 | 			xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
350 | 			xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
351 | 			xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
352 | 
353 | 			if (SOFT_AES)
354 | 			{
355 | 				soft_aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
356 | 				soft_aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
357 | 				soft_aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
358 | 				soft_aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
359 | 				soft_aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
360 | 				soft_aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
361 | 				soft_aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
362 | 				soft_aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
363 | 				soft_aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
364 | 				soft_aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
365 | 			}
366 | 			else
367 | 			{
368 | 				aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
369 | 				aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
370 | 				aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
371 | 				aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
372 | 				aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
373 | 				aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
374 | 				aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
375 | 				aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
376 | 				aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
377 | 				aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
378 | 			}
379 | 
380 | 			if (ALGO == cryptonight_heavy || ALGO == cryptonight_haven || ALGO == cryptonight_bittube2 || ALGO == cryptonight_superfast)
381 | 				mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
382 | 		}
383 | 
384 | 		for (size_t i = 0; i < 16; i++)
385 | 		{
386 | 			if (SOFT_AES)
387 | 			{
388 | 				soft_aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
389 | 				soft_aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
390 | 				soft_aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
391 | 				soft_aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
392 | 				soft_aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
393 | 				soft_aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
394 | 				soft_aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
395 | 				soft_aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
396 | 				soft_aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
397 | 				soft_aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
398 | 			}
399 | 			else
400 | 			{
401 | 				aes_round(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
402 | 				aes_round(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
403 | 				aes_round(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
404 | 				aes_round(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
405 | 				aes_round(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
406 | 				aes_round(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
407 | 				aes_round(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
408 | 				aes_round(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
409 | 				aes_round(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
410 | 				aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
411 | 			}
412 | 
413 | 			mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
414 | 		}
415 | 	}
416 | 
417 | 	_mm_store_si128(output + 4, xout0);
418 | 	_mm_store_si128(output + 5, xout1);
419 | 	_mm_store_si128(output + 6, xout2);
420 | 	_mm_store_si128(output + 7, xout3);
421 | 	_mm_store_si128(output + 8, xout4);
422 | 	_mm_store_si128(output + 9, xout5);
423 | 	_mm_store_si128(output + 10, xout6);
424 | 	_mm_store_si128(output + 11, xout7);
425 | }
426 | 
// Integer square-root helper built on the double-precision SQRTSD instruction.
// NOTE(review): the name suggests a 33-bit fixed-point root of n0; the exact
// scaling is not spelled out here -- confirm against the algorithm spec.
// The result of the hardware square root depends on the active floating-point
// rounding mode (presumably the one chosen by set_float_rounding_mode()).
inline uint64_t int_sqrt33_1_double_precision(const uint64_t n0)
{
	// Build a double whose exponent field is 1023 and whose 52-bit mantissa is
	// the top 52 bits of n0, i.e. a value in [1, 2).
	__m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(n0 >> 12), _mm_set_epi64x(0, 1023ULL << 52)));
	// Square root of the low lane only; the upper lane comes from the zero vector.
	x = _mm_sqrt_sd(_mm_setzero_pd(), x);
	// Reinterpret the raw IEEE-754 bits of the result as an integer.
	uint64_t r = static_cast<uint64_t>(_mm_cvtsi128_si64(_mm_castpd_si128(x)));

	// s and r are the same bit pattern shifted by 20 and 19 bits respectively.
	const uint64_t s = r >> 20;
	r >>= 19;

	// Correction term; subtracting 1022 << 32 removes the exponent contribution
	// that is still present in the shifted bit patterns.
	uint64_t x2 = (s - (1022ULL << 32)) * (r - s - (1022ULL << 32) + 1);

	// All three variants below add 1 to r exactly when x2 < n0: the borrow of
	// x2 - n0 is fed as carry-in to r + 0.
#ifdef __INTEL_COMPILER
	_addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned __int64*)&x2), r, 0, (unsigned __int64*)&r);
#elif defined(_MSC_VER) || (__GNUC__ >= 7)
	_addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r);
#else
	// GCC versions prior to 7 don't generate correct assembly for _subborrow_u64 -> _addcarry_u64 sequence
	// Fallback to simpler code
	if (x2 < n0) ++r;
#endif
	return r;
}
449 | 
450 | inline __m128i aes_round_bittube2(const __m128i& val, const __m128i& key)
451 | {
452 | 	alignas(16) uint32_t k[4];
453 | 	alignas(16) uint32_t x[4];
454 | 	_mm_store_si128((__m128i*)k, key);
455 | 	_mm_store_si128((__m128i*)x, _mm_xor_si128(val, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))); // x = ~val
456 | #define BYTE(p, i) ((unsigned char*)&p)[i]
457 | 	k[0] ^= saes_table[0][BYTE(x[0], 0)] ^ saes_table[1][BYTE(x[1], 1)] ^ saes_table[2][BYTE(x[2], 2)] ^ saes_table[3][BYTE(x[3], 3)];
458 | 	x[0] ^= k[0];
459 | 	k[1] ^= saes_table[0][BYTE(x[1], 0)] ^ saes_table[1][BYTE(x[2], 1)] ^ saes_table[2][BYTE(x[3], 2)] ^ saes_table[3][BYTE(x[0], 3)];
460 | 	x[1] ^= k[1];
461 | 	k[2] ^= saes_table[0][BYTE(x[2], 0)] ^ saes_table[1][BYTE(x[3], 1)] ^ saes_table[2][BYTE(x[0], 2)] ^ saes_table[3][BYTE(x[1], 3)];
462 | 	x[2] ^= k[2];
463 | 	k[3] ^= saes_table[0][BYTE(x[3], 0)] ^ saes_table[1][BYTE(x[0], 1)] ^ saes_table[2][BYTE(x[1], 2)] ^ saes_table[3][BYTE(x[2], 3)];
464 | #undef BYTE
465 | 	return _mm_load_si128((__m128i*)k);
466 | }
467 | 
468 | template<algo ALGO>
469 | inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
470 | {
471 | 	mem_out[0] = _mm_cvtsi128_si64(tmp);
472 | 
473 | 	tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
474 | 	uint64_t vh = _mm_cvtsi128_si64(tmp);
475 | 
476 | 	uint8_t x = static_cast<uint8_t>(vh >> 24);
477 | 	static const uint16_t table = 0x7531;
478 | 	if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2)
479 | 	{
480 | 		const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
481 | 		vh ^= ((table >> index) & 0x3) << 28;
482 | 
483 | 		mem_out[1] = vh;
484 | 	}
485 | 	else if (ALGO == cryptonight_stellite)
486 | 	{
487 | 		const uint8_t index = (((x >> 4) & 6) | (x & 1)) << 1;
488 | 		vh ^= ((table >> index) & 0x3) << 28;
489 | 
490 | 		mem_out[1] = vh;
491 | 	}
492 | 
493 | }
494 | 
/** type used to hold the sqrt value
 *
 * Maps the number of hashes calculated per thread to the type the sqrt value
 * is kept in: uint64_t for a single hash, __m128i for every other count.
 *
 * @tparam N number of hashes per thread
 */
template<size_t N>
struct GetOptimalSqrtType
{
	typedef __m128i type;
};

template<>
struct GetOptimalSqrtType<1u>
{
	typedef uint64_t type;
};

/** shorthand for GetOptimalSqrtType<N>::type */
template<size_t N>
using GetOptimalSqrtType_t = typename GetOptimalSqrtType<N>::type;
514 | 
/** store a value, converting between uint64_t and __m128i where needed
 *
 * When the destination is a vector, the integer is placed in its low 64 bits
 * and the upper 64 bits are zeroed. When the source is a vector, only its low
 * 64 bits are read.
 *
 * @param output destination
 * @param input value which is assigned to output
 * @{
 */
inline void assign(__m128i& output, const uint64_t input)
{
	output = _mm_cvtsi64_si128(static_cast<long long>(input));
}

inline void assign(uint64_t& output, const uint64_t input)
{
	output = input;
}

inline void assign(uint64_t& output, const __m128i& input)
{
	output = static_cast<uint64_t>(_mm_cvtsi128_si64(input));
}
/** @} */
536 | 
// Switch the floating-point rounding mode to "round downward", through the
// standard <cfenv> call or, under MSVC, through _control87.
inline void set_float_rounding_mode()
{
#ifndef _MSC_VER
	std::fesetround(FE_DOWNWARD);
#else
	_control87(RC_DOWN, MCW_RC);
#endif
}
545 | 
/** First cache-line shuffle of the cryptonight_monero_v8 variant (expanded by CN_STEP1).
 *
 * Does nothing unless ALGO == cryptonight_monero_v8. Loads the three 16-byte
 * chunks at byte offsets `(idx0 & MASK) ^ 0x10`, `^ 0x20` and `^ 0x30` of `l0`
 * and stores them back rotated, each with a 64-bit-lane addition:
 *   slot 0x10 <- chunk3 + bx1
 *   slot 0x20 <- chunk1 + bx0
 *   slot 0x30 <- chunk2 + ax0
 *
 * Aligned load/store intrinsics are used. `ALGO` and `MASK` must be visible at
 * the expansion site; the parameter `n` is not used by the body.
 */
546 | #define CN_MONERO_V8_SHUFFLE_0(n, l0, idx0, ax0, bx0, bx1) \
547 | 	/* Shuffle the other 3x16 byte chunks in the current 64-byte cache line */ \
548 | 	if(ALGO == cryptonight_monero_v8) \
549 | 	{ \
550 | 		const uint64_t idx1 = idx0 & MASK; \
551 | 		const __m128i chunk1 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x10]); \
552 | 		const __m128i chunk2 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x20]); \
553 | 		const __m128i chunk3 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x30]); \
554 | 		_mm_store_si128((__m128i *)&l0[idx1 ^ 0x10], _mm_add_epi64(chunk3, bx1)); \
555 | 		_mm_store_si128((__m128i *)&l0[idx1 ^ 0x20], _mm_add_epi64(chunk1, bx0)); \
556 | 		_mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \
557 | 	}
558 | 
/** Second cache-line shuffle of the cryptonight_monero_v8 variant; expanded by
 * CN_STEP3 right after the 64x64-bit multiplication.
 *
 * Does nothing unless ALGO == cryptonight_monero_v8. It performs the same
 * load / rotate / add / store sequence as CN_MONERO_V8_SHUFFLE_0 and in
 * addition mixes the multiplication result into the data:
 *  - chunk1 is XORed with the vector whose low 64 bits are `hi` and whose high
 *    64 bits are `lo` before it is added and stored;
 *  - `hi` is XORed in place with the low 64 bits of chunk2 and `lo` with the
 *    high 64 bits of chunk2.
 *
 * `lo` and `hi` must therefore be modifiable lvalues (they are uint64_t in
 * CN_STEP3). `ALGO` and `MASK` must be visible at the expansion site; the
 * parameter `n` is not used by the body.
 */
559 | #define CN_MONERO_V8_SHUFFLE_1(n, l0, idx0, ax0, bx0, bx1, lo, hi) \
560 | 	/* Shuffle the other 3x16 byte chunks in the current 64-byte cache line */ \
561 | 	if(ALGO == cryptonight_monero_v8) \
562 | 	{ \
563 | 		const uint64_t idx1 = idx0 & MASK; \
564 | 		const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)&l0[idx1 ^ 0x10]), _mm_set_epi64x(lo, hi)); \
565 | 		const __m128i chunk2 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x20]); \
566 | 		hi ^= ((uint64_t*)&chunk2)[0]; \
567 | 		lo ^= ((uint64_t*)&chunk2)[1]; \
568 | 		const __m128i chunk3 = _mm_load_si128((__m128i *)&l0[idx1 ^ 0x30]); \
569 | 		_mm_store_si128((__m128i *)&l0[idx1 ^ 0x10], _mm_add_epi64(chunk3, bx1)); \
570 | 		_mm_store_si128((__m128i *)&l0[idx1 ^ 0x20], _mm_add_epi64(chunk1, bx0)); \
571 | 		_mm_store_si128((__m128i *)&l0[idx1 ^ 0x30], _mm_add_epi64(chunk2, ax0)); \
572 | 	}
573 | 
/** Integer division / square-root stage of the cryptonight_monero_v8 variant;
 * expanded by CN_STEP3.
 *
 * Does nothing unless ALGO == cryptonight_monero_v8. In order:
 *  1. `cl` is XORed with the low 64 bits of `division_result_xmm` and with
 *     `sqrt_result << 32`, i.e. with the values these variables held on entry.
 *  2. A 32-bit divisor `d` is formed from the low 64 bits of `cx` plus twice
 *     the square-root value, with bit 31 and bit 0 forced to 1.
 *  3. The high 64 bits of `cx` are divided by `d`; the low 32 bits of the
 *     quotient and the remainder shifted left by 32 are packed into
 *     `division_result_xmm`.
 *  4. `sqrt_result` is replaced by
 *     `int_sqrt33_1_double_precision(cx_64 + division_result)`.
 *
 * `sqrt_result` may be a uint64_t or an __m128i; the assign() overloads do the
 * conversion. `cl` must be a modifiable lvalue. `ALGO` must be visible at the
 * expansion site; the parameter `n` is not used by the body.
 */
574 | #define CN_MONERO_V8_DIV(n, cx, sqrt_result, division_result_xmm, cl) \
575 | 	if(ALGO == cryptonight_monero_v8) \
576 | 	{ \
577 | 		uint64_t sqrt_result_tmp; \
578 | 		assign(sqrt_result_tmp, sqrt_result); \
579 | 		/* Use division and square root results from the _previous_ iteration to hide the latency */ \
580 | 		const uint64_t cx_64 = _mm_cvtsi128_si64(cx); \
581 | 		cl ^= static_cast<uint64_t>(_mm_cvtsi128_si64(division_result_xmm)) ^ (sqrt_result_tmp << 32); \
582 | 		const uint32_t d = (cx_64 + (sqrt_result_tmp << 1)) | 0x80000001UL; \
583 | 		/* Most and least significant bits in the divisor are set to 1 \
584 | 		 * to make sure we don't divide by a small or even number, \
585 | 		 * so there are no shortcuts for such cases \
586 | 		 * \
587 | 		 * Quotient may be as large as (2^64 - 1)/(2^31 + 1) = 8589934588 = 2^33 - 4 \
588 | 		 * We drop the highest bit to fit both quotient and remainder in 32 bits \
589 | 		 */  \
590 | 		/* Compiler will optimize it to a single div instruction */ \
591 | 		const uint64_t cx_s = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
592 | 		const uint64_t division_result = static_cast<uint32_t>(cx_s / d) + ((cx_s % d) << 32); \
593 | 		division_result_xmm = _mm_cvtsi64_si128(static_cast<int64_t>(division_result)); \
594 | 		/* Use division_result as an input for the square root to prevent parallel implementation in hardware */ \
595 | 		assign(sqrt_result, int_sqrt33_1_double_precision(cx_64 + division_result)); \
596 | 	}
597 | 
/** Input-length guard expanded at the top of every Cryptonight_hash<N>::hash().
 *
 * For the algorithms listed in the condition, an input shorter than 43 bytes
 * makes the enclosing function zero `32 * N` bytes of `output` and return.
 * CN_INIT reads 8 bytes at input offset 35 for exactly these algorithms, which
 * is where the 43-byte minimum comes from.
 *
 * `ALGO`, `N`, `len` and `output` must be visible at the expansion site.
 */
598 | #define CN_INIT_SINGLE \
599 | 	if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) && len < 43) \
600 | 	{ \
601 | 		memset(output, 0, 32 * N); \
602 | 		return; \
603 | 	}
604 | 
/** Per-lane initialisation: declares and fills the working variables of lane `n`.
 *
 *  - Runs keccak over the n-th message (`input + len * n`, `len` bytes) into
 *    `ctx[n]->hash_state`.
 *  - For the algorithms listed in the condition, sets `monero_const` to the
 *    8 input bytes at offset 35 XORed with 64-bit word 24 of the hash state.
 *    For every other algorithm `monero_const` is declared but left
 *    uninitialised.
 *  - Expands the hash state into the scratchpad with cn_explode_scratchpad.
 *  - Derives `idx0`, `ax0` and `bx0` from 64-bit hash-state words 0..7.
 *  - For cryptonight_monero_v8 additionally derives `bx1`,
 *    `division_result_xmm` and `sqrt_result` from words 8..13 and calls
 *    set_float_rounding_mode().
 *
 * The expansion ends with the declaration `__m128i *ptr0` and no semicolon;
 * the semicolon is supplied at the place of use.
 *
 * `input`, `len`, `ctx`, `ALGO`, `MEM`, `SOFT_AES`, `PREFETCH` and `N` must be
 * visible at the expansion site.
 */
605 | #define CN_INIT(n, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm) \
606 | 	keccak((const uint8_t *)input + len * n, len, ctx[n]->hash_state, 200); \
607 | 	uint64_t monero_const; \
608 | 	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) \
609 | 	{ \
610 | 		monero_const =  *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(input) + len * n + 35); \
611 | 		monero_const ^=  *(reinterpret_cast<const uint64_t*>(ctx[n]->hash_state) + 24); \
612 | 	} \
613 | 	/* Optim - 99% time boundary */ \
614 | 	cn_explode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[n]->hash_state, (__m128i*)ctx[n]->long_state); \
615 | 	\
616 | 	__m128i ax0; \
617 | 	uint64_t idx0; \
618 | 	__m128i bx0; \
619 | 	uint8_t* l0 = ctx[n]->long_state; \
620 | 	/* BEGIN cryptonight_monero_v8 variables */ \
621 | 	__m128i bx1; \
622 | 	__m128i division_result_xmm; \
623 | 	GetOptimalSqrtType_t<N> sqrt_result; \
624 | 	/* END cryptonight_monero_v8 variables */ \
625 | 	{ \
626 | 		uint64_t* h0 = (uint64_t*)ctx[n]->hash_state; \
627 | 		idx0 = h0[0] ^ h0[4]; \
628 | 		ax0 = _mm_set_epi64x(h0[1] ^ h0[5], idx0); \
629 | 		bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); \
630 | 		if(ALGO == cryptonight_monero_v8) \
631 | 		{ \
632 | 			bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]); \
633 | 			division_result_xmm = _mm_cvtsi64_si128(h0[12]); \
634 | 			assign(sqrt_result, h0[13]); \
635 | 			set_float_rounding_mode(); \
636 | 		} \
637 | 	} \
638 | 	__m128i *ptr0
639 | 
/** Main-loop step 1: load the scratchpad block selected by `idx0` and transform it.
 *
 * Declares `cx`, points `ptr0` at `l0[idx0 & MASK]` and loads 16 bytes from
 * there. The block is then passed together with `ax0` through
 * aes_round_bittube2() for cryptonight_bittube2, otherwise through
 * soft_aesenc() when SOFT_AES is set or the _mm_aesenc_si128 intrinsic when it
 * is not. The step ends with CN_MONERO_V8_SHUFFLE_0.
 *
 * `ALGO`, `MASK` and `SOFT_AES` must be visible at the expansion site;
 * `monero_const` is not used by the body.
 */
640 | #define CN_STEP1(n, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1) \
641 | 	__m128i cx; \
642 | 	ptr0 = (__m128i *)&l0[idx0 & MASK]; \
643 | 	cx = _mm_load_si128(ptr0); \
644 | 	if (ALGO == cryptonight_bittube2) \
645 | 	{ \
646 | 		cx = aes_round_bittube2(cx, ax0); \
647 | 	} \
648 | 	else \
649 | 	{ \
650 | 		if(SOFT_AES) \
651 | 			cx = soft_aesenc(cx, ax0); \
652 | 		else \
653 | 			cx = _mm_aesenc_si128(cx, ax0); \
654 | 	} \
655 | 	CN_MONERO_V8_SHUFFLE_0(n, l0, idx0, ax0, bx0, bx1)
656 | 
/** Main-loop step 2: write back `bx0 ^ cx` and select the next scratchpad block.
 *
 * The XOR result is stored at `ptr0` through cryptonight_monero_tweak<ALGO>()
 * for the algorithms listed in the condition, and with a plain aligned store
 * for all others. Afterwards `idx0` becomes the low 64 bits of `cx`, `ptr0` is
 * re-pointed to `l0[idx0 & MASK]` (and prefetched when PREFETCH is set), and
 * `bx0` takes the value of `cx` unless ALGO is cryptonight_monero_v8, in which
 * case CN_STEP3 performs that update.
 *
 * The expansion ends without a semicolon. `monero_const` and `ax0` are not
 * used by the body.
 */
657 | #define CN_STEP2(n, monero_const, l0, ax0, bx0, idx0, ptr0, cx) \
658 | 	if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) \
659 | 		cryptonight_monero_tweak<ALGO>((uint64_t*)ptr0, _mm_xor_si128(bx0, cx)); \
660 | 	else \
661 | 		_mm_store_si128((__m128i *)ptr0, _mm_xor_si128(bx0, cx)); \
662 | 	idx0 = _mm_cvtsi128_si64(cx); \
663 | 	\
664 | 	ptr0 = (__m128i *)&l0[idx0 & MASK]; \
665 | 	if(PREFETCH) \
666 | 		_mm_prefetch((const char*)ptr0, _MM_HINT_T0); \
667 | 	if(ALGO != cryptonight_monero_v8) \
668 | 		bx0 = cx
669 | 
/** Main-loop step 3: 64x64 -> 128-bit multiplication and accumulation.
 *
 * Declares `lo`, `cl`, `ch`, `al0` and `ah0`. `al0` / `ah0` start as the low /
 * high 64 bits of `ax0`; `cl` / `ch` are the two 64-bit words at `ptr0`.
 * After CN_MONERO_V8_DIV, `idx0 * cl` is computed with _umul128 (result in
 * `lo`, second half in a block-local `hi`), CN_MONERO_V8_SHUFFLE_1 runs, and
 * then `ah0 += lo` and `al0 += hi`. For cryptonight_monero_v8 the rotation
 * `bx1 = bx0; bx0 = cx` happens here. Finally `al0` is stored to word 0 at
 * `ptr0`; word 1 is written by CN_STEP4.
 *
 * The expansion ends without a semicolon. `monero_const` is not used by the
 * body.
 */
670 | #define CN_STEP3(n, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm) \
671 | 	uint64_t lo, cl, ch; \
672 | 	uint64_t al0 = _mm_cvtsi128_si64(ax0); \
673 | 	uint64_t ah0 = ((uint64_t*)&ax0)[1]; \
674 | 	cl = ((uint64_t*)ptr0)[0]; \
675 | 	ch = ((uint64_t*)ptr0)[1]; \
676 | 	CN_MONERO_V8_DIV(n, cx, sqrt_result, division_result_xmm, cl); \
677 | 	{ \
678 | 		uint64_t hi; \
679 | 		lo = _umul128(idx0, cl, &hi); \
680 | 		CN_MONERO_V8_SHUFFLE_1(n, l0, idx0, ax0, bx0, bx1, lo, hi); \
681 | 		ah0 += lo; \
682 | 		al0 += hi; \
683 | 	} \
684 | 	if(ALGO == cryptonight_monero_v8) \
685 | 	{ \
686 | 		bx1 = bx0; \
687 | 		bx0 = cx; \
688 | 	} \
689 | 	((uint64_t*)ptr0)[0] = al0; \
690 | 	if(PREFETCH) \
691 | 		_mm_prefetch((const char*)ptr0, _MM_HINT_T0)
692 | 
/** Main-loop step 4: store the second result word and update `ax0` / `idx0`.
 *
 * Word 1 at `ptr0` receives `ah0`; for the algorithms listed in the outer
 * condition it is first XORed with `monero_const`, and for cryptonight_ipbc and
 * cryptonight_bittube2 additionally with word 0 at `ptr0`. Afterwards
 * `al0 ^= cl`, `ah0 ^= ch`, `ax0` is rebuilt from (`ah0`, `al0`) and `idx0`
 * becomes `al0`.
 *
 * Unlike the other step macros, this expansion ends with its own semicolon.
 */
693 | #define CN_STEP4(n, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0) \
694 | 	if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2) \
695 | 	{ \
696 | 		if (ALGO == cryptonight_ipbc || ALGO == cryptonight_bittube2) \
697 | 			((uint64_t*)ptr0)[1] = ah0 ^ monero_const ^ ((uint64_t*)ptr0)[0]; \
698 | 		else \
699 | 			((uint64_t*)ptr0)[1] = ah0 ^ monero_const; \
700 | 	} \
701 | 	else \
702 | 		((uint64_t*)ptr0)[1] = ah0; \
703 | 	al0 ^= cl; \
704 | 	ah0 ^= ch; \
705 | 	ax0 = _mm_set_epi64x(ah0, al0); \
706 | 	idx0 = al0;
707 | 
/** Main-loop step 5: additional signed-division tweak.
 *
 * Active for cryptonight_heavy / cryptonight_bittube2 (first branch) and for
 * cryptonight_haven / cryptonight_superfast (second branch); no branch is
 * taken for any other algorithm. Both branches re-point `ptr0` to
 * `l0[idx0 & MASK]`, read a signed 64-bit value `u` (word 0) and a signed
 * 32-bit value `d` (third 32-bit word), compute `q = u / (d | 0x5)` and store
 * `u ^ q` back to word 0. The next `idx0` is `d ^ q` in the first branch and
 * `(~d) ^ q` in the second.
 *
 * `d | 0x5` always has bits 0 and 2 set, so the divisor is never zero.
 *
 * NOTE(review): `u == INT64_MIN` combined with `(d | 0x5) == -1` would overflow
 * the signed division - confirm whether that input can be ruled out.
 */
708 | #define CN_STEP5(n, monero_const, l0, ax0, bx0, idx0, ptr0) \
709 | 	if(ALGO == cryptonight_heavy || ALGO == cryptonight_bittube2) \
710 | 	{ \
711 | 		ptr0 = (__m128i *)&l0[idx0 & MASK]; \
712 | 		int64_t u  = ((int64_t*)ptr0)[0]; \
713 | 		int32_t d  = ((int32_t*)ptr0)[2]; \
714 | 		int64_t q = u / (d | 0x5); \
715 | 		\
716 | 		((int64_t*)ptr0)[0] = u ^ q; \
717 | 		idx0 = d ^ q; \
718 | 	} \
719 | 	else if(ALGO == cryptonight_haven || ALGO == cryptonight_superfast) \
720 | 	{ \
721 | 		ptr0 = (__m128i *)&l0[idx0 & MASK]; \
722 | 		int64_t u  = ((int64_t*)ptr0)[0]; \
723 | 		int32_t d  = ((int32_t*)ptr0)[2]; \
724 | 		int64_t q = u / (d | 0x5); \
725 | 		\
726 | 		((int64_t*)ptr0)[0] = u ^ q; \
727 | 		idx0 = (~d) ^ q; \
728 | 	}
729 | 
/** Final stage for lane `n`.
 *
 * Folds the scratchpad back into the hash state with cn_implode_scratchpad,
 * calls `keccakf(state, 24)` on it, and then invokes the `extra_hashes` entry
 * selected by `hash_state[0] & 3` on the 200-byte hash state, writing the
 * result to `output + 32 * n`.
 *
 * The expansion ends without a semicolon. `ctx`, `output`, `ALGO`, `MEM`,
 * `SOFT_AES` and `PREFETCH` must be visible at the expansion site.
 */
730 | #define CN_FINALIZE(n) \
731 | 	/* Optim - 90% time boundary */ \
732 | 	cn_implode_scratchpad<MEM, SOFT_AES, PREFETCH, ALGO>((__m128i*)ctx[n]->long_state, (__m128i*)ctx[n]->hash_state); \
733 | 	/* Optim - 99% time boundary */ \
734 | 	keccakf((uint64_t*)ctx[n]->hash_state, 24); \
735 | 	extra_hashes[ctx[n]->hash_state[0] & 3](ctx[n]->hash_state, 200, (char*)output + 32 * n)
736 | 
737 | //! defer the evaluation of a macro
// NOTE(review): the MSVC branch appends an empty CN_EMPTY() expansion, presumably to work
// around MSVC's handling of __VA_ARGS__ - confirm before simplifying.
738 | #ifndef _MSC_VER
739 | #	define CN_DEFER(...) __VA_ARGS__
740 | #else
741 | #	define CN_EMPTY(...)
742 | #	define CN_DEFER(...) __VA_ARGS__ CN_EMPTY()
743 | #endif
744 | 
745 | //! execute the macro f with the passed arguments
// Used by REPEAT_n so that the CN_ENUM_n(...) argument is expanded into a full
// argument list before f is invoked.
746 | #define CN_EXEC(f,...) CN_DEFER(f)(__VA_ARGS__)
747 | 
748 | /** append n to every argument and keep n itself as the first argument
749 |  *
750 |  * @param n number which is appended to the arguments (except the first argument n)
751 |  *          The number in the macro name is the count of arguments that follow n; CN_ENUM_0 yields n alone.
752 |  * @code{.cpp}
753 |  * CN_ENUM_2(1, foo, bar)
754 |  * // is transformed to
755 |  * 1, foo1, bar1
756 |  * @endcode
757 |  */
758 | #define CN_ENUM_0(n, ...) n
759 | #define CN_ENUM_1(n, x1) n, x1 ## n
760 | #define CN_ENUM_2(n, x1, x2) n, x1 ## n, x2 ## n
761 | #define CN_ENUM_3(n, x1, x2, x3) n, x1 ## n, x2 ## n, x3 ## n
762 | #define CN_ENUM_4(n, x1, x2, x3, x4) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n
763 | #define CN_ENUM_5(n, x1, x2, x3, x4, x5) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n
764 | #define CN_ENUM_6(n, x1, x2, x3, x4, x5, x6) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n
765 | #define CN_ENUM_7(n, x1, x2, x3, x4, x5, x6, x7) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n
766 | #define CN_ENUM_8(n, x1, x2, x3, x4, x5, x6, x7, x8) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n
767 | #define CN_ENUM_9(n, x1, x2, x3, x4, x5, x6, x7, x8, x9) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n, x9 ## n
768 | #define CN_ENUM_10(n, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n, x9 ## n, x10 ## n
769 | #define CN_ENUM_11(n, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n, x9 ## n, x10 ## n, x11 ## n
770 | #define CN_ENUM_12(n, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n, x9 ## n, x10 ## n, x11 ## n, x12 ## n
771 | #define CN_ENUM_13(n, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n, x9 ## n, x10 ## n, x11 ## n, x12 ## n, x13 ## n
772 | #define CN_ENUM_14(n, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n, x9 ## n, x10 ## n, x11 ## n, x12 ## n, x13 ## n, x14 ## n
773 | #define CN_ENUM_15(n, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) n, x1 ## n, x2 ## n, x3 ## n, x4 ## n, x5 ## n, x6 ## n, x7 ## n, x8 ## n, x9 ## n, x10 ## n, x11 ## n, x12 ## n, x13 ## n, x14 ## n, x15 ## n
774 | 
775 |  /** repeat a macro call multiple times
776 |   *
777 |   * @param n number of arguments that follow f
778 |   * @param f name of the macro which should be executed
779 |   * @param ... n parameters; each name gets the repetition index appended
780 |   *
781 |   * @code{.cpp}
782 |   * REPEAT_2(2, f, foo, bar)
783 |   * // is transformed to
784 |   * f(0, foo0, bar0); f(1, foo1, bar1)
785 |   * @endcode
786 |   */
// REPEAT_k expands f once per index 0..k-1; the expansions are separated by ';'
// and the last one carries no trailing semicolon.
787 | #define REPEAT_1(n, f, ...) CN_EXEC(f, CN_ENUM_ ## n(0, __VA_ARGS__))
788 | #define REPEAT_2(n, f, ...) CN_EXEC(f, CN_ENUM_ ## n(0, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(1, __VA_ARGS__))
789 | #define REPEAT_3(n, f, ...) CN_EXEC(f, CN_ENUM_ ## n(0, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(1, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(2, __VA_ARGS__))
790 | #define REPEAT_4(n, f, ...) CN_EXEC(f, CN_ENUM_ ## n(0, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(1, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(2, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(3, __VA_ARGS__))
791 | #define REPEAT_5(n, f, ...) CN_EXEC(f, CN_ENUM_ ## n(0, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(1, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(2, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(3, __VA_ARGS__)); CN_EXEC(f, CN_ENUM_ ## n(4, __VA_ARGS__))
792 | 
/** Hash kernel that computes N hashes per call.
 *
 * Only declared here; the explicit specialisations for N = 1..5 that follow
 * provide the implementations.
 */
793 | template< size_t N>
794 | struct Cryptonight_hash;
795 | 
/** Cryptonight_hash specialisation that computes one hash per call.
 *
 * hash<ALGO, SOFT_AES, PREFETCH>() parameters:
 *  - input  : the message, `len` bytes
 *  - len    : message length in bytes
 *  - output : receives the result; CN_FINALIZE writes it at `output + 32 * n`
 *  - ctx    : array of contexts; ctx[0] supplies the hash state and scratchpad
 *
 * The body is assembled from the CN_* macros. The first argument of every
 * REPEAT_1 call is the number of identifiers that follow the macro name.
 */
796 | template< >
797 | struct Cryptonight_hash<1>
798 | {
799 | 	static constexpr size_t N = 1;
800 | 
801 | 	template<algo ALGO, bool SOFT_AES, bool PREFETCH>
802 | 	static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
803 | 	{
804 | 		constexpr size_t MASK = cn_select_mask<ALGO>();
805 | 		constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
806 | 		constexpr size_t MEM = cn_select_memory<ALGO>();
807 | 
		// Zero the output and return if the input is too short for this ALGO.
808 | 		CN_INIT_SINGLE;
		// Declare the lane's working variables and fill the scratchpad.
809 | 		REPEAT_1(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm);
810 | 
811 | 		// Optim - 90% time boundary
812 | 		for (size_t i = 0; i < ITERATIONS; i++)
813 | 		{
814 | 			REPEAT_1(8, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1);
815 | 			REPEAT_1(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
816 | 			REPEAT_1(15, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm);
817 | 			REPEAT_1(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
818 | 			REPEAT_1(6, CN_STEP5, monero_const, l0, ax0, bx0, idx0, ptr0);
819 | 		}
820 | 
		// Fold the scratchpad back into the state and write the final hash.
821 | 		REPEAT_1(0, CN_FINALIZE);
822 | 	}
823 | };
824 | 
/** Cryptonight_hash specialisation that computes two hashes per call.
 *
 * hash<ALGO, SOFT_AES, PREFETCH>() parameters:
 *  - input  : two messages of `len` bytes each, stored back to back
 *             (message n starts at `input + len * n`)
 *  - len    : length of one message in bytes
 *  - output : receives the results; hash n is written at `output + 32 * n`
 *  - ctx    : array of contexts; ctx[0] and ctx[1] are used
 *
 * Every REPEAT_2 call expands its macro once per lane (suffixes 0 and 1), so
 * the lanes advance step by step within each loop iteration.
 */
825 | template< >
826 | struct Cryptonight_hash<2>
827 | {
828 | 	static constexpr size_t N = 2;
829 | 
830 | 	template<algo ALGO, bool SOFT_AES, bool PREFETCH>
831 | 	static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
832 | 	{
833 | 		constexpr size_t MASK = cn_select_mask<ALGO>();
834 | 		constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
835 | 		constexpr size_t MEM = cn_select_memory<ALGO>();
836 | 
		// Zero the output and return if the input is too short for this ALGO.
837 | 		CN_INIT_SINGLE;
		// Declare the per-lane working variables and fill both scratchpads.
838 | 		REPEAT_2(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm);
839 | 
840 | 		// Optim - 90% time boundary
841 | 		for (size_t i = 0; i < ITERATIONS; i++)
842 | 		{
843 | 			REPEAT_2(8, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1);
844 | 			REPEAT_2(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
845 | 			REPEAT_2(15, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm);
846 | 			REPEAT_2(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
847 | 			REPEAT_2(6, CN_STEP5, monero_const, l0, ax0, bx0, idx0, ptr0);
848 | 		}
849 | 
		// Fold each scratchpad back into its state and write the final hashes.
850 | 		REPEAT_2(0, CN_FINALIZE);
851 | 	}
852 | };
853 | 
/** Cryptonight_hash specialisation that computes three hashes per call.
 *
 * hash<ALGO, SOFT_AES, PREFETCH>() parameters:
 *  - input  : three messages of `len` bytes each, stored back to back
 *             (message n starts at `input + len * n`)
 *  - len    : length of one message in bytes
 *  - output : receives the results; hash n is written at `output + 32 * n`
 *  - ctx    : array of contexts; ctx[0] .. ctx[2] are used
 *
 * Every REPEAT_3 call expands its macro once per lane (suffixes 0..2), so the
 * lanes advance step by step within each loop iteration.
 */
854 | template< >
855 | struct Cryptonight_hash<3>
856 | {
857 | 	static constexpr size_t N = 3;
858 | 
859 | 	template<algo ALGO, bool SOFT_AES, bool PREFETCH>
860 | 	static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
861 | 	{
862 | 		constexpr size_t MASK = cn_select_mask<ALGO>();
863 | 		constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
864 | 		constexpr size_t MEM = cn_select_memory<ALGO>();
865 | 
		// Zero the output and return if the input is too short for this ALGO.
866 | 		CN_INIT_SINGLE;
		// Declare the per-lane working variables and fill all scratchpads.
867 | 		REPEAT_3(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm);
868 | 
869 | 		// Optim - 90% time boundary
870 | 		for (size_t i = 0; i < ITERATIONS; i++)
871 | 		{
872 | 			REPEAT_3(8, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1);
873 | 			REPEAT_3(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
874 | 			REPEAT_3(15, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm);
875 | 			REPEAT_3(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
876 | 			REPEAT_3(6, CN_STEP5, monero_const, l0, ax0, bx0, idx0, ptr0);
877 | 		}
878 | 
		// Fold each scratchpad back into its state and write the final hashes.
879 | 		REPEAT_3(0, CN_FINALIZE);
880 | 	}
881 | };
882 | 
/** Cryptonight_hash specialisation that computes four hashes per call.
 *
 * hash<ALGO, SOFT_AES, PREFETCH>() parameters:
 *  - input  : four messages of `len` bytes each, stored back to back
 *             (message n starts at `input + len * n`)
 *  - len    : length of one message in bytes
 *  - output : receives the results; hash n is written at `output + 32 * n`
 *  - ctx    : array of contexts; ctx[0] .. ctx[3] are used
 *
 * Every REPEAT_4 call expands its macro once per lane (suffixes 0..3), so the
 * lanes advance step by step within each loop iteration.
 */
883 | template< >
884 | struct Cryptonight_hash<4>
885 | {
886 | 	static constexpr size_t N = 4;
887 | 
888 | 	template<algo ALGO, bool SOFT_AES, bool PREFETCH>
889 | 	static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
890 | 	{
891 | 		constexpr size_t MASK = cn_select_mask<ALGO>();
892 | 		constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
893 | 		constexpr size_t MEM = cn_select_memory<ALGO>();
894 | 
		// Zero the output and return if the input is too short for this ALGO.
895 | 		CN_INIT_SINGLE;
		// Declare the per-lane working variables and fill all scratchpads.
896 | 		REPEAT_4(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm);
897 | 
898 | 		// Optim - 90% time boundary
899 | 		for (size_t i = 0; i < ITERATIONS; i++)
900 | 		{
901 | 			REPEAT_4(8, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1);
902 | 			REPEAT_4(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
903 | 			REPEAT_4(15, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm);
904 | 			REPEAT_4(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
905 | 			REPEAT_4(6, CN_STEP5, monero_const, l0, ax0, bx0, idx0, ptr0);
906 | 		}
907 | 
		// Fold each scratchpad back into its state and write the final hashes.
908 | 		REPEAT_4(0, CN_FINALIZE);
909 | 	}
910 | };
911 | 
/** Cryptonight_hash specialisation that computes five hashes per call.
 *
 * hash<ALGO, SOFT_AES, PREFETCH>() parameters:
 *  - input  : five messages of `len` bytes each, stored back to back
 *             (message n starts at `input + len * n`)
 *  - len    : length of one message in bytes
 *  - output : receives the results; hash n is written at `output + 32 * n`
 *  - ctx    : array of contexts; ctx[0] .. ctx[4] are used
 *
 * Every REPEAT_5 call expands its macro once per lane (suffixes 0..4), so the
 * lanes advance step by step within each loop iteration.
 */
912 | template< >
913 | struct Cryptonight_hash<5>
914 | {
915 | 	static constexpr size_t N = 5;
916 | 
917 | 	template<algo ALGO, bool SOFT_AES, bool PREFETCH>
918 | 	static void hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
919 | 	{
920 | 		constexpr size_t MASK = cn_select_mask<ALGO>();
921 | 		constexpr size_t ITERATIONS = cn_select_iter<ALGO>();
922 | 		constexpr size_t MEM = cn_select_memory<ALGO>();
923 | 
		// Zero the output and return if the input is too short for this ALGO.
924 | 		CN_INIT_SINGLE;
		// Declare the per-lane working variables and fill all scratchpads.
925 | 		REPEAT_5(9, CN_INIT, monero_const, l0, ax0, bx0, idx0, ptr0, bx1, sqrt_result, division_result_xmm);
926 | 
927 | 		// Optim - 90% time boundary
928 | 		for (size_t i = 0; i < ITERATIONS; i++)
929 | 		{
930 | 			REPEAT_5(8, CN_STEP1, monero_const, l0, ax0, bx0, idx0, ptr0, cx, bx1);
931 | 			REPEAT_5(7, CN_STEP2, monero_const, l0, ax0, bx0, idx0, ptr0, cx);
932 | 			REPEAT_5(15, CN_STEP3, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0, cx, bx1, sqrt_result, division_result_xmm);
933 | 			REPEAT_5(11, CN_STEP4, monero_const, l0, ax0, bx0, idx0, ptr0, lo, cl, ch, al0, ah0);
934 | 			REPEAT_5(6, CN_STEP5, monero_const, l0, ax0, bx0, idx0, ptr0);
935 | 		}
936 | 
		// Fold each scratchpad back into its state and write the final hashes.
937 | 		REPEAT_5(0, CN_FINALIZE);
938 | 	}
939 | };
940 | 


--------------------------------------------------------------------------------
/src/main/jni/cryptonight_common.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |   * This program is free software: you can redistribute it and/or modify
  3 |   * it under the terms of the GNU General Public License as published by
  4 |   * the Free Software Foundation, either version 3 of the License, or
  5 |   * any later version.
  6 |   *
  7 |   * This program is distributed in the hope that it will be useful,
  8 |   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 10 |   * GNU General Public License for more details.
 11 |   *
 12 |   * You should have received a copy of the GNU General Public License
 13 |   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |   *
 15 |   * Additional permission under GNU GPL version 3 section 7
 16 |   *
 17 |   * If you modify this Program, or any covered work, by linking or combining
 18 |   * it with OpenSSL (or a modified version of that library), containing parts
 19 |   * covered by the terms of OpenSSL License and SSLeay License, the licensors
 20 |   * of this Program grant you additional permission to convey the resulting work.
 21 |   *
 22 |   */
 23 | 
 24 | extern "C"
 25 | {
 26 | #include "c_groestl.h"
 27 | #include "c_blake256.h"
 28 | #include "c_jh.h"
 29 | #include "c_skein.h"
 30 | }
 31 | #include "algos.hpp"
 32 | #include "cryptonight.h"
 33 | #include "cryptonight_aesni.h"
 34 | #include <stdio.h>
 35 | #include <stdlib.h>
 36 | #include <algorithm>
 37 | 
 38 | #ifdef __GNUC__
 39 | #include <mm_malloc.h>
 40 | #else
 41 | #include <malloc.h>
 42 | #endif // __GNUC__
 43 | 
 44 | #if defined(__APPLE__)
 45 | #include <mach/vm_statistics.h>
 46 | #endif
 47 | 
 48 | #ifdef _WIN32
 49 | #include <windows.h>
 50 | #include <ntsecapi.h>
 51 | #else
 52 | #include <sys/mman.h>
 53 | #include <errno.h>
 54 | #include <string.h>
 55 | #endif // _WIN32
 56 | 
 57 | void do_blake_hash(const void* input, uint32_t len, char* output) {
 58 | 	blake256_hash((uint8_t*)output, (const uint8_t*)input, len);
 59 | }
 60 | 
 61 | void do_groestl_hash(const void* input, uint32_t len, char* output) {
 62 | 	groestl((const uint8_t*)input, len * 8, (uint8_t*)output);
 63 | }
 64 | 
 65 | void do_jh_hash(const void* input, uint32_t len, char* output) {
 66 | 	jh_hash(32 * 8, (const uint8_t*)input, 8 * len, (uint8_t*)output);
 67 | }
 68 | 
 69 | void do_skein_hash(const void* input, uint32_t len, char* output) {
 70 | 	skein_hash(8 * 32, (const uint8_t*)input, 8 * len, (uint8_t*)output);
 71 | }
 72 | 
 // Dispatch table of the four final hash functions, in the order blake, groestl, jh, skein.
 // CN_FINALIZE picks the entry with `hash_state[0] & 3`.
 73 | void(*const extra_hashes[4])(const void *, uint32_t, char *) = { do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash };
 74 | 
 75 | #ifdef _WIN32
 76 | 
 // Set by cryptonight_init(); read by cryptonight_alloc_ctx() to choose the VirtualAlloc failure message.
 77 | BOOL bRebootDesirable = FALSE; // If VirtualAlloc fails, suggest a reboot
 78 | 
 79 | BOOL AddPrivilege(TCHAR* pszPrivilege)
 80 | {
 81 | 	HANDLE           hToken;
 82 | 	TOKEN_PRIVILEGES tp;
 83 | 	BOOL             status;
 84 | 
 85 | 	if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
 86 | 		return FALSE;
 87 | 
 88 | 	if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid))
 89 | 		return FALSE;
 90 | 
 91 | 	tp.PrivilegeCount = 1;
 92 | 	tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
 93 | 	status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
 94 | 
 95 | 	if (!status || (GetLastError() != ERROR_SUCCESS))
 96 | 		return FALSE;
 97 | 
 98 | 	CloseHandle(hToken);
 99 | 	return TRUE;
100 | }
101 | 
102 | BOOL AddLargePageRights()
103 | {
104 | 	HANDLE hToken;
105 | 	PTOKEN_USER user = NULL;
106 | 
107 | 	if (OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hToken) == TRUE)
108 | 	{
109 | 		TOKEN_ELEVATION Elevation;
110 | 		DWORD cbSize = sizeof(TOKEN_ELEVATION);
111 | 		BOOL bIsElevated = FALSE;
112 | 
113 | 		if (GetTokenInformation(hToken, TokenElevation, &Elevation, sizeof(Elevation), &cbSize))
114 | 			bIsElevated = Elevation.TokenIsElevated;
115 | 
116 | 		DWORD size = 0;
117 | 		GetTokenInformation(hToken, TokenUser, NULL, 0, &size);
118 | 
119 | 		if (size > 0 && bIsElevated)
120 | 		{
121 | 			user = (PTOKEN_USER)LocalAlloc(LPTR, size);
122 | 			GetTokenInformation(hToken, TokenUser, user, size, &size);
123 | 		}
124 | 
125 | 		CloseHandle(hToken);
126 | 	}
127 | 
128 | 	if (!user)
129 | 		return FALSE;
130 | 
131 | 	LSA_HANDLE handle;
132 | 	LSA_OBJECT_ATTRIBUTES attributes;
133 | 	ZeroMemory(&attributes, sizeof(attributes));
134 | 
135 | 	BOOL result = FALSE;
136 | 	if (LsaOpenPolicy(NULL, &attributes, POLICY_ALL_ACCESS, &handle) == 0)
137 | 	{
138 | 		LSA_UNICODE_STRING lockmem;
139 | 		lockmem.Buffer = L"SeLockMemoryPrivilege";
140 | 		lockmem.Length = 42;
141 | 		lockmem.MaximumLength = 44;
142 | 
143 | 		PLSA_UNICODE_STRING rights = NULL;
144 | 		ULONG cnt = 0;
145 | 		BOOL bHasRights = FALSE;
146 | 		if (LsaEnumerateAccountRights(handle, user->User.Sid, &rights, &cnt) == 0)
147 | 		{
148 | 			for (size_t i = 0; i < cnt; i++)
149 | 			{
150 | 				if (rights[i].Length == lockmem.Length &&
151 | 					memcmp(rights[i].Buffer, lockmem.Buffer, 42) == 0)
152 | 				{
153 | 					bHasRights = TRUE;
154 | 					break;
155 | 				}
156 | 			}
157 | 
158 | 			LsaFreeMemory(rights);
159 | 		}
160 | 
161 | 		if (!bHasRights)
162 | 			result = LsaAddAccountRights(handle, user->User.Sid, &lockmem, 1) == 0;
163 | 
164 | 		LsaClose(handle);
165 | 	}
166 | 
167 | 	LocalFree(user);
168 | 	return result;
169 | }
170 | #endif
171 | 
172 | size_t cryptonight_init(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg)
173 | {
174 | #ifdef _WIN32
175 | 	if (use_fast_mem == 0)
176 | 		return 1;
177 | 
178 | 	if (AddPrivilege(TEXT("SeLockMemoryPrivilege")) == 0)
179 | 	{
180 | 		if (AddLargePageRights())
181 | 		{
182 | 			msg->warning = "Added SeLockMemoryPrivilege to the current account. You need to reboot for it to work";
183 | 			bRebootDesirable = TRUE;
184 | 		}
185 | 		else
186 | 			msg->warning = "Obtaining SeLockMemoryPrivilege failed.";
187 | 
188 | 		return 0;
189 | 	}
190 | 
191 | 	bRebootDesirable = TRUE;
192 | 	return 1;
193 | #else
194 | 	return 1;
195 | #endif // _WIN32
196 | }
197 | 
198 | cryptonight_ctx* cryptonight_alloc_ctx(size_t use_fast_mem, size_t use_mlock, alloc_msg* msg)
199 | {
200 | 	size_t hashMemSize = CRYPTONIGHT_MEMORY;
201 | 
202 | 	cryptonight_ctx* ptr = (cryptonight_ctx*)_mm_malloc(sizeof(cryptonight_ctx), 4096);
203 | 
204 | 	if (use_fast_mem == 0)
205 | 	{
206 | 		// use 2MiB aligned memory
207 | 		ptr->long_state = (uint8_t*)_mm_malloc(hashMemSize, hashMemSize);
208 | 		ptr->ctx_info[0] = 0;
209 | 		ptr->ctx_info[1] = 0;
210 | 		return ptr;
211 | 	}
212 | 
213 | #ifdef _WIN32
214 | 	SIZE_T iLargePageMin = GetLargePageMinimum();
215 | 
216 | 	if (hashMemSize > iLargePageMin)
217 | 		iLargePageMin *= 2;
218 | 
219 | 	ptr->long_state = (uint8_t*)VirtualAlloc(NULL, iLargePageMin,
220 | 		MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
221 | 
222 | 	if (ptr->long_state == NULL)
223 | 	{
224 | 		_mm_free(ptr);
225 | 		if (bRebootDesirable)
226 | 			msg->warning = "VirtualAlloc failed. Reboot might help.";
227 | 		else
228 | 			msg->warning = "VirtualAlloc failed.";
229 | 		return NULL;
230 | 	}
231 | 	else
232 | 	{
233 | 		ptr->ctx_info[0] = 1;
234 | 		return ptr;
235 | 	}
236 | #else
237 | 	//http://man7.org/linux/man-pages/man2/mmap.2.html
238 | #if defined(__APPLE__)
239 | 	ptr->long_state = (uint8_t*)mmap(NULL, hashMemSize, PROT_READ | PROT_WRITE,
240 | 		MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
241 | #elif defined(__FreeBSD__)
242 | 	ptr->long_state = (uint8_t*)mmap(NULL, hashMemSize, PROT_READ | PROT_WRITE,
243 | 		MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
244 | #elif defined(__OpenBSD__)
245 | 	ptr->long_state = (uint8_t*)mmap(NULL, hashMemSize, PROT_READ | PROT_WRITE,
246 | 		MAP_PRIVATE | MAP_ANON, -1, 0);
247 | #else
248 | 	ptr->long_state = (uint8_t*)mmap(NULL, hashMemSize, PROT_READ | PROT_WRITE,
249 | 		MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
250 | #endif
251 | 
252 | 	if (ptr->long_state == MAP_FAILED)
253 | 	{
254 | 		_mm_free(ptr);
255 | 		msg->warning = "mmap failed, check attribute 'use_slow_memory' in 'config.txt'";
256 | 		return NULL;
257 | 	}
258 | 
259 | 	ptr->ctx_info[0] = 1;
260 | 
261 | 	if (madvise(ptr->long_state, hashMemSize, MADV_RANDOM | MADV_WILLNEED) != 0)
262 | 		msg->warning = "madvise failed";
263 | 
264 | 	ptr->ctx_info[1] = 0;
265 | 	if (use_mlock != 0 && mlock(ptr->long_state, hashMemSize) != 0)
266 | 		msg->warning = "mlock failed";
267 | 	else
268 | 		ptr->ctx_info[1] = 1;
269 | 
270 | 	return ptr;
271 | #endif // _WIN32
272 | }
273 | 
274 | void cryptonight_free_ctx(cryptonight_ctx* ctx)
275 | {
276 | 	size_t hashMemSize = CRYPTONIGHT_MEMORY;
277 | 
278 | 	if (ctx->ctx_info[0] != 0)
279 | 	{
280 | #ifdef _WIN32
281 | 		VirtualFree(ctx->long_state, 0, MEM_RELEASE);
282 | #else
283 | 		if (ctx->ctx_info[1] != 0)
284 | 			munlock(ctx->long_state, hashMemSize);
285 | 		munmap(ctx->long_state, hashMemSize);
286 | #endif // _WIN32
287 | 	}
288 | 	else
289 | 		_mm_free(ctx->long_state);
290 | 
291 | 	_mm_free(ctx);
292 | }
293 | 


--------------------------------------------------------------------------------
/src/main/jni/groestl_tables.h:
--------------------------------------------------------------------------------
 1 | #ifndef __tables_h
 2 | #define __tables_h
 3 | 
 4 | 
 5 | const uint32_t T[512] = {0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0xd17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc
 6 | , 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5
 7 | , 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d
 8 | , 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded
 9 | , 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1
10 | , 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441
11 | , 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4
12 | , 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba
13 | , 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616
14 | , 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2
15 | , 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c
16 | , 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de
17 | , 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7
18 | , 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e
19 | , 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c
20 | , 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7
21 | , 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b
22 | , 0x57f96a93, 0x93573df9, 0xf20d5855, 0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4
23 | , 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e
24 | , 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a
25 | , 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37
26 | , 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86
27 | , 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b
28 | , 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028
29 | , 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3
30 | , 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94
31 | , 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836
32 | , 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0
33 | , 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2
34 | , 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e
35 | , 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3
36 | , 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e};
37 | 
38 | #endif /* __tables_h */
39 | 


--------------------------------------------------------------------------------
/src/main/jni/hash.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <stdint.h>
4 | 
5 | typedef unsigned char BitSequence;
6 | typedef uint32_t DataLength;
7 | typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn;
8 | 


--------------------------------------------------------------------------------
/src/main/jni/hasher.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <stdio.h>
 3 | 
 4 | #include <iostream>
 5 | #include "jni.h"
 6 | #include "cryptonight_aesni.h"
 7 | 
 8 | /* Some parts are taken from the XMR-STAK project, which can be found at https://github.com/fireice-uk/xmr-stak */
 9 | extern "C" {
10 | 
11 | #define SIZE 5
12 | #define FUNC 1
13 | 
14 | 	/*
15 | 	 * Allocates one cryptonight_ctx, retrying cryptonight_alloc_ctx() with the
16 | 	 * argument sets (1, 1, &msg), (1, 0, &msg) and finally (0, 0, NULL).
17 | 	 * Returns the first context obtained, or NULL when every attempt fails.
18 | 	 * Release the result with cryptonight_free_ctx().
19 | 	 */
20 | 	cryptonight_ctx* alloc_ctx()
21 | 	{
22 | 		alloc_msg msg = { 0 };
23 | 		cryptonight_ctx* ctx = cryptonight_alloc_ctx(1, 1, &msg);
24 | 		if (ctx == NULL) {
25 | 			ctx = cryptonight_alloc_ctx(1, 0, &msg);
26 | 		}
27 | 		if (ctx == NULL) {
28 | 			ctx = cryptonight_alloc_ctx(0, 0, NULL);
29 | 		}
30 | 		// Previously a successful first attempt fell through to `return nullptr`,
31 | 		// leaking the context; every path now returns what was allocated.
32 | 		return ctx;
33 | 	}
34 | 
35 | 	/*
36 | 	 * Copies a Java byte[] into a newly allocated native buffer of the same
37 | 	 * length. The caller owns the buffer and must release it with delete[].
38 | 	 */
39 | 	unsigned char* as_unsigned_char_array(JNIEnv *env, jbyteArray array) {
40 | 		const jsize len = env->GetArrayLength(array);
41 | 		unsigned char* buf = new unsigned char[len];
42 | 		env->GetByteArrayRegion(array, 0, len, reinterpret_cast<jbyte*>(buf));
43 | 		return buf;
44 | 	}
45 | 
46 | 	/*
47 | 	 * Creates a Java byte[] of length len and fills it with a copy of buf.
48 | 	 * Returns NULL when the array could not be created.
49 | 	 */
50 | 	jbyteArray as_byte_array(JNIEnv *env, unsigned char* buf, int len) {
51 | 		jbyteArray array = env->NewByteArray(len);
52 | 		if (array != NULL) {
53 | 			env->SetByteArrayRegion(array, 0, len, reinterpret_cast<jbyte*>(buf));
54 | 		}
55 | 		return array;
56 | 	}
57 | 
58 | 	/*
59 | 	 * JNI entry point for the Java method named by this symbol: hashes the bytes
60 | 	 * of input and copies the result into output. Throws OutOfMemoryError,
61 | 	 * leaving output untouched, when a hashing context cannot be allocated.
62 | 	 *
63 | 	 * NOTE(review): output must be at least as long as the digest hash() writes
64 | 	 * (presumably 32 bytes) - confirm against the Java caller.
65 | 	 */
66 | 	JNIEXPORT void JNICALL Java_tk_netindev_drill_hasher_Hasher_slowHash(JNIEnv *env, jclass clazz, jbyteArray input, jbyteArray output) {
67 | 		const jsize outputLength = env->GetArrayLength(output);
68 | 		unsigned char* inputBuffer = as_unsigned_char_array(env, input);
69 | 		unsigned char* outputBuffer = as_unsigned_char_array(env, output);
70 | 
71 | 		cryptonight_ctx* ctx[SIZE];
72 | 		bool allocated = true;
73 | 		for (int i = 0; i < SIZE; i++) {
74 | 			ctx[i] = alloc_ctx();
75 | 			allocated = allocated && ctx[i] != NULL;
76 | 		}
77 | 
78 | 		if (allocated) {
79 | 			Cryptonight_hash<FUNC>::template hash<cryptonight_monero_v8, false, false>(inputBuffer, env->GetArrayLength(input), outputBuffer, ctx);
80 | 			// outputBuffer was allocated with new[], not obtained from
81 | 			// GetByteArrayElements, so it is copied back with SetByteArrayRegion.
82 | 			env->SetByteArrayRegion(output, 0, outputLength, reinterpret_cast<jbyte*>(outputBuffer));
83 | 		} else {
84 | 			jclass oom = env->FindClass("java/lang/OutOfMemoryError");
85 | 			if (oom != NULL) {
86 | 				env->ThrowNew(oom, "unable to allocate cryptonight context");
87 | 			}
88 | 		}
89 | 
90 | 		for (int i = 0; i < SIZE; i++) {
91 | 			if (ctx[i] != NULL) {
92 | 				cryptonight_free_ctx(ctx[i]);
93 | 			}
94 | 		}
95 | 		delete[] inputBuffer;
96 | 		delete[] outputBuffer;
97 | 	}
98 | 
99 | };


--------------------------------------------------------------------------------
/src/main/jni/int-util.h:
--------------------------------------------------------------------------------
  1 | // Copyright(c) 2012 - 2013 The Cryptonote developers
  2 | // Distributed under the MIT/X11 software license, see the accompanying
  3 | // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4 | 
  5 | #pragma once
  6 | 
  7 | #include <assert.h>
  8 | #include <stdbool.h>
  9 | #include <stdint.h>
 10 | #include <string.h>
 11 | 
 12 | #if defined(_MSC_VER)
 13 | #include <stdlib.h>
 14 | 
 15 | static inline uint32_t rol32(uint32_t x, int r) { /* Rotates x left by r bits through the _rotl compiler intrinsic. */
 16 | 	static_assert(sizeof(uint32_t) == sizeof(unsigned int), "this code assumes 32-bit integers");
 17 | 	return _rotl(x, r);
 18 | }
 19 | 
 20 | static inline uint64_t rol64(uint64_t x, int r) { /* Rotates x left by r bits through the _rotl64 compiler intrinsic. */
 21 | 	return _rotl64(x, r);
 22 | }
 23 | 
 24 | #else
 25 | 
 26 | static inline uint32_t rol32(uint32_t x, int r) { /* Rotates x left by r bits; only the low 5 bits of r take part. */
 27 | 	return (x >> (-r & 31)) | (x << (r & 31));
 28 | }
 29 | 
 30 | static inline uint64_t rol64(uint64_t x, int r) { /* Rotates x left by r bits; only the low 6 bits of r take part. */
 31 | 	return (x >> (-r & 63)) | (x << (r & 63));
 32 | }
 33 | 
 34 | #endif
 35 | 
 36 | static inline uint64_t hi_dword(uint64_t val) { /* Returns the upper 32 bits of val, moved down to the low half. */
 37 | 	return (uint32_t)(val >> 32);
 38 | }
 39 | 
 40 | static inline uint64_t lo_dword(uint64_t val) { /* Returns the lower 32 bits of val; the upper half of the result is zero. */
 41 | 	return (uint32_t)val;
 42 | }
 43 | 
 44 | static inline uint64_t div_with_reminder(uint64_t dividend, uint32_t divisor, uint32_t* remainder) { /* One long-division step: divides (*remainder << 32) | dividend by divisor, stores the new remainder, returns the quotient. */
 45 | 	const uint64_t numerator = dividend | ((uint64_t)remainder[0] << 32);
 46 | 	remainder[0] = (uint32_t)(numerator % divisor);
 47 | 	return numerator / divisor;
 48 | }
 49 | 
 50 | // Long division with 2^32 base
 51 | static inline uint32_t div128_32(uint64_t dividend_hi, uint64_t dividend_lo, uint32_t divisor, uint64_t* quotient_hi, uint64_t* quotient_lo) {
 52 | 	/* Divides the 128-bit value dividend_hi:dividend_lo by divisor, consuming
 53 | 	 * one 32-bit digit per step from the most significant digit downwards and
 54 | 	 * carrying each step's remainder into the next. Writes the 128-bit
 55 | 	 * quotient to *quotient_hi:*quotient_lo and returns the final remainder. */
 56 | 	uint32_t carry = 0;
 57 | 	uint64_t upper;
 58 | 
 59 | 	upper = div_with_reminder(hi_dword(dividend_hi), divisor, &carry);
 60 | 	*quotient_hi = (upper << 32) | div_with_reminder(lo_dword(dividend_hi), divisor, &carry);
 61 | 
 62 | 	upper = div_with_reminder(hi_dword(dividend_lo), divisor, &carry);
 63 | 	*quotient_lo = (upper << 32) | div_with_reminder(lo_dword(dividend_lo), divisor, &carry);
 64 | 
 65 | 	return carry;
 66 | }
 67 | 
 68 | #define IDENT32(x) ((uint32_t) (x)) /* identity: casts x to uint32_t, byte order unchanged */
 69 | #define IDENT64(x) ((uint64_t) (x)) /* identity: casts x to uint64_t, byte order unchanged */
 70 | 
 71 | #define SWAP32(x) ((((uint32_t) (x) & 0x000000ff) << 24) | /* reverses the four bytes of x; x is evaluated four times */ \
 72 |   (((uint32_t) (x) & 0x0000ff00) <<  8) | \
 73 |   (((uint32_t) (x) & 0x00ff0000) >>  8) | \
 74 |   (((uint32_t) (x) & 0xff000000) >> 24))
 75 | #define SWAP64(x) ((((uint64_t) (x) & 0x00000000000000ff) << 56) | /* reverses the eight bytes of x; x is evaluated eight times */ \
 76 |   (((uint64_t) (x) & 0x000000000000ff00) << 40) | \
 77 |   (((uint64_t) (x) & 0x0000000000ff0000) << 24) | \
 78 |   (((uint64_t) (x) & 0x00000000ff000000) <<  8) | \
 79 |   (((uint64_t) (x) & 0x000000ff00000000) >>  8) | \
 80 |   (((uint64_t) (x) & 0x0000ff0000000000) >> 24) | \
 81 |   (((uint64_t) (x) & 0x00ff000000000000) >> 40) | \
 82 |   (((uint64_t) (x) & 0xff00000000000000) >> 56))
 83 | 
 84 | static inline uint32_t ident32(uint32_t x) { return x; } /* returns x unchanged; identity counterpart of swap32 */
 85 | static inline uint64_t ident64(uint64_t x) { return x; } /* returns x unchanged; identity counterpart of swap64 */
 86 | 
 87 | static inline uint32_t swap32(uint32_t x) { /* Reverses the byte order of a 32-bit value. */
 88 | 	const uint32_t halves = (x >> 16) | (x << 16); /* exchange the two 16-bit halves first */
 89 | 	return ((halves >> 8) & 0x00ff00ff) | ((halves << 8) & 0xff00ff00); /* then the two bytes inside each half */
 90 | }
 91 | static inline uint64_t swap64(uint64_t x) { /* Reverses the byte order of a 64-bit value. */
 92 | 	x = (x >> 32) | (x << 32); /* exchange the two 32-bit halves */
 93 | 	x = ((x >> 16) & 0x0000ffff0000ffff) | ((x << 16) & 0xffff0000ffff0000); /* then the 16-bit pairs inside each half */
 94 | 	return ((x >> 8) & 0x00ff00ff00ff00ff) | ((x << 8) & 0xff00ff00ff00ff00); /* then adjacent bytes */
 95 | }
 96 | 
 97 | #if defined(__GNUC__) /* UNUSED marks a parameter as intentionally unused when __GNUC__ is defined, and expands to nothing otherwise */
 98 | #define UNUSED __attribute__((unused))
 99 | #else
100 | #define UNUSED
101 | #endif
102 | static inline void mem_inplace_ident(void *mem UNUSED, size_t n UNUSED) { } /* does nothing; no-op counterpart of mem_inplace_swap32/64 */
103 | #undef UNUSED
104 | 
105 | static inline void mem_inplace_swap32(void *mem, size_t n) { /* Byte-swaps each of the n 32-bit words stored at mem, in place. */
106 | 	uint32_t *const words = (uint32_t *)mem;
107 | 	for (size_t idx = 0; idx != n; ++idx) {
108 | 		words[idx] = swap32(words[idx]);
109 | 	}
110 | }
111 | static inline void mem_inplace_swap64(void *mem, size_t n) { /* Byte-swaps each of the n 64-bit words stored at mem, in place. */
112 | 	uint64_t *const words = (uint64_t *)mem;
113 | 	for (size_t idx = 0; idx != n; ++idx) {
114 | 		words[idx] = swap64(words[idx]);
115 | 	}
116 | }
117 | 
118 | static inline void memcpy_ident32(void *dst, const void *src, size_t n) { /* Copies n 32-bit words from src to dst without changing byte order. */
119 | 	memcpy(dst, src, n * sizeof(uint32_t));
120 | }
121 | static inline void memcpy_ident64(void *dst, const void *src, size_t n) { /* Copies n 64-bit words from src to dst without changing byte order. */
122 | 	memcpy(dst, src, n * sizeof(uint64_t));
123 | }
124 | 
125 | static inline void memcpy_swap32(void *dst, const void *src, size_t n) { /* Copies n 32-bit words from src to dst, byte-swapping each one. */
126 | 	const uint32_t *in = (const uint32_t *)src;
127 | 	for (uint32_t *out = (uint32_t *)dst; n > 0; --n) {
128 | 		*out++ = swap32(*in++);
129 | 	}
130 | }
131 | static inline void memcpy_swap64(void *dst, const void *src, size_t n) { /* Copies n 64-bit words from src to dst, byte-swapping each one. */
132 | 	const uint64_t *in = (const uint64_t *)src;
133 | 	for (uint64_t *out = (uint64_t *)dst; n > 0; --n) {
134 | 		*out++ = swap64(*in++);
135 | 	}
136 | }
137 | 
138 | #define SWAP32LE IDENT32 /* 32-bit aliases: every *le name maps to the identity variant, every *be name to the byte-swapping variant */
139 | #define SWAP32BE SWAP32
140 | #define swap32le ident32
141 | #define swap32be swap32
142 | #define mem_inplace_swap32le mem_inplace_ident
143 | #define mem_inplace_swap32be mem_inplace_swap32
144 | #define memcpy_swap32le memcpy_ident32
145 | #define memcpy_swap32be memcpy_swap32
146 | #define SWAP64LE IDENT64 /* 64-bit aliases, same mapping; the choice is unconditional in this header */
147 | #define SWAP64BE SWAP64
148 | #define swap64le ident64
149 | #define swap64be swap64
150 | #define mem_inplace_swap64le mem_inplace_ident
151 | #define mem_inplace_swap64be mem_inplace_swap64
152 | #define memcpy_swap64le memcpy_ident64
153 | #define memcpy_swap64be memcpy_swap64


--------------------------------------------------------------------------------
/src/main/jni/skein_port.h:
--------------------------------------------------------------------------------
  1 | #ifndef _SKEIN_PORT_H_
  2 | #define _SKEIN_PORT_H_
  3 | 
  4 | #include <limits.h>
  5 | #include <stdint.h>
  6 | #include <stddef.h>
  7 | 
  8 | #ifndef RETURN_VALUES /* defines VOID_RETURN and INT_RETURN once, according to DLL_EXPORT / DLL_IMPORT and the compiler */
  9 | #  define RETURN_VALUES
 10 | #  if defined( DLL_EXPORT )
 11 | #    if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
 12 | #      define VOID_RETURN    __declspec( dllexport ) void __stdcall
 13 | #      define INT_RETURN     __declspec( dllexport ) int  __stdcall
 14 | #    elif defined( __GNUC__ )
 15 | #      define VOID_RETURN    __declspec( __dllexport__ ) void
 16 | #      define INT_RETURN     __declspec( __dllexport__ ) int
 17 | #    else
 18 | #      error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
 19 | #    endif
 20 | #  elif defined( DLL_IMPORT )
 21 | #    if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
 22 | #      define VOID_RETURN    __declspec( dllimport ) void __stdcall
 23 | #      define INT_RETURN     __declspec( dllimport ) int  __stdcall
 24 | #    elif defined( __GNUC__ )
 25 | #      define VOID_RETURN    __declspec( __dllimport__ ) void
 26 | #      define INT_RETURN     __declspec( __dllimport__ ) int
 27 | #    else
 28 | #      error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
 29 | #    endif
 30 | #  elif defined( __WATCOMC__ )
 31 | #    define VOID_RETURN  void __cdecl
 32 | #    define INT_RETURN   int  __cdecl
 33 | #  else /* neither DLL_EXPORT nor DLL_IMPORT, and not Watcom: plain return types */
 34 | #    define VOID_RETURN  void
 35 | #    define INT_RETURN   int
 36 | #  endif
 37 | #endif
 38 | 
 39 | /*  These defines are used to declare buffers in a way that allows
 40 | 	faster operations on longer variables to be used.  In all these
 41 | 	defines 'size' must be a power of 2 and >= 8
 42 | 
 43 | 	dec_unit_type(size,x)       declares a variable 'x' of length
 44 | 								'size' bits
 45 | 
 46 | 	dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize'
 47 | 								bytes defined as an array of variables
 48 | 								each of 'size' bits (bsize must be a
 49 | 								multiple of size / 8)
 50 | 
 51 | 	ptr_cast(x,size)            casts a pointer to a pointer to a
 52 | 								variable of length 'size' bits
 53 | */
 54 | 
 55 | #define ui_type(size)               uint##size##_t /* size is pasted into the type name, so it must be a literal such as 8, 32 or 64 */
 56 | #define dec_unit_type(size,x)       typedef ui_type(size) x
 57 | #define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[(bsize) / ((size) >> 3)] /* arguments parenthesized so an expression such as 32 + 32 is divided as a whole */
 58 | #define ptr_cast(x,size)            ((ui_type(size)*)(x))
 59 | 
 60 | typedef unsigned int    uint_t;             /* native unsigned integer */
 61 | typedef uint8_t         u08b_t;             /*  8-bit unsigned integer */
 62 | typedef uint64_t        u64b_t;             /* 64-bit unsigned integer */
 63 | 
 64 | #ifndef RotL_64
 65 | #define RotL_64(x,N)    (((x) << (N)) | ((x) >> (64-(N)))) /* rotates x left by N bits, x being expected to be 64 bits wide; N == 0 would make the right shift a full 64 bits */
 66 | #endif
 67 | 
 68 | /*
 69 |  * Skein is "natively" little-endian (unlike SHA-xxx), for optimal
 70 |  * performance on x86 CPUs.  The Skein code requires the following
 71 |  * definitions for dealing with endianness:
 72 |  *
 73 |  *    SKEIN_NEED_SWAP:  0 for little-endian, 1 for big-endian
 74 |  *    Skein_Put64_LSB_First
 75 |  *    Skein_Get64_LSB_First
 76 |  *    Skein_Swap64
 77 |  *
 78 |  * If SKEIN_NEED_SWAP is defined at compile time, it is used here
 79 |  * along with the portable versions of Put64/Get64/Swap64, which
 80 |  * are slow in general.
 81 |  *
 82 |  * Otherwise, an "auto-detect" of endianness is attempted below.
 83 |  * If the default handling doesn't work well, the user may insert
 84 |  * platform-specific code instead (e.g., for big-endian CPUs).
 85 |  *
 86 |  */
 87 | #ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */
 88 | 
 89 | #define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
 90 | 
 91 | /* special handler for IA64, which may be either endianness (?)  */
 92 | /* here we assume little-endian, but this may need to be changed */
 93 | #if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
 94 | #  define PLATFORM_MUST_ALIGN (1)
 95 | #ifndef PLATFORM_BYTE_ORDER
 96 | #  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
 97 | #endif
 98 | #endif
 99 | 
100 | #ifndef   PLATFORM_MUST_ALIGN
101 | #  define PLATFORM_MUST_ALIGN (0)
102 | #endif
103 | /* IS_BIG_ENDIAN / IS_LITTLE_ENDIAN are not defined in this header. An undefined name evaluates to 0
104 |  * inside #if, so the big-endian test below only matches when IS_BIG_ENDIAN has actually been defined. */
105 | #if   defined(IS_BIG_ENDIAN) && PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
106 | 	/* here for big-endian CPUs */
107 | #define SKEIN_NEED_SWAP   (1)
108 | #elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
109 | 	/* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
110 | #define SKEIN_NEED_SWAP   (0)
111 | #if   PLATFORM_MUST_ALIGN == 0              /* ok to use "fast" versions? */
112 | #define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt)
113 | #define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt))
114 | #endif
115 | #else
116 | #error "Skein needs endianness setting!"
117 | #endif
118 | 
119 | #endif /* ifndef SKEIN_NEED_SWAP */
120 | 
121 | /*
122 |  ******************************************************************
123 |  *      Provide any definitions still needed.
124 |  ******************************************************************
125 |  */
126 | #ifndef Skein_Swap64  /* swap for big-endian, nop for little-endian */
127 | #if     SKEIN_NEED_SWAP
128 | #define Skein_Swap64(w64)                       \
129 |   ( (( ((u64b_t)(w64))       & 0xFF) << 56) |   \
130 | 	(((((u64b_t)(w64)) >> 8) & 0xFF) << 48) |   \
131 | 	(((((u64b_t)(w64)) >>16) & 0xFF) << 40) |   \
132 | 	(((((u64b_t)(w64)) >>24) & 0xFF) << 32) |   \
133 | 	(((((u64b_t)(w64)) >>32) & 0xFF) << 24) |   \
134 | 	(((((u64b_t)(w64)) >>40) & 0xFF) << 16) |   \
135 | 	(((((u64b_t)(w64)) >>48) & 0xFF) <<  8) |   \
136 | 	(((((u64b_t)(w64)) >>56) & 0xFF)      ) )
137 | #else
138 | #define Skein_Swap64(w64)  (w64)
139 | #endif
140 | #endif  /* ifndef Skein_Swap64 */
141 | 
142 | 
143 | #ifndef Skein_Put64_LSB_First
144 | void    Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt) /* writes the first bCnt bytes of the 64-bit words in src to dst, least significant byte of each word first */
145 | #ifdef  SKEIN_PORT_CODE /* instantiate the function code here? */
146 | 	{ /* this version is fully portable (big-endian or little-endian), but slow */
147 | 	size_t n;
148 | 
149 | 	for (n=0;n<bCnt;n++)
150 | 		dst[n] = (u08b_t) (src[n>>3] >> (8*(n&7))); /* byte n comes from word n/8, shifted down by 8*(n%8) bits */
151 | 	}
152 | #else
153 | 	;    /* output only the function prototype */
154 | #endif
155 | #endif   /* ifndef Skein_Put64_LSB_First */
156 | 
157 | 
158 | #ifndef Skein_Get64_LSB_First
159 | void    Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt) /* assembles wCnt 64-bit words in dst from the bytes of src, least significant byte first */
160 | #ifdef  SKEIN_PORT_CODE /* instantiate the function code here? */
161 | 	{ /* this version is fully portable (big-endian or little-endian), but slow */
162 | 	size_t n;
163 | 
164 | 	for (n=0;n<8*wCnt;n+=8) /* n is a byte offset into src; n/8 is the word index in dst */
165 | 		dst[n/8] = (((u64b_t) src[n  ])      ) +
166 | 				   (((u64b_t) src[n+1]) <<  8) +
167 | 				   (((u64b_t) src[n+2]) << 16) +
168 | 				   (((u64b_t) src[n+3]) << 24) +
169 | 				   (((u64b_t) src[n+4]) << 32) +
170 | 				   (((u64b_t) src[n+5]) << 40) +
171 | 				   (((u64b_t) src[n+6]) << 48) +
172 | 				   (((u64b_t) src[n+7]) << 56) ;
173 | 	}
174 | #else
175 | 	;    /* output only the function prototype */
176 | #endif
177 | #endif   /* ifndef Skein_Get64_LSB_First */
178 | 
179 | #endif   /* ifndef _SKEIN_PORT_H_ */
180 | 


--------------------------------------------------------------------------------
/src/main/jni/soft_aes.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |   * This program is free software: you can redistribute it and/or modify
  3 |   * it under the terms of the GNU General Public License as published by
  4 |   * the Free Software Foundation, either version 3 of the License, or
  5 |   * any later version.
  6 |   *
  7 |   * This program is distributed in the hope that it will be useful,
  8 |   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 10 |   * GNU General Public License for more details.
 11 |   *
 12 |   * You should have received a copy of the GNU General Public License
 13 |   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 14 |   *
 15 |   * Additional permission under GNU GPL version 3 section 7
 16 |   *
 17 |   * If you modify this Program, or any covered work, by linking or combining
 18 |   * it with OpenSSL (or a modified version of that library), containing parts
 19 |   * covered by the terms of OpenSSL License and SSLeay License, the licensors
 20 |   * of this Program grant you additional permission to convey the resulting work.
 21 |   *
 22 |   */
 23 | 
 24 | /*
 25 |  * Parts of this file are originally copyright (c) 2014-2017, The Monero Project
 26 |  */
 27 | #pragma once
 28 | 
 29 | #ifdef __GNUC__
 30 | #include <x86intrin.h>
 31 | #else
 32 | #include <intrin.h>
 33 | #endif // __GNUC__
 34 | 
 35 | #include <inttypes.h>
 36 | 
 37 | #define saes_data(w) {\
 38 | 	w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
 39 | 	w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
 40 | 	w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
 41 | 	w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
 42 | 	w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
 43 | 	w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
 44 | 	w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
 45 | 	w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
 46 | 	w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
 47 | 	w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
 48 | 	w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
 49 | 	w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
 50 | 	w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
 51 | 	w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
 52 | 	w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
 53 | 	w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
 54 | 	w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
 55 | 	w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
 56 | 	w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
 57 | 	w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
 58 | 	w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
 59 | 	w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
 60 | 	w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
 61 | 	w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
 62 | 	w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
 63 | 	w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
 64 | 	w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
 65 | 	w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
 66 | 	w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
 67 | 	w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
 68 | 	w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
 69 | 	w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
 70 | 
 71 | #define SAES_WPOLY           0x011b /* XORed into the doubled value by saes_f2 when bit 7 of its argument is set */
 72 | 
 73 | #define saes_b2w(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | /* packs four bytes into one 32-bit word, b0 in the lowest byte */ \
 74 | 	((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
 75 | 
 76 | #define saes_f2(x)   ((x<<1) ^ (((x>>7) & 1) * SAES_WPOLY)) /* x doubled, with SAES_WPOLY XORed in when bit 7 of x is set; x is not parenthesized, so pass simple values only */
 77 | #define saes_f3(x)   (saes_f2(x) ^ x) /* saes_f2(x) XOR x */
 78 | #define saes_h0(x)   (x) /* identity: yields the plain table byte */
 79 | 
 80 | #define saes_u0(p)   saes_b2w(saes_f2(p),          p,          p, saes_f3(p)) /* saes_u0..saes_u3 build one word per table byte; each is the previous one with its bytes rotated by one position */
 81 | #define saes_u1(p)   saes_b2w(saes_f3(p), saes_f2(p),          p,          p)
 82 | #define saes_u2(p)   saes_b2w(         p, saes_f3(p), saes_f2(p),          p)
 83 | #define saes_u3(p)   saes_b2w(         p,          p, saes_f3(p), saes_f2(p))
 84 | 
 85 | alignas(16) const uint32_t saes_table[4][256] = { saes_data(saes_u0), saes_data(saes_u1), saes_data(saes_u2), saes_data(saes_u3) }; /* four 256-entry word tables, one per saes_u0..saes_u3 expansion of saes_data */
 86 | alignas(16) const uint8_t  saes_sbox[256] = saes_data(saes_h0); /* the raw saes_data bytes, used by sub_word */
 87 | 
 88 | static inline __m128i soft_aesenc(__m128i in, __m128i key) /* Table-driven round (the name suggests a software stand-in for AESENC): each output lane XORs four saes_table lookups, then key is XORed in. */
 89 | {
 90 | 	uint32_t x0, x1, x2, x3;
 91 | 	x0 = _mm_cvtsi128_si32(in); /* 32-bit lane 0 of in */
 92 | 	x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55)); /* lane 1 */
 93 | 	x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA)); /* lane 2 */
 94 | 	x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF)); /* lane 3 */
 95 | 
 96 | 	__m128i out = _mm_set_epi32( /* arguments run from lane 3 down to lane 0; each takes byte 0..3 from four successive input lanes */
 97 | 		(saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
 98 | 		(saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
 99 | 		(saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
100 | 		(saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
101 | 
102 | 	return _mm_xor_si128(out, key);
103 | }
104 | 
105 | static inline uint32_t sub_word(uint32_t key) /* Replaces each of the four bytes of key with its saes_sbox entry, keeping byte positions. */
106 | {
107 | 	return ((uint32_t)saes_sbox[key >> 24 ] << 24)   | /* widen first: a uint8_t promotes to int, and shifting 0x80..0xff up by 24 would overflow it */
108 | 		((uint32_t)saes_sbox[(key >> 16) & 0xff] << 16 ) |
109 | 		((uint32_t)saes_sbox[(key >> 8)  & 0xff] << 8  ) |
110 | 		 (uint32_t)saes_sbox[key & 0xff];
111 | }
112 | 
113 | #ifdef __clang__ /* only compiled when __clang__ is defined; presumably other compilers supply _rotr themselves - TODO confirm */
114 | static inline uint32_t _rotr(uint32_t value, uint32_t amount) /* rotates value right by amount bits; amount is expected to be below 32 */
115 | {
116 | 	return (value >> amount) | (value << ((32 - amount) & 31)); /* "& 31" turns the left shift into 0 bits when amount is 0 */
117 | }
118 | #endif
119 | 
120 | static inline __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon) /* Builds a key-schedule helper vector from lanes 1 and 3 of key (the name suggests a software stand-in for AESKEYGENASSIST). */
121 | {
122 | 	uint32_t X1 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55))); /* sub_word of 32-bit lane 1 */
123 | 	uint32_t X3 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF))); /* sub_word of 32-bit lane 3 */
124 | 	return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3,_rotr(X1, 8) ^ rcon, X1); /* lanes 3..0: rotated X3 ^ rcon, X3, rotated X1 ^ rcon, X1 */
125 | }
126 | 


--------------------------------------------------------------------------------
/src/main/resources/simplelogger.properties:
--------------------------------------------------------------------------------
1 | org.slf4j.simpleLogger.logFile=System.out
2 | org.slf4j.simpleLogger.showDateTime=true
3 | org.slf4j.simpleLogger.dateTimeFormat=[HH:mm:ss]
4 | org.slf4j.simpleLogger.showThreadName=false
5 | org.slf4j.simpleLogger.showLogName=false
6 | org.slf4j.simpleLogger.levelInBrackets=true


--------------------------------------------------------------------------------
/src/main/resources/unix/x64/libcryptonight.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netindev/drill/714f2e736457bde90fe610921cce1b4be5c2761c/src/main/resources/unix/x64/libcryptonight.so


--------------------------------------------------------------------------------
/src/main/resources/win/x64/cryptonight.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netindev/drill/714f2e736457bde90fe610921cce1b4be5c2761c/src/main/resources/win/x64/cryptonight.dll


--------------------------------------------------------------------------------