├── .gitignore ├── AudioProcessor.java ├── Bessel.java ├── ComplexNumber.java ├── Denoiser.java ├── DenoisingExample.java ├── LICENSE ├── README.md ├── Utils.java ├── WavFile.java ├── WavFileException.java └── data ├── fac_lom.wav ├── fulcrum1_in_pcm_11ks.wav ├── int_noisy.wav ├── int_noisy_enhanced.wav ├── noisy.wav ├── noisy_enhanced.wav ├── stereo_test.wav └── stereo_test_enhanced.wav /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | -------------------------------------------------------------------------------- /AudioProcessor.java: -------------------------------------------------------------------------------- 1 | interface AudioProcessor { 2 | double[] process(double[] input); 3 | } -------------------------------------------------------------------------------- /Bessel.java: -------------------------------------------------------------------------------- 1 | /* 2 | * JScience - Java(TM) Tools and Libraries for the Advancement of Sciences. 3 | * Copyright (C) 2006 - JScience (http://jscience.org/) 4 | * All rights reserved. 5 | * 6 | * Permission to use, copy, modify, and distribute this software is 7 | * freely granted, provided that this notice is preserved. 8 | */ 9 | 10 | public class Bessel { 11 | 12 | /** 13 | * Evaluates a Chebyshev series. 14 | * @param x value at which to evaluate series 15 | * @param series the coefficients of the series 16 | */ 17 | 18 | public static double chebyshev(double x, double series[]) { 19 | double twox, b0 = 0.0, b1 = 0.0, b2 = 0.0; 20 | twox = 2 * x; 21 | for (int i = series.length - 1; i > -1; i--) { 22 | b2 = b1; 23 | b1 = b0; 24 | b0 = twox * b1 - b2 + series[i]; 25 | } 26 | return 0.5 * (b0 - b2); 27 | } 28 | 29 | /** 30 | * Modified Bessel function of first kind, order zero. 31 | * Based on the NETLIB Fortran function besi0 written by W. Fullerton. 
32 | */ 33 | 34 | public static double modBesselFirstZero(double x) { 35 | double y = Math.abs(x); 36 | if (y > 3.0) 37 | return Math.exp(y) * expModBesselFirstZero(x); 38 | else 39 | return 2.75 + chebyshev(y * y / 4.5 - 1.0, bi0cs); 40 | } 41 | 42 | /** 43 | * Exponential scaled modified Bessel function of first kind, order zero. 44 | * Based on the NETLIB Fortran function besi0e written by W. Fullerton. 45 | */ 46 | 47 | private static double expModBesselFirstZero(double x) { 48 | final double y = Math.abs(x); 49 | if (y > 3.0) { 50 | if (y > 8.0) 51 | return (0.375 + chebyshev(16.0 / y - 1.0, ai02cs)) / Math.sqrt(y); 52 | else 53 | return (0.375 + chebyshev((48.0 / y - 11.0) / 5.0, ai0cs)) / Math.sqrt(y); 54 | } else 55 | return Math.exp(-y) * (2.75 + chebyshev(y * y / 4.5 - 1.0, bi0cs)); 56 | } 57 | 58 | /** 59 | * Modified Bessel function of first kind, order one. 60 | * Based on the NETLIB Fortran function besi1 written by W. Fullerton. 61 | */ 62 | 63 | public static double modBesselFirstOne(double x) { 64 | final double y = Math.abs(x); 65 | if (y > 3.0) 66 | return Math.exp(y) * expModBesselFirstOne(x); 67 | else if (y == 0.0) 68 | return 0.0; 69 | else 70 | return x * (0.875 + chebyshev(y * y / 4.5 - 1.0, bi1cs)); 71 | } 72 | 73 | /** 74 | * Exponential scaled modified Bessel function of first kind, order one. 75 | * Based on the NETLIB Fortran function besi1e written by W. Fullerton. 
76 | */ 77 | 78 | private static double expModBesselFirstOne(double x) { 79 | final double y = Math.abs(x); 80 | if (y > 3.0) { 81 | if (y > 8.0) 82 | return x / y * (0.375 + chebyshev(16.0 / y - 1.0, ai12cs)) / Math.sqrt(y); 83 | else 84 | return x / y * (0.375 + chebyshev((48.0 / y - 11.0) / 5.0, ai1cs)) / Math.sqrt(y); 85 | } else if (y == 0.0) 86 | return 0.0; 87 | else 88 | return Math.exp(-y) * x * (0.875 + chebyshev(y * y / 4.5 - 1.0, bi1cs)); 89 | } 90 | 91 | // CHEBYSHEV SERIES 92 | 93 | // series for ai0 on the interval 1.25000d-01 to 3.33333d-01 94 | // with weighted error 7.87e-17 95 | // log weighted error 16.10 96 | // significant figures required 14.69 97 | // decimal places required 16.76 98 | 99 | private final static double ai0cs[] = { 100 | 0.07575994494023796, 101 | 0.00759138081082334, 102 | 0.00041531313389237, 103 | 0.00001070076463439, 104 | -0.00000790117997921, 105 | -0.00000078261435014, 106 | 0.00000027838499429, 107 | 0.00000000825247260, 108 | -0.00000001204463945, 109 | 0.00000000155964859, 110 | 0.00000000022925563, 111 | -0.00000000011916228, 112 | 0.00000000001757854, 113 | 0.00000000000112822, 114 | -0.00000000000114684, 115 | 0.00000000000027155, 116 | -0.00000000000002415, 117 | -0.00000000000000608, 118 | 0.00000000000000314, 119 | -0.00000000000000071, 120 | 0.00000000000000007 121 | }; 122 | 123 | // series for ai02 on the interval 0. 
to 1.25000d-01 124 | // with weighted error 3.79e-17 125 | // log weighted error 16.42 126 | // significant figures required 14.86 127 | // decimal places required 17.09 128 | private final static double ai02cs[] = { 129 | 0.05449041101410882, 130 | 0.00336911647825569, 131 | 0.00006889758346918, 132 | 0.00000289137052082, 133 | 0.00000020489185893, 134 | 0.00000002266668991, 135 | 0.00000000339623203, 136 | 0.00000000049406022, 137 | 0.00000000001188914, 138 | -0.00000000003149915, 139 | -0.00000000001321580, 140 | -0.00000000000179419, 141 | 0.00000000000071801, 142 | 0.00000000000038529, 143 | 0.00000000000001539, 144 | -0.00000000000004151, 145 | -0.00000000000000954, 146 | 0.00000000000000382, 147 | 0.00000000000000176, 148 | -0.00000000000000034, 149 | -0.00000000000000027, 150 | 0.00000000000000003 151 | }; 152 | 153 | // series for ai1 on the interval 1.25000d-01 to 3.33333d-01 154 | // with weighted error 6.98e-17 155 | // log weighted error 16.16 156 | // significant figures required 14.53 157 | // decimal places required 16.82 158 | 159 | private final static double ai1cs[] = { 160 | -0.02846744181881479, 161 | -0.01922953231443221, 162 | -0.00061151858579437, 163 | -0.00002069971253350, 164 | 0.00000858561914581, 165 | 0.00000104949824671, 166 | -0.00000029183389184, 167 | -0.00000001559378146, 168 | 0.00000001318012367, 169 | -0.00000000144842341, 170 | -0.00000000029085122, 171 | 0.00000000012663889, 172 | -0.00000000001664947, 173 | -0.00000000000166665, 174 | 0.00000000000124260, 175 | -0.00000000000027315, 176 | 0.00000000000002023, 177 | 0.00000000000000730, 178 | -0.00000000000000333, 179 | 0.00000000000000071, 180 | -0.00000000000000006 181 | }; 182 | 183 | // series for ai12 on the interval 0. 
to 1.25000d-01 184 | // with weighted error 3.55e-17 185 | // log weighted error 16.45 186 | // significant figures required 14.69 187 | // decimal places required 17.12 188 | 189 | private final static double ai12cs[] = { 190 | 0.02857623501828014, 191 | -0.00976109749136147, 192 | -0.00011058893876263, 193 | -0.00000388256480887, 194 | -0.00000025122362377, 195 | -0.00000002631468847, 196 | -0.00000000383538039, 197 | -0.00000000055897433, 198 | -0.00000000001897495, 199 | 0.00000000003252602, 200 | 0.00000000001412580, 201 | 0.00000000000203564, 202 | -0.00000000000071985, 203 | -0.00000000000040836, 204 | -0.00000000000002101, 205 | 0.00000000000004273, 206 | 0.00000000000001041, 207 | -0.00000000000000382, 208 | -0.00000000000000186, 209 | 0.00000000000000033, 210 | 0.00000000000000028, 211 | -0.00000000000000003 212 | }; 213 | 214 | 215 | // series for bi0 on the interval 0. to 9.00000d+00 216 | // with weighted error 2.46e-18 217 | // log weighted error 17.61 218 | // significant figures required 17.90 219 | // decimal places required 18.15 220 | 221 | private final static double bi0cs[] = { 222 | -0.07660547252839144951, 223 | 1.927337953993808270, 224 | 0.2282644586920301339, 225 | 0.01304891466707290428, 226 | 0.00043442709008164874, 227 | 0.00000942265768600193, 228 | 0.00000014340062895106, 229 | 0.00000000161384906966, 230 | 0.00000000001396650044, 231 | 0.00000000000009579451, 232 | 0.00000000000000053339, 233 | 0.00000000000000000245 234 | }; 235 | 236 | 237 | // series for bi1 on the interval 0. 
to 9.00000d+00 238 | // with weighted error 2.40e-17 239 | // log weighted error 16.62 240 | // significant figures required 16.23 241 | // decimal places required 17.14 242 | 243 | private final static double bi1cs[] = { 244 | -0.001971713261099859, 245 | 0.40734887667546481, 246 | 0.034838994299959456, 247 | 0.001545394556300123, 248 | 0.000041888521098377, 249 | 0.000000764902676483, 250 | 0.000000010042493924, 251 | 0.000000000099322077, 252 | 0.000000000000766380, 253 | 0.000000000000004741, 254 | 0.000000000000000024 255 | }; 256 | 257 | public static void main(String[] args) { 258 | double y = modBesselFirstZero(1); 259 | double z = modBesselFirstOne(1); 260 | System.out.println(y); 261 | System.out.println(z); 262 | } 263 | } -------------------------------------------------------------------------------- /ComplexNumber.java: -------------------------------------------------------------------------------- 1 | /** 2 | * ComplexNumber is a class which implements complex numbers in Java. 3 | * It includes basic operations that can be performed on complex numbers such as, 4 | * addition, subtraction, multiplication, conjugate, modulus and squaring. 5 | * The data type for Complex Numbers. 6 | *

7 | * The features of this library include:
8 | * 15 | * 16 | * @author Abdul Fatir 17 | * @version 1.1 18 | * 19 | */ 20 | public class ComplexNumber { 21 | /** 22 | * Used in format(int) to format the complex number as x+yi 23 | */ 24 | public static final int XY = 0; 25 | /** 26 | * Used in format(int) to format the complex number as R.cis(theta), where theta is arg(z) 27 | */ 28 | public static final int RCIS = 1; 29 | /** 30 | * The real, Re(z), part of the ComplexNumber. 31 | */ 32 | private double real; 33 | /** 34 | * The imaginary, Im(z), part of the ComplexNumber. 35 | */ 36 | private double imaginary; 37 | /** 38 | * Constructs a new ComplexNumber object with both real and imaginary parts 0 (z = 0 + 0i). 39 | */ 40 | public ComplexNumber() { 41 | real = 0.0; 42 | imaginary = 0.0; 43 | } 44 | 45 | /** 46 | * Constructs a new ComplexNumber object. 47 | * @param real the real part, Re(z), of the complex number 48 | * @param imaginary the imaginary part, Im(z), of the complex number 49 | */ 50 | 51 | public ComplexNumber(double real, double imaginary) { 52 | this.real = real; 53 | this.imaginary = imaginary; 54 | } 55 | 56 | public ComplexNumber(double real) { 57 | this.real = real; 58 | this.imaginary = 0; 59 | } 60 | 61 | /** 62 | * Adds another ComplexNumber to the current complex number. 63 | * @param z the complex number to be added to the current complex number 64 | */ 65 | 66 | public void add(ComplexNumber z) { 67 | set(add(this, z)); 68 | } 69 | 70 | /** 71 | * Subtracts another ComplexNumber from the current complex number. 72 | * @param z the complex number to be subtracted from the current complex number 73 | */ 74 | 75 | public void subtract(ComplexNumber z) { 76 | set(subtract(this, z)); 77 | } 78 | 79 | /** 80 | * Multiplies another ComplexNumber to the current complex number. 
81 | * @param z the complex number to be multiplied to the current complex number 82 | */ 83 | 84 | public void multiply(ComplexNumber z) { 85 | set(multiply(this, z)); 86 | } 87 | /** 88 | * Divides the current ComplexNumber by another ComplexNumber. 89 | * @param z the divisor 90 | */ 91 | public void divide(ComplexNumber z) { 92 | set(divide(this, z)); 93 | } 94 | /** 95 | * Sets the value of current complex number to the passed complex number. 96 | * @param z the complex number 97 | */ 98 | public void set(ComplexNumber z) { 99 | this.real = z.real; 100 | this.imaginary = z.imaginary; 101 | } 102 | /** 103 | * Adds two ComplexNumber. 104 | * @param z1 the first ComplexNumber. 105 | * @param z2 the second ComplexNumber. 106 | * @return the resultant ComplexNumber (z1 + z2). 107 | */ 108 | public static ComplexNumber add(ComplexNumber z1, ComplexNumber z2) { 109 | return new ComplexNumber(z1.real + z2.real, z1.imaginary + z2.imaginary); 110 | } 111 | 112 | /** 113 | * Subtracts one ComplexNumber from another. 114 | * @param z1 the first ComplexNumber. 115 | * @param z2 the second ComplexNumber. 116 | * @return the resultant ComplexNumber (z1 - z2). 117 | */ 118 | public static ComplexNumber subtract(ComplexNumber z1, ComplexNumber z2) { 119 | return new ComplexNumber(z1.real - z2.real, z1.imaginary - z2.imaginary); 120 | } 121 | /** 122 | * Multiplies one ComplexNumber to another. 123 | * @param z1 the first ComplexNumber. 124 | * @param z2 the second ComplexNumber. 125 | * @return the resultant ComplexNumber (z1 * z2). 
126 | */ 127 | public static ComplexNumber multiply(ComplexNumber z1, ComplexNumber z2) { 128 | double _real = z1.real * z2.real - z1.imaginary * z2.imaginary; 129 | double _imaginary = z1.real * z2.imaginary + z1.imaginary * z2.real; 130 | return new ComplexNumber(_real, _imaginary); 131 | } 132 | 133 | public ComplexNumber times(double alpha) { 134 | return new ComplexNumber(alpha*this.real,alpha*this.imaginary); 135 | } 136 | /** 137 | * Divides one ComplexNumber by another. 138 | * @param z1 the first ComplexNumber. 139 | * @param z2 the second ComplexNumber. 140 | * @return the resultant ComplexNumber (z1 / z2). 141 | */ 142 | public static ComplexNumber divide(ComplexNumber z1, ComplexNumber z2) { 143 | ComplexNumber output = multiply(z1, z2.conjugate()); 144 | double div = Math.pow(z2.mod(), 2); 145 | return new ComplexNumber(output.real / div, output.imaginary / div); 146 | } 147 | 148 | /** 149 | * The complex conjugate of the current complex number. 150 | * @return a ComplexNumber object which is the conjugate of the current complex number 151 | */ 152 | 153 | public ComplexNumber conjugate() { 154 | return new ComplexNumber(this.real, -this.imaginary); 155 | } 156 | 157 | /** 158 | * The modulus, magnitude or the absolute value of current complex number. 159 | * @return the magnitude or modulus of current complex number 160 | */ 161 | 162 | public double mod() { 163 | return Math.sqrt(Math.pow(this.real, 2) + Math.pow(this.imaginary, 2)); 164 | } 165 | 166 | /** 167 | * The square of the current complex number. 168 | * @return a ComplexNumber which is the square of the current complex number. 
169 | */ 170 | 171 | public ComplexNumber square() { 172 | double _real = this.real * this.real - this.imaginary * this.imaginary; 173 | double _imaginary = 2 * this.real * this.imaginary; 174 | return new ComplexNumber(_real, _imaginary); 175 | } 176 | /** 177 | * @return the complex number in x + yi format 178 | */ 179 | @Override 180 | public String toString() { 181 | String re = this.real + ""; 182 | String im = ""; 183 | if (this.imaginary < 0) 184 | im = this.imaginary + "i"; 185 | else 186 | im = "+" + this.imaginary + "i"; 187 | return re + im; 188 | } 189 | /** 190 | * Calculates the exponential of the ComplexNumber 191 | * @param z The input complex number 192 | * @return a ComplexNumber which is e^(input z) 193 | */ 194 | public static ComplexNumber exp(ComplexNumber z) { 195 | double a = z.real; 196 | double b = z.imaginary; 197 | double r = Math.exp(a); 198 | a = r * Math.cos(b); 199 | b = r * Math.sin(b); 200 | return new ComplexNumber(a, b); 201 | } 202 | /** 203 | * Calculates the ComplexNumber to the passed integer power. 204 | * @param z The input complex number 205 | * @param power The power. 206 | * @return a ComplexNumber which is (z)^power 207 | */ 208 | public static ComplexNumber pow(ComplexNumber z, int power) { 209 | ComplexNumber output = new ComplexNumber(z.getRe(), z.getIm()); 210 | for (int i = 1; i < power; i++) { 211 | double _real = output.real * z.real - output.imaginary * z.imaginary; 212 | double _imaginary = output.real * z.imaginary + output.imaginary * z.real; 213 | output = new ComplexNumber(_real, _imaginary); 214 | } 215 | return output; 216 | } 217 | /** 218 | * Calculates the sine of the ComplexNumber 219 | * @param z the input complex number 220 | * @return a ComplexNumber which is the sine of z. 
221 | */ 222 | public static ComplexNumber sin(ComplexNumber z) { 223 | double x = Math.exp(z.imaginary); 224 | double x_inv = 1 / x; 225 | double r = Math.sin(z.real) * (x + x_inv) / 2; 226 | double i = Math.cos(z.real) * (x - x_inv) / 2; 227 | return new ComplexNumber(r, i); 228 | } 229 | /** 230 | * Calculates the cosine of the ComplexNumber 231 | * @param z the input complex number 232 | * @return a ComplexNumber which is the cosine of z. 233 | */ 234 | public static ComplexNumber cos(ComplexNumber z) { 235 | double x = Math.exp(z.imaginary); 236 | double x_inv = 1 / x; 237 | double r = Math.cos(z.real) * (x + x_inv) / 2; 238 | double i = -Math.sin(z.real) * (x - x_inv) / 2; 239 | return new ComplexNumber(r, i); 240 | } 241 | /** 242 | * Calculates the tangent of the ComplexNumber 243 | * @param z the input complex number 244 | * @return a ComplexNumber which is the tangent of z. 245 | */ 246 | public static ComplexNumber tan(ComplexNumber z) { 247 | return divide(sin(z), cos(z)); 248 | } 249 | /** 250 | * Calculates the co-tangent of the ComplexNumber 251 | * @param z the input complex number 252 | * @return a ComplexNumber which is the co-tangent of z. 253 | */ 254 | public static ComplexNumber cot(ComplexNumber z) { 255 | return divide(new ComplexNumber(1, 0), tan(z)); 256 | } 257 | /** 258 | * Calculates the secant of the ComplexNumber 259 | * @param z the input complex number 260 | * @return a ComplexNumber which is the secant of z. 261 | */ 262 | public static ComplexNumber sec(ComplexNumber z) { 263 | return divide(new ComplexNumber(1, 0), cos(z)); 264 | } 265 | /** 266 | * Calculates the co-secant of the ComplexNumber 267 | * @param z the input complex number 268 | * @return a ComplexNumber which is the co-secant of z. 
269 | */ 270 | public static ComplexNumber cosec(ComplexNumber z) { 271 | return divide(new ComplexNumber(1, 0), sin(z)); 272 | } 273 | /** 274 | * The real part of ComplexNumber 275 | * @return the real part of the complex number 276 | */ 277 | public double getRe() { 278 | return this.real; 279 | } 280 | /** 281 | * The imaginary part of ComplexNumber 282 | * @return the imaginary part of the complex number 283 | */ 284 | public double getIm() { 285 | return this.imaginary; 286 | } 287 | /** 288 | * The argument/phase of the current complex number. 289 | * @return arg(z) - the argument of current complex number 290 | */ 291 | public double getArg() { 292 | return Math.atan2(imaginary, real); 293 | } 294 | /** 295 | * Parses the String as a ComplexNumber of type x+yi. 296 | * @param s the input complex number as string 297 | * @return a ComplexNumber which is represented by the string. 298 | */ 299 | public static ComplexNumber parseComplex(String s) { 300 | s = s.replaceAll(" ", ""); 301 | ComplexNumber parsed = null; 302 | if (s.contains(String.valueOf("+")) || (s.contains(String.valueOf("-")) && s.lastIndexOf('-') > 0)) { 303 | String re = ""; 304 | String im = ""; 305 | s = s.replaceAll("i", ""); 306 | s = s.replaceAll("I", ""); 307 | if (s.indexOf('+') > 0) { 308 | re = s.substring(0, s.indexOf('+')); 309 | im = s.substring(s.indexOf('+') + 1, s.length()); 310 | parsed = new ComplexNumber(Double.parseDouble(re), Double.parseDouble(im)); 311 | } else if (s.lastIndexOf('-') > 0) { 312 | re = s.substring(0, s.lastIndexOf('-')); 313 | im = s.substring(s.lastIndexOf('-') + 1, s.length()); 314 | parsed = new ComplexNumber(Double.parseDouble(re), -Double.parseDouble(im)); 315 | } 316 | } else { 317 | // Pure imaginary number 318 | if (s.endsWith("i") || s.endsWith("I")) { 319 | s = s.replaceAll("i", ""); 320 | s = s.replaceAll("I", ""); 321 | parsed = new ComplexNumber(0, Double.parseDouble(s)); 322 | } 323 | // Pure real number 324 | else { 325 | parsed = new 
ComplexNumber(Double.parseDouble(s), 0); 326 | } 327 | } 328 | return parsed; 329 | } 330 | /** 331 | * Checks if the passed ComplexNumber is equal to the current. 332 | * @param z the complex number to be checked 333 | * @return true if they are equal, false otherwise 334 | */ 335 | @Override 336 | public final boolean equals(Object z) { 337 | if (!(z instanceof ComplexNumber)) 338 | return false; 339 | ComplexNumber a = (ComplexNumber) z; 340 | return (real == a.real) && (imaginary == a.imaginary); 341 | } 342 | /** 343 | * The inverse/reciprocal of the complex number. 344 | * @return the reciprocal of current complex number. 345 | */ 346 | public ComplexNumber inverse() { 347 | return divide(new ComplexNumber(1, 0), this); 348 | } 349 | /** 350 | * Formats the Complex number as x+yi or r.cis(theta) 351 | * @param format_id the format ID ComplexNumber.XY or ComplexNumber.RCIS. 352 | * @return a string representation of the complex number 353 | * @throws IllegalArgumentException if the format_id does not match. 
354 | */ 355 | public String format(int format_id) throws IllegalArgumentException { 356 | String out = ""; 357 | if (format_id == XY) 358 | out = toString(); 359 | else if (format_id == RCIS) { 360 | out = mod() + " cis(" + getArg() + ")"; 361 | } else { 362 | throw new IllegalArgumentException("Unknown Complex Number format."); 363 | } 364 | return out; 365 | } 366 | 367 | public static void main(String[] args) { 368 | 369 | } 370 | } 371 | -------------------------------------------------------------------------------- /Denoiser.java: -------------------------------------------------------------------------------- 1 | import java.util.Arrays; 2 | 3 | public class Denoiser implements AudioProcessor { 4 | 5 | private static int windowLength; 6 | private static double overlapRatio; 7 | private int fs; 8 | private double noSpeechDuration; 9 | private int noSpeechSegments; 10 | private boolean speechFlag; 11 | private boolean noiseFlag; 12 | private int noiseCounter; 13 | private int noiseLength; 14 | private int noiseThreshold; 15 | private int frameReset; 16 | 17 | 18 | public Denoiser(int fs) { 19 | windowLength = 256; 20 | overlapRatio = 0.5; 21 | this.fs = fs; 22 | this.noSpeechDuration = 0.4; 23 | this.noSpeechSegments = (int)Math.floor((noSpeechDuration * fs - windowLength) / (overlapRatio * windowLength) + 1); 24 | this.speechFlag = false; 25 | this.noiseFlag = false; 26 | this.noiseLength = 9; 27 | this.noiseThreshold = 3; 28 | this.frameReset = 8; 29 | } 30 | 31 | public Denoiser(int fs, double noSpeechDuration) { 32 | windowLength = 256; 33 | overlapRatio = 0.5; 34 | this.fs = fs; 35 | this.noSpeechDuration = noSpeechDuration; 36 | this.noSpeechSegments = (int)Math.floor((noSpeechDuration * fs - windowLength) / (overlapRatio * windowLength) + 1); 37 | this.speechFlag = false; 38 | this.noiseFlag = false; 39 | this.noiseLength = 9; 40 | this.noiseThreshold = 3; 41 | this.frameReset = 8; 42 | } 43 | 44 | public Denoiser(int fs, double noSpeechDuration, int 
noiseLength, int noiseThreshold, int frameReset) { 45 | windowLength = 256; 46 | overlapRatio = 0.5; 47 | this.fs = fs; 48 | this.noSpeechDuration = noSpeechDuration; 49 | this.noSpeechSegments = (int)Math.floor((noSpeechDuration * fs - windowLength) / (overlapRatio * windowLength) + 1); 50 | this.speechFlag = false; 51 | this.noiseFlag = false; 52 | this.noiseLength = noiseLength; 53 | this.noiseThreshold = noiseThreshold; 54 | this.frameReset = frameReset; 55 | } 56 | 57 | /** 58 | * Process function for multi-channel inputs 59 | * @param input Multi channel signal 60 | * @return enhanced Multi channel enhanced signal 61 | */ 62 | 63 | public double[][] process(double[][] input) { 64 | int channels = input.length; 65 | int signalLength = input[0].length; 66 | 67 | double[][] enhanced = new double[channels][signalLength]; 68 | 69 | for (int i = 0; i < channels; i++) { 70 | enhanced[i] = process(input[i]); 71 | } 72 | return enhanced; 73 | } 74 | 75 | /** 76 | * Performs speech denoising on array of doubles based on Speech Enhancement Using a Minimum Mean-Square 77 | * Error Short-Time Spectral Amplitude Estimator by Ephraim and Malah 78 | * @param input Double array of signal values 79 | * @return enhanced Double array of enhanced signal array 80 | */ 81 | 82 | public double[] process(double[] input) { 83 | double[][] sampledSignalWindowed = segmentSignal(input, windowLength, overlapRatio); 84 | int frames = sampledSignalWindowed[0].length; 85 | ComplexNumber[][] sampledSignalWindowedComplex = new ComplexNumber[frames][windowLength]; 86 | ComplexNumber[][] signalFFT = new ComplexNumber[frames][windowLength]; 87 | double[][] signalFFTMagnitude = new double[frames][windowLength]; 88 | double[][] signalFFTPhase = new double[frames][windowLength]; 89 | 90 | for (int i = 0; i < frames; i++) { 91 | for (int k = 0; k < windowLength; k++) { 92 | sampledSignalWindowedComplex[i][k] = new ComplexNumber(sampledSignalWindowed[k][i]); //convert samples to Complex form for fft 
and perform transpose 93 | } 94 | } 95 | 96 | for (int i = 0; i < frames; i++) { 97 | signalFFT[i] = Utils.fft(sampledSignalWindowedComplex[i]); 98 | } 99 | 100 | for (int i = 0; i < frames; i++) { 101 | for (int k = 0; k < windowLength; k++) { 102 | signalFFTMagnitude[i][k] = signalFFT[i][k].mod(); 103 | signalFFTPhase[i][k] = signalFFT[i][k].getArg(); 104 | } 105 | } 106 | 107 | double[][] noise = new double[this.noSpeechSegments][windowLength]; 108 | double[][] noiseMag = new double[this.noSpeechSegments][windowLength]; 109 | 110 | noise = Arrays.copyOfRange(signalFFTMagnitude, 0, this.noSpeechSegments); 111 | 112 | for (int i = 0; i < this.noSpeechSegments; i++) { 113 | for (int k = 0; k < windowLength; k++) { 114 | noiseMag[i][k] = Math.pow(noise[i][k], 2); 115 | } 116 | } 117 | 118 | double[] noiseMean = Utils.mean(noise, 0); 119 | double[] noiseVar = Utils.mean(noiseMag, 0); 120 | 121 | double gamma1p5 = Utils.gamma(1.5); 122 | double[] gain = new double[windowLength]; 123 | double[] gamma = new double[windowLength]; 124 | double[] gammaUpdate = new double[windowLength]; 125 | double[] xi = new double[windowLength]; 126 | double[] nu = new double[windowLength]; 127 | 128 | double alpha = 0.96; //Smoothing factor 129 | 130 | Arrays.fill(gain, 1); 131 | Arrays.fill(gamma, 1); 132 | 133 | double[][] enhancedSpectrum = new double[frames][windowLength]; 134 | 135 | for (int i = 0; i < frames; i++) { 136 | if (i < this.noSpeechSegments) { 137 | this.speechFlag = false; 138 | this.noiseCounter = 100; 139 | } else { 140 | vad(signalFFTMagnitude[i], noiseMean); 141 | } 142 | 143 | if (this.speechFlag == false) { // Noise estimate update during segments with no speech 144 | for (int k = 0; k < windowLength; k++) { 145 | noiseMean[k] = (this.noiseLength * noiseMean[k] + signalFFTMagnitude[i][k]) / (this.noiseLength + 1); 146 | noiseVar[k] = (this.noiseLength * noiseVar[k] + Math.pow(signalFFTMagnitude[i][k], 2)) / (this.noiseLength + 1); 147 | } 148 | } 149 | 150 | 
for (int k = 0; k < windowLength; k++) { 151 | gammaUpdate[k] = Math.pow(signalFFTMagnitude[i][k], 2) / noiseVar[k]; 152 | xi[k] = alpha * Math.pow(gain[k], 2) * gamma[k] + (1 - alpha) * Math.max(gammaUpdate[k] - 1, 0); 153 | gamma[k] = gammaUpdate[k]; 154 | nu[k] = gamma[k] * xi[k] / (xi[k] + 1); 155 | gain[k] = (gamma1p5 * Math.sqrt(nu[k])) / gamma[k] * Math.exp(-1 * nu[k] / 2) * ((1 + nu[k]) * Bessel.modBesselFirstZero(nu[k] / 2) + nu[k] * Bessel.modBesselFirstOne(nu[k] / 2)); 156 | 157 | if (Double.isNaN(gain[k]) || Double.isInfinite(gain[k])) { 158 | gain[k] = xi[k] / (xi[k] + 1); 159 | } 160 | 161 | enhancedSpectrum[i][k] = gain[k] * signalFFTMagnitude[i][k]; 162 | } 163 | } 164 | ComplexNumber[][] enhancedSpectrumComplex = new ComplexNumber[frames][windowLength]; 165 | 166 | for (int i = 0; i < frames; i++) { 167 | for (int k = 0; k < windowLength; k++) { 168 | enhancedSpectrumComplex[i][k] = ComplexNumber.exp(new ComplexNumber(0, signalFFTPhase[i][k])); 169 | enhancedSpectrumComplex[i][k] = enhancedSpectrumComplex[i][k].times(enhancedSpectrum[i][k]); 170 | } 171 | } 172 | 173 | ComplexNumber[][] enhancedSegments = new ComplexNumber[frames][windowLength]; 174 | double[][] enhancedSegmentsReal = new double[windowLength][frames]; 175 | 176 | for (int i = 0; i < frames; i++) { 177 | enhancedSegments[i] = Utils.ifft(enhancedSpectrumComplex[i]); 178 | } 179 | 180 | for (int i = 0; i < frames; i++) { 181 | for (int k = 0; k < windowLength; k++) { 182 | enhancedSegmentsReal[k][i] = enhancedSegments[i][k].getRe(); //convert samples to real form and perform transpose 183 | } 184 | } 185 | 186 | double[] enhanced = overlapAndAdd(enhancedSegmentsReal, overlapRatio); 187 | return enhanced; 188 | } 189 | 190 | /** 191 | * Voice activity detector that predicts whether the current frame contains speech or not 192 | * @param frame Current frame 193 | * @param noise Current noise estimate 194 | * @param noiseCounter Number of previous noise frames 195 | * @param noiseThreshold 
User set threshold 196 | * @param frameReset Number of frames after which speech flag is reset 197 | */ 198 | private void vad(double[] frame, double[] noise) { 199 | double[] spectralDifference = new double[windowLength]; 200 | 201 | for (int i = 0; i < windowLength; i++) { 202 | spectralDifference[i] = 20 * (Math.log10(frame[i]) - Math.log10(noise[i])); 203 | if (spectralDifference[i] < 0) { 204 | spectralDifference[i] = 0; 205 | } 206 | } 207 | 208 | double diff = Utils.mean(spectralDifference); 209 | 210 | if (diff < this.noiseThreshold) { 211 | this.noiseFlag = true; 212 | this.noiseCounter++; 213 | } else { 214 | this.noiseFlag = false; 215 | this.noiseCounter = 0; 216 | } 217 | 218 | if (this.noiseCounter > this.frameReset) { 219 | this.speechFlag = false; 220 | } else { 221 | this.speechFlag = true; 222 | } 223 | } 224 | 225 | /** 226 | * Windows sampled signal using overlapping Hamming windows 227 | * @param ss The sampled signal 228 | * @param ww The window width 229 | * @param or The overlap ratio 230 | * @return seg The overlapping windowed segments 231 | */ 232 | 233 | private double[][] segmentSignal(double[] ss, int ww, double or ) { 234 | int len = ss.length; 235 | double d = 1 - or; 236 | int frames = (int)(Math.floor(len - ww) / ww / d); 237 | int start = 0; 238 | int stop = 0; 239 | 240 | double[] window = Utils.hamming(ww); 241 | double[][] seg = new double[ww][frames]; 242 | 243 | for (int i = 0; i < frames; i++) { 244 | start = (int)(i * ww * or ); 245 | stop = start + ww; 246 | for (int k = 0; k < ww; k++) { 247 | seg[k][i] = ss[start + k] * window[k]; 248 | } 249 | } 250 | return seg; 251 | } 252 | 253 | /** 254 | * Overlap and add segments to calculate reconstructed signal 255 | * @param segments 2D array of overlapping signal segments 256 | * @param or overlap ratio 257 | * @return reconstructedSignal Speech signal post speech denoising 258 | */ 259 | 260 | private double[] overlapAndAdd(double[][] segments, double or ) { 261 | int ww = 
segments.length; 262 | int frames = segments[0].length; 263 | int start = 0; 264 | int stop = 0; 265 | int signalLength = (int)(ww * (1 - or ) * (frames - 1) + ww); 266 | 267 | double[] reconstructedSignal = new double[signalLength]; 268 | 269 | for (int i = 0; i < frames; i++) { 270 | start = (int)(i * ww * or ); 271 | stop = start + ww; 272 | for (int k = 0; k < ww; k++) { 273 | reconstructedSignal[start + k] = reconstructedSignal[start + k] + segments[k][i]; 274 | } 275 | } 276 | return reconstructedSignal; 277 | } 278 | 279 | public static void main(String[] args) { 280 | } 281 | } -------------------------------------------------------------------------------- /DenoisingExample.java: -------------------------------------------------------------------------------- 1 | import java.io.*; 2 | import java.util.Arrays; 3 | public class DenoisingExample { 4 | public static void main(String[] args) { 5 | try { 6 | String filename = args[0]; 7 | int pos = filename.lastIndexOf("."); 8 | String justName = pos > 0 ? 
filename.substring(0, pos) : filename; 9 | // Open the wav file specified as the first argument 10 | WavFile wavFile = WavFile.openWavFile(new File(filename)); 11 | 12 | // Display information about the wav file 13 | wavFile.display(); 14 | int fs = (int)wavFile.getSampleRate(); 15 | int validBits = wavFile.getValidBits(); 16 | // Get the number of audio channels in the wav file 17 | int numChannels = wavFile.getNumChannels(); 18 | int numFrames = (int)wavFile.getNumFrames(); 19 | int samples = numFrames * numChannels; 20 | 21 | double[] buffer = new double[samples]; 22 | double[][] splitChannel = new double[numChannels][numFrames]; 23 | 24 | int framesRead; 25 | framesRead = wavFile.readFrames(buffer, numFrames); 26 | 27 | 28 | // Close the wavFile 29 | wavFile.close(); 30 | double[] enhancedSingle; 31 | double[][] enhanced; 32 | 33 | WavFile output = WavFile.newWavFile(new File(justName+"_enhanced.wav"), numChannels, numFrames, validBits, fs); 34 | 35 | Denoiser denoiser = new Denoiser(fs,0.4,9,2,8); 36 | if (numChannels == 1) { 37 | enhancedSingle = denoiser.process(buffer); 38 | output.writeFrames(enhancedSingle, enhancedSingle.length); 39 | } else { 40 | for (int i = 0; i < numFrames; i++) { 41 | for (int k = 0; k < numChannels; k++) { 42 | splitChannel[k][i] = buffer[i * numChannels + k]; 43 | } 44 | } 45 | enhanced = denoiser.process(splitChannel); 46 | 47 | for (int i = 0; i < enhanced[0].length; i++) { 48 | for (int k = 0; k < numChannels; k++) { 49 | buffer[i * numChannels + k] = enhanced[k][i]; 50 | } 51 | 52 | } 53 | output.writeFrames(buffer, buffer.length); 54 | } 55 | 56 | } catch (Exception e) { 57 | System.err.println(e); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Alexander Chiu 4 | 5 | Permission is hereby granted, free of charge, to any 
person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | AudioProcessor 2 | ============== 3 | 4 | Java library for speech enhancement 5 | 6 | Usage 7 | ============== 8 | java DenoisingExample [wavfile] 9 | 10 | Denoiser denoiser = new Denoiser(parameters...);
 11 | double[][] input = ...;
 12 | double[][] output = denoiser.process(input);
13 | 14 | Denoiser denoiser = new Denoiser(parameters...);
 15 | double[] input = ...;
 16 | double[] output = denoiser.process(input);
 17 | 18 | References 19 | ============== 20 | [1] "Forward Backward Decision Directed Approach For Speech Enhancement" by Richard C. Hendriks, Richard Heusdens and Jesper Jensen
 21 | [2] Ephraim, Y.; Malah, D., "Speech enhancement using a minimum-mean square error short-time spectral amplitude estimator," IEEE Transactions on Acoustics, Speech and Signal Processing, vol. 32, no. 6, pp. 1109-1121, Dec. 1984 22 | doi: 10.1109/TASSP.1984.1164453
23 | [3] ComplexNumber library by Abdul Fatir https://github.com/abdulfatir/jcomplexnumber
24 | [4] SpecialMath library by JScience http://jscience.org/
25 | [5] WavFile IO class by A.Greensted http://www.labbookpages.co.uk/audio/javaWavFiles.html
26 | [6] MMSE STSA by Esfandiar Zavarehei http://www.mathworks.com/matlabcentral/fileexchange/10143-mmse-stsa 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /Utils.java: -------------------------------------------------------------------------------- 1 | import java.util.Arrays; 2 | public class Utils { 3 | public Utils() { 4 | } 5 | 6 | /** 7 | * Calculates N samples of Hamming window 8 | * @param N Number of samples 9 | * @return samples Array of samples 10 | */ 11 | 12 | public static double[] hamming(int N) { 13 | double[] samples = new double[N]; 14 | 15 | for (int k = 0; k < N; k++) { 16 | samples[k] = 0.54 - 0.46 * Math.cos(2 * Math.PI * k / (N - 1)); 17 | } 18 | 19 | return samples; 20 | } 21 | 22 | /** 23 | * Performs Cooley–Tukey FFT algorithm and returns array of complex numbers 24 | * @param x Radix-2 length N signal array 25 | * @return X Radix-2 length N signal spectrum 26 | */ 27 | 28 | public static ComplexNumber[] fft(ComplexNumber[] x) { 29 | int N = x.length; 30 | 31 | if ( N == 1 ) { 32 | return new ComplexNumber[] {x[0]}; 33 | } 34 | 35 | if (N % 2 != 0) { 36 | throw new RuntimeException("Sample points N not radix-2"); 37 | } 38 | 39 | ComplexNumber[] xEven = new ComplexNumber[N / 2]; 40 | ComplexNumber[] xOdd = new ComplexNumber[N / 2]; 41 | 42 | for (int k = 0; k < N / 2; k++) { 43 | xEven[k] = x[2 * k]; 44 | xOdd[k] = x[2 * k + 1]; 45 | } 46 | 47 | ComplexNumber[] Ek = fft(xEven); 48 | ComplexNumber[] Ok = fft(xOdd); 49 | ComplexNumber[] X = new ComplexNumber[N]; 50 | 51 | for (int k = 0; k < N / 2; k++) { 52 | ComplexNumber tf = ComplexNumber.exp(new ComplexNumber(0, -2 * Math.PI * k / N)); 53 | X[k] = ComplexNumber.add(Ek[k], ComplexNumber.multiply(tf, Ok[k])); 54 | X[k + N / 2] = ComplexNumber.subtract(Ek[k], ComplexNumber.multiply(tf, Ok[k])); 55 | } 56 | 57 | return X; 58 | } 59 | 60 | /** 61 | * Perfoms ifft using fft function 62 | * @param X Radix-2 length N signal 
spectrum 63 | * @return x Radix-2 length N signal array 64 | */ 65 | 66 | public static ComplexNumber[] ifft(ComplexNumber[] X) { 67 | int N = X.length; 68 | ComplexNumber[] x = new ComplexNumber[N]; 69 | 70 | for (int k = 0; k < N; k ++) { 71 | x[k] = X[k].conjugate(); 72 | } 73 | 74 | x = fft(x); 75 | 76 | for (int k = 0; k < N; k ++) { 77 | x[k] = x[k].conjugate(); 78 | x[k] = x[k].times(1.0 / N); 79 | } 80 | 81 | return x; 82 | } 83 | 84 | /** 85 | * [Lanczos approximation of gamma function 86 | * @param x Input value 87 | * @return a Value of gamma function at x 88 | */ 89 | 90 | public static double gamma(double x) { 91 | int g = 7; 92 | double[] p = {0.99999999999980993, 676.5203681218851, -1259.1392167224028, 93 | 771.32342877765313, -176.61502916214059, 12.507343278686905, 94 | -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7 95 | }; 96 | 97 | if (x < 0.5) { 98 | return Math.PI / (Math.sin(Math.PI * x) * gamma(1 - x)); 99 | } 100 | 101 | x -= 1; 102 | double a = p[0]; 103 | double t = x + g + 0.5; 104 | for (int i = 1; i < p.length; i++) { 105 | a += p[i] / (x + i); 106 | } 107 | 108 | return Math.sqrt(2 * Math.PI) * Math.pow(t, x + 0.5) * Math.exp(-t) * a; 109 | } 110 | 111 | /** 112 | * Calculates mean of multidimensional array across axis of choice 113 | * @param data Multidimensional data array 114 | * @param axis Axis to calculate mean across 115 | * @return mean Array of mean values 116 | */ 117 | 118 | public static double[] mean(double[][] data, int axis) { 119 | double[] mean; 120 | int rows = data.length; 121 | int cols = data[0].length; 122 | 123 | if (axis != 1 && axis != 0) { 124 | throw new RuntimeException("Unknown axis. 
Choose 0 for columns or 1 for rows"); 125 | } 126 | 127 | if (axis == 0) { 128 | mean = new double[cols]; 129 | for (int c = 0; c < cols; c++) { 130 | double sum = 0.0; 131 | for (int r = 0; r < rows; r++) { 132 | sum += data[r][c]; 133 | } 134 | mean[c] = sum / rows; 135 | } 136 | } else { 137 | mean = new double[rows]; 138 | 139 | for (int r = 0; r < rows; r++) { 140 | double sum = 0.0; 141 | for (int c = 0; c < cols; c++) { 142 | sum += data[r][c]; 143 | } 144 | mean[r] = sum / cols; 145 | } 146 | } 147 | return mean; 148 | } 149 | public static double mean(double[] data){ 150 | double sum = 0.0; 151 | for(int i =0;i float conversion 31 | private double floatOffset; // Offset factor used for int <-> float conversion 32 | private boolean wordAlignAdjust; // Specify if an extra byte at the end of the data chunk is required for word alignment 33 | 34 | // Wav Header 35 | private int numChannels; // 2 bytes unsigned, 0x0001 (1) to 0xFFFF (65,535) 36 | private long sampleRate; // 4 bytes unsigned, 0x00000001 (1) to 0xFFFFFFFF (4,294,967,295) 37 | // Although a java int is 4 bytes, it is signed, so need to use a long 38 | private int blockAlign; // 2 bytes unsigned, 0x0001 (1) to 0xFFFF (65,535) 39 | private int validBits; // 2 bytes unsigned, 0x0002 (2) to 0xFFFF (65,535) 40 | 41 | // Buffering 42 | private byte[] buffer; // Local buffer used for IO 43 | private int bufferPointer; // Points to the current position in local buffer 44 | private int bytesRead; // Bytes read after last read into local buffer 45 | private long frameCounter; // Current number of frames read or written 46 | 47 | // Cannot instantiate WavFile directly, must either use newWavFile() or openWavFile() 48 | private WavFile() 49 | { 50 | buffer = new byte[BUFFER_SIZE]; 51 | } 52 | 53 | public int getNumChannels() 54 | { 55 | return numChannels; 56 | } 57 | 58 | public long getNumFrames() 59 | { 60 | return numFrames; 61 | } 62 | 63 | public long getFramesRemaining() 64 | { 65 | return numFrames - 
frameCounter; 66 | } 67 | 68 | public long getSampleRate() 69 | { 70 | return sampleRate; 71 | } 72 | 73 | public int getValidBits() 74 | { 75 | return validBits; 76 | } 77 | 78 | public static WavFile newWavFile(File file, int numChannels, long numFrames, int validBits, long sampleRate) throws IOException, WavFileException 79 | { 80 | // Instantiate new Wavfile and initialise 81 | WavFile wavFile = new WavFile(); 82 | wavFile.file = file; 83 | wavFile.numChannels = numChannels; 84 | wavFile.numFrames = numFrames; 85 | wavFile.sampleRate = sampleRate; 86 | wavFile.bytesPerSample = (validBits + 7) / 8; 87 | wavFile.blockAlign = wavFile.bytesPerSample * numChannels; 88 | wavFile.validBits = validBits; 89 | 90 | // Sanity check arguments 91 | if (numChannels < 1 || numChannels > 65535) throw new WavFileException("Illegal number of channels, valid range 1 to 65536"); 92 | if (numFrames < 0) throw new WavFileException("Number of frames must be positive"); 93 | if (validBits < 2 || validBits > 65535) throw new WavFileException("Illegal number of valid bits, valid range 2 to 65536"); 94 | if (sampleRate < 0) throw new WavFileException("Sample rate must be positive"); 95 | 96 | // Create output stream for writing data 97 | wavFile.oStream = new FileOutputStream(file); 98 | 99 | // Calculate the chunk sizes 100 | long dataChunkSize = wavFile.blockAlign * numFrames; 101 | long mainChunkSize = 4 + // Riff Type 102 | 8 + // Format ID and size 103 | 16 + // Format data 104 | 8 + // Data ID and size 105 | dataChunkSize; 106 | 107 | // Chunks must be word aligned, so if odd number of audio data bytes 108 | // adjust the main chunk size 109 | if (dataChunkSize % 2 == 1) { 110 | mainChunkSize += 1; 111 | wavFile.wordAlignAdjust = true; 112 | } 113 | else { 114 | wavFile.wordAlignAdjust = false; 115 | } 116 | 117 | // Set the main chunk size 118 | putLE(RIFF_CHUNK_ID, wavFile.buffer, 0, 4); 119 | putLE(mainChunkSize, wavFile.buffer, 4, 4); 120 | putLE(RIFF_TYPE_ID, wavFile.buffer, 
8, 4); 121 | 122 | // Write out the header 123 | wavFile.oStream.write(wavFile.buffer, 0, 12); 124 | 125 | // Put format data in buffer 126 | long averageBytesPerSecond = sampleRate * wavFile.blockAlign; 127 | 128 | putLE(FMT_CHUNK_ID, wavFile.buffer, 0, 4); // Chunk ID 129 | putLE(16, wavFile.buffer, 4, 4); // Chunk Data Size 130 | putLE(1, wavFile.buffer, 8, 2); // Compression Code (Uncompressed) 131 | putLE(numChannels, wavFile.buffer, 10, 2); // Number of channels 132 | putLE(sampleRate, wavFile.buffer, 12, 4); // Sample Rate 133 | putLE(averageBytesPerSecond, wavFile.buffer, 16, 4); // Average Bytes Per Second 134 | putLE(wavFile.blockAlign, wavFile.buffer, 20, 2); // Block Align 135 | putLE(validBits, wavFile.buffer, 22, 2); // Valid Bits 136 | 137 | // Write Format Chunk 138 | wavFile.oStream.write(wavFile.buffer, 0, 24); 139 | 140 | // Start Data Chunk 141 | putLE(DATA_CHUNK_ID, wavFile.buffer, 0, 4); // Chunk ID 142 | putLE(dataChunkSize, wavFile.buffer, 4, 4); // Chunk Data Size 143 | 144 | // Write Format Chunk 145 | wavFile.oStream.write(wavFile.buffer, 0, 8); 146 | 147 | // Calculate the scaling factor for converting to a normalised double 148 | if (wavFile.validBits > 8) 149 | { 150 | // If more than 8 validBits, data is signed 151 | // Conversion required multiplying by magnitude of max positive value 152 | wavFile.floatOffset = 0; 153 | wavFile.floatScale = Long.MAX_VALUE >> (64 - wavFile.validBits); 154 | } 155 | else 156 | { 157 | // Else if 8 or less validBits, data is unsigned 158 | // Conversion required dividing by max positive value 159 | wavFile.floatOffset = 1; 160 | wavFile.floatScale = 0.5 * ((1 << wavFile.validBits) - 1); 161 | } 162 | 163 | // Finally, set the IO State 164 | wavFile.bufferPointer = 0; 165 | wavFile.bytesRead = 0; 166 | wavFile.frameCounter = 0; 167 | wavFile.ioState = IOState.WRITING; 168 | 169 | return wavFile; 170 | } 171 | 172 | public static WavFile openWavFile(File file) throws IOException, WavFileException 173 | { 
174 | // Instantiate new Wavfile and store the file reference 175 | WavFile wavFile = new WavFile(); 176 | wavFile.file = file; 177 | 178 | // Create a new file input stream for reading file data 179 | wavFile.iStream = new FileInputStream(file); 180 | 181 | // Read the first 12 bytes of the file 182 | int bytesRead = wavFile.iStream.read(wavFile.buffer, 0, 12); 183 | if (bytesRead != 12) throw new WavFileException("Not enough wav file bytes for header"); 184 | 185 | // Extract parts from the header 186 | long riffChunkID = getLE(wavFile.buffer, 0, 4); 187 | long chunkSize = getLE(wavFile.buffer, 4, 4); 188 | long riffTypeID = getLE(wavFile.buffer, 8, 4); 189 | 190 | // Check the header bytes contains the correct signature 191 | if (riffChunkID != RIFF_CHUNK_ID) throw new WavFileException("Invalid Wav Header data, incorrect riff chunk ID"); 192 | if (riffTypeID != RIFF_TYPE_ID) throw new WavFileException("Invalid Wav Header data, incorrect riff type ID"); 193 | 194 | // Check that the file size matches the number of bytes listed in header 195 | if (file.length() != chunkSize+8) { 196 | throw new WavFileException("Header chunk size (" + chunkSize + ") does not match file size (" + file.length() + ")"); 197 | } 198 | 199 | boolean foundFormat = false; 200 | boolean foundData = false; 201 | 202 | // Search for the Format and Data Chunks 203 | while (true) 204 | { 205 | // Read the first 8 bytes of the chunk (ID and chunk size) 206 | bytesRead = wavFile.iStream.read(wavFile.buffer, 0, 8); 207 | if (bytesRead == -1) throw new WavFileException("Reached end of file without finding format chunk"); 208 | if (bytesRead != 8) throw new WavFileException("Could not read chunk header"); 209 | 210 | // Extract the chunk ID and Size 211 | long chunkID = getLE(wavFile.buffer, 0, 4); 212 | chunkSize = getLE(wavFile.buffer, 4, 4); 213 | 214 | // Word align the chunk size 215 | // chunkSize specifies the number of bytes holding data. 
However, 216 | // the data should be word aligned (2 bytes) so we need to calculate 217 | // the actual number of bytes in the chunk 218 | long numChunkBytes = (chunkSize%2 == 1) ? chunkSize+1 : chunkSize; 219 | 220 | if (chunkID == FMT_CHUNK_ID) 221 | { 222 | // Flag that the format chunk has been found 223 | foundFormat = true; 224 | 225 | // Read in the header info 226 | bytesRead = wavFile.iStream.read(wavFile.buffer, 0, 16); 227 | 228 | // Check this is uncompressed data 229 | int compressionCode = (int) getLE(wavFile.buffer, 0, 2); 230 | if (compressionCode != 1) throw new WavFileException("Compression Code " + compressionCode + " not supported"); 231 | 232 | // Extract the format information 233 | wavFile.numChannels = (int) getLE(wavFile.buffer, 2, 2); 234 | wavFile.sampleRate = getLE(wavFile.buffer, 4, 4); 235 | wavFile.blockAlign = (int) getLE(wavFile.buffer, 12, 2); 236 | wavFile.validBits = (int) getLE(wavFile.buffer, 14, 2); 237 | 238 | if (wavFile.numChannels == 0) throw new WavFileException("Number of channels specified in header is equal to zero"); 239 | if (wavFile.blockAlign == 0) throw new WavFileException("Block Align specified in header is equal to zero"); 240 | if (wavFile.validBits < 2) throw new WavFileException("Valid Bits specified in header is less than 2"); 241 | if (wavFile.validBits > 64) throw new WavFileException("Valid Bits specified in header is greater than 64, this is greater than a long can hold"); 242 | 243 | // Calculate the number of bytes required to hold 1 sample 244 | wavFile.bytesPerSample = (wavFile.validBits + 7) / 8; 245 | if (wavFile.bytesPerSample * wavFile.numChannels != wavFile.blockAlign) 246 | throw new WavFileException("Block Align does not agree with bytes required for validBits and number of channels"); 247 | 248 | // Account for number of format bytes and then skip over 249 | // any extra format bytes 250 | numChunkBytes -= 16; 251 | if (numChunkBytes > 0) wavFile.iStream.skip(numChunkBytes); 252 | } 253 | 
else if (chunkID == DATA_CHUNK_ID) 254 | { 255 | // Check if we've found the format chunk, 256 | // If not, throw an exception as we need the format information 257 | // before we can read the data chunk 258 | if (foundFormat == false) throw new WavFileException("Data chunk found before Format chunk"); 259 | 260 | // Check that the chunkSize (wav data length) is a multiple of the 261 | // block align (bytes per frame) 262 | if (chunkSize % wavFile.blockAlign != 0) throw new WavFileException("Data Chunk size is not multiple of Block Align"); 263 | 264 | // Calculate the number of frames 265 | wavFile.numFrames = chunkSize / wavFile.blockAlign; 266 | 267 | // Flag that we've found the wave data chunk 268 | foundData = true; 269 | 270 | break; 271 | } 272 | else 273 | { 274 | // If an unknown chunk ID is found, just skip over the chunk data 275 | wavFile.iStream.skip(numChunkBytes); 276 | } 277 | } 278 | 279 | // Throw an exception if no data chunk has been found 280 | if (foundData == false) throw new WavFileException("Did not find a data chunk"); 281 | 282 | // Calculate the scaling factor for converting to a normalised double 283 | if (wavFile.validBits > 8) 284 | { 285 | // If more than 8 validBits, data is signed 286 | // Conversion required dividing by magnitude of max negative value 287 | wavFile.floatOffset = 0; 288 | wavFile.floatScale = 1 << (wavFile.validBits - 1); 289 | } 290 | else 291 | { 292 | // Else if 8 or less validBits, data is unsigned 293 | // Conversion required dividing by max positive value 294 | wavFile.floatOffset = -1; 295 | wavFile.floatScale = 0.5 * ((1 << wavFile.validBits) - 1); 296 | } 297 | 298 | wavFile.bufferPointer = 0; 299 | wavFile.bytesRead = 0; 300 | wavFile.frameCounter = 0; 301 | wavFile.ioState = IOState.READING; 302 | 303 | return wavFile; 304 | } 305 | 306 | // Get and Put little endian data from local buffer 307 | // ------------------------------------------------ 308 | private static long getLE(byte[] buffer, int pos, 
int numBytes) 309 | { 310 | numBytes --; 311 | pos += numBytes; 312 | 313 | long val = buffer[pos] & 0xFF; 314 | for (int b=0 ; b>= 8; 325 | pos ++; 326 | } 327 | } 328 | 329 | // Sample Writing and Reading 330 | // -------------------------- 331 | private void writeSample(long val) throws IOException 332 | { 333 | for (int b=0 ; b>= 8; 343 | bufferPointer ++; 344 | } 345 | } 346 | 347 | private long readSample() throws IOException, WavFileException 348 | { 349 | long val = 0; 350 | 351 | for (int b=0 ; b 0) oStream.write(buffer, 0, bufferPointer); 673 | 674 | // If an extra byte is required for word alignment, add it to the end 675 | if (wordAlignAdjust) oStream.write(0); 676 | 677 | // Close the stream and set to null 678 | oStream.close(); 679 | oStream = null; 680 | } 681 | 682 | // Flag that the stream is closed 683 | ioState = IOState.CLOSED; 684 | } 685 | 686 | public void display() 687 | { 688 | display(System.out); 689 | } 690 | 691 | public void display(PrintStream out) 692 | { 693 | out.printf("File: %s\n", file); 694 | out.printf("Channels: %d, Frames: %d\n", numChannels, numFrames); 695 | out.printf("IO State: %s\n", ioState); 696 | out.printf("Sample Rate: %d, Block Align: %d\n", sampleRate, blockAlign); 697 | out.printf("Valid Bits: %d, Bytes per sample: %d\n", validBits, bytesPerSample); 698 | } 699 | 700 | public static void main(String[] args) 701 | { 702 | if (args.length < 1) 703 | { 704 | System.err.println("Must supply filename"); 705 | System.exit(1); 706 | } 707 | 708 | try 709 | { 710 | for (String filename : args) 711 | { 712 | WavFile readWavFile = openWavFile(new File(filename)); 713 | readWavFile.display(); 714 | 715 | long numFrames = readWavFile.getNumFrames(); 716 | int numChannels = readWavFile.getNumChannels(); 717 | int validBits = readWavFile.getValidBits(); 718 | long sampleRate = readWavFile.getSampleRate(); 719 | 720 | WavFile writeWavFile = newWavFile(new File("out.wav"), numChannels, numFrames, validBits, sampleRate); 721 | 
722 | final int BUF_SIZE = 5001; 723 | 724 | // int[] buffer = new int[BUF_SIZE * numChannels]; 725 | // long[] buffer = new long[BUF_SIZE * numChannels]; 726 | double[] buffer = new double[BUF_SIZE * numChannels]; 727 | 728 | int framesRead = 0; 729 | int framesWritten = 0; 730 | 731 | do 732 | { 733 | framesRead = readWavFile.readFrames(buffer, BUF_SIZE); 734 | framesWritten = writeWavFile.writeFrames(buffer, BUF_SIZE); 735 | System.out.printf("%d %d\n", framesRead, framesWritten); 736 | } 737 | while (framesRead != 0); 738 | 739 | readWavFile.close(); 740 | writeWavFile.close(); 741 | } 742 | 743 | WavFile writeWavFile = newWavFile(new File("out2.wav"), 1, 10, 23, 44100); 744 | double[] buffer = new double[10]; 745 | writeWavFile.writeFrames(buffer, 10); 746 | writeWavFile.close(); 747 | } 748 | catch (Exception e) 749 | { 750 | System.err.println(e); 751 | e.printStackTrace(); 752 | } 753 | } 754 | } 755 | -------------------------------------------------------------------------------- /WavFileException.java: -------------------------------------------------------------------------------- 1 | public class WavFileException extends Exception 2 | { 3 | public WavFileException() 4 | { 5 | super(); 6 | } 7 | 8 | public WavFileException(String message) 9 | { 10 | super(message); 11 | } 12 | 13 | public WavFileException(String message, Throwable cause) 14 | { 15 | super(message, cause); 16 | } 17 | 18 | public WavFileException(Throwable cause) 19 | { 20 | super(cause); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /data/fac_lom.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexanderchiu/AudioProcessor/f34ced1beca680f852e52ec6f47cb646f8b97dc8/data/fac_lom.wav -------------------------------------------------------------------------------- /data/fulcrum1_in_pcm_11ks.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexanderchiu/AudioProcessor/f34ced1beca680f852e52ec6f47cb646f8b97dc8/data/fulcrum1_in_pcm_11ks.wav -------------------------------------------------------------------------------- /data/int_noisy.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexanderchiu/AudioProcessor/f34ced1beca680f852e52ec6f47cb646f8b97dc8/data/int_noisy.wav -------------------------------------------------------------------------------- /data/int_noisy_enhanced.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexanderchiu/AudioProcessor/f34ced1beca680f852e52ec6f47cb646f8b97dc8/data/int_noisy_enhanced.wav -------------------------------------------------------------------------------- /data/noisy.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexanderchiu/AudioProcessor/f34ced1beca680f852e52ec6f47cb646f8b97dc8/data/noisy.wav -------------------------------------------------------------------------------- /data/noisy_enhanced.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexanderchiu/AudioProcessor/f34ced1beca680f852e52ec6f47cb646f8b97dc8/data/noisy_enhanced.wav -------------------------------------------------------------------------------- /data/stereo_test.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexanderchiu/AudioProcessor/f34ced1beca680f852e52ec6f47cb646f8b97dc8/data/stereo_test.wav -------------------------------------------------------------------------------- /data/stereo_test_enhanced.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alexanderchiu/AudioProcessor/f34ced1beca680f852e52ec6f47cb646f8b97dc8/data/stereo_test_enhanced.wav --------------------------------------------------------------------------------