├── .gitignore ├── README.md ├── pom.xml └── src ├── main └── java │ └── edu │ └── ecnu │ └── pbf │ ├── CommonConstants.java │ ├── PbfMain.java │ ├── base │ ├── BasicBloomFilter.java │ ├── PersistentBloomFilter.java │ └── impl │ │ ├── Beta0.java │ │ ├── Beta1.java │ │ ├── Beta2.java │ │ └── TemporalRangeBloomFilterV2.java │ ├── element │ └── DualTuple.java │ └── util │ ├── BinaryUtil.java │ ├── BinaryUtil2.java │ ├── DataGen.java │ ├── MathUtil.java │ ├── MurmurHash3.java │ ├── OptimizationUtil.java │ ├── PbfUtil.java │ ├── PbfUtil2.java │ ├── RandomGenerator.java │ └── ResultUtil.java └── test └── java └── edu └── ecnu └── pbf ├── AppTest.java └── Beta1Test.java /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | .classpath 3 | .project 4 | /.settings/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Persistent Bloom Filter # 2 | 3 | This project implements the PBF data structures described in the paper [Persistent Bloom Filter: Membership Testing for the Entire History](http://www.cs.utah.edu/~lifeifei/papers/pbf.pdf). 
/**
 * Constants shared by the persistent Bloom filter implementations.
 *
 * <p>This class only holds static constants and is not meant to be
 * instantiated or subclassed.
 */
public final class CommonConstants
{
    /** Number of entries in {@link #g}: the powers of two 2^0 .. 2^30. */
    private static final int POWER_COUNT = 31;

    /**
     * Table of powers of two: {@code g[i] == 1 << i} for every index
     * {@code i} in {@code [0, 30]}. The array is shared; callers must treat
     * it as read-only.
     */
    public static final int[] g = powersOfTwo(POWER_COUNT);

    /**
     * Hash-function count that {@code Beta1}'s single-size constructor passes
     * to each filter it creates.
     */
    public static final int K_MAX = 16;

    /**
     * Maximum number of levels.
     * NOTE(review): no use of this constant is visible in the reviewed
     * section - confirm the intended meaning against its callers.
     */
    public static final int MAX_LEVEL_NUM = 30;

    private CommonConstants()
    {
        // Constants holder: no instances.
    }

    /** Builds {@code {1 << 0, 1 << 1, ..., 1 << (count - 1)}}. */
    private static int[] powersOfTwo(int count)
    {
        int[] table = new int[count];
        for (int i = 0; i < count; i++)
        {
            table[i] = 1 << i;
        }
        return table;
    }
}
60 | * 61 | * @return int 62 | */ 63 | public int getSize() 64 | { 65 | return this.bitNum; 66 | } 67 | 68 | 69 | /** 70 | * Get the number of hash function in this Bloom filter. 71 | * 72 | * @return int 73 | */ 74 | public int getHashNum() 75 | { 76 | return this.hashNum; 77 | } 78 | 79 | /** 80 | * Insert an element into the Bloom filter. 81 | * 82 | * @param element 83 | */ 84 | public void insert(byte[] element) 85 | { 86 | for (int i = 0; i < hashNum; i++) 87 | { 88 | int hash = MurmurHash3.murmurhash3_x86_32(element, 0, 89 | element.length, i); 90 | int index = getIndex(hash); 91 | bs.set(index); 92 | } 93 | } 94 | 95 | /** 96 | * Insert a String element into the Bloom filter. 97 | * 98 | * @param element 99 | */ 100 | public void insertString(String element) 101 | { 102 | byte[] byteElement = element.getBytes(Charset.forName("UTF-8")); 103 | 104 | for (int i = 0; i < hashNum; i++) 105 | { 106 | int hash = MurmurHash3.murmurhash3_x86_32(byteElement, 0, 107 | byteElement.length, i); 108 | int index = getIndex(hash); 109 | bs.set(index); 110 | } 111 | 112 | } 113 | 114 | /** 115 | * Query whether an element exists in this Bloom filter. 116 | * 117 | * @param element 118 | * @return boolean true if the element exists in this bloom filter 119 | */ 120 | public boolean query(byte[] element) 121 | { 122 | boolean isExist = true; 123 | for (int i = 0; i < hashNum; i++) 124 | { 125 | int hash = MurmurHash3.murmurhash3_x86_32(element, 0, 126 | element.length, i); 127 | int index = getIndex(hash); 128 | if (false == bs.get(index)) 129 | { 130 | isExist = false; 131 | break; 132 | } 133 | } 134 | 135 | return isExist; 136 | } 137 | 138 | /** 139 | * Query whether a {@code String} element exists in this Bloom filter. 
140 | * 141 | * @param element 142 | * @return boolean true if the element exists in this bloom filter 143 | */ 144 | public boolean queryString(String element) 145 | { 146 | byte[] byteElement = element.getBytes(Charset.forName("UTF-8")); 147 | boolean isExist = true; 148 | //int hash = 0; 149 | for (int i = 0; i < hashNum; i++) 150 | { 151 | int hash = MurmurHash3.murmurhash3_x86_32(byteElement, 0, 152 | byteElement.length, i); 153 | int index = getIndex(hash); 154 | if (false == bs.get(index)) 155 | { 156 | isExist = false; 157 | break; 158 | } 159 | } 160 | 161 | return isExist; 162 | } 163 | 164 | /** 165 | * Get the probability of false positive. 166 | * 167 | * @return BigDecimal 168 | */ 169 | public BigDecimal getPFP(int insertNum) 170 | { 171 | return BigDecimal.ONE; 172 | } 173 | 174 | private int getIndex(int hash) 175 | { 176 | int index = hash % bitNum; 177 | return index >= 0 ? index : -index; 178 | } 179 | 180 | public static void main(String[] args) 181 | { 182 | BasicBloomFilter bbf = new BasicBloomFilter(100000, 5); 183 | bbf.insert("guo".getBytes()); 184 | System.out.println(bbf.query("guo".getBytes())); 185 | System.out.println(bbf.query("g".getBytes())); 186 | } 187 | 188 | } 189 | -------------------------------------------------------------------------------- /src/main/java/edu/ecnu/pbf/base/PersistentBloomFilter.java: -------------------------------------------------------------------------------- 1 | package edu.ecnu.pbf.base; 2 | 3 | /** 4 | * A variant of Bloom filter. Persistent Bloom filter (PBF) can support temporal 5 | * sets queries. We use Set[startTime, endTime] to denote the set of elements 6 | * which arrived within [startTime, endTime]. We can use PBF to check whether an 7 | * element exists in Set[startTime, endTime]. False positives are possible, but 8 | * false negatives are not. 
/**
 * Contract for a persistent Bloom filter (PBF): a Bloom filter variant that
 * answers temporal membership questions.
 *
 * <p>{@code Set[startTime, endTime]} denotes the elements that arrived within
 * {@code [startTime, endTime]}. An implementation can report an element as
 * present when it is not (false positive), but must never report an inserted
 * element as absent (false negative).
 *
 * <p>Every operation exists in two flavours: one taking the element as a
 * {@code String} and one taking its raw bytes.
 *
 * @author guo
 */
public interface PersistentBloomFilter
{
    // ---- String-keyed operations ----

    /**
     * Records that {@code element} arrived at {@code timestamp}.
     *
     * @param element   the element to record
     * @param timestamp the arrival time of the element
     * @return an implementation-defined status flag
     */
    boolean insertString(String element, long timestamp);

    /**
     * Tests whether {@code element} appears at the single time point
     * {@code timestamp}.
     *
     * @param element   the element to look up
     * @param timestamp the time point to test
     * @return {@code true} if the element may have arrived at that time
     */
    boolean queryString(String element, long timestamp);

    /**
     * Tests whether {@code element} is a member of
     * {@code Set[startTime, endTime]}.
     *
     * @param element   the element to look up
     * @param startTime first time point of the range
     * @param endTime   last time point of the range
     * @return {@code true} if the element may have arrived within the range
     */
    boolean queryString(String element, long startTime, long endTime);

    // ---- byte-keyed operations ----

    /**
     * Records the (element, timestamp) pair, identifying the element by its
     * bytes.
     *
     * @param element   the bytes of the element to record
     * @param timestamp the arrival time of the element
     * @return an implementation-defined status flag
     */
    boolean insert(byte[] element, long timestamp);

    /**
     * Tests whether the element with the given bytes appears at the single
     * time point {@code timestamp}.
     *
     * @param element   the bytes of the element to look up
     * @param timestamp the time point to test
     * @return {@code true} if the element may have arrived at that time
     */
    boolean query(byte[] element, long timestamp);

    /**
     * Tests whether the element with the given bytes exists in the temporal
     * range {@code [startTime, endTime]}.
     *
     * @param element   the bytes of the element to look up
     * @param startTime first time point of the range
     * @param endTime   last time point of the range
     * @return {@code true} if the element may have arrived within the range
     */
    boolean query(byte[] element, long startTime, long endTime);
}
70 | * 71 | * @return int 72 | */ 73 | public int getHashNum() 74 | { 75 | return this.hashNum; 76 | } 77 | 78 | /** 79 | * Insert a String element into the Bloom filter with timestamp. 80 | * 81 | * @param element 82 | * @param timestamp 83 | */ 84 | public boolean insertString(String element, long timestamp) 85 | { 86 | boolean result = true; 87 | // byte[] byteElement = (element + "|" + String.valueOf(timestamp)) 88 | // .getBytes(Charset.forName("UTF-8")); 89 | byte[] byteElement = element.getBytes(Charset.forName("UTF-8")); 90 | 91 | for (int i = 0; i < hashNum; i++) 92 | { 93 | int hash = MurmurHash3.murmurhash3_x86_32(byteElement, 0, 94 | byteElement.length, (int)(timestamp * this.hashNum + i)); 95 | int index = getIndex(hash); 96 | bs.set(index); 97 | } 98 | return result; 99 | } 100 | 101 | /** 102 | * Query weather a given String element exists in the Bloom filter with 103 | * timestamp 104 | * 105 | * @param element 106 | * @param timestamp 107 | * @return boolean true if the element exists in the given time 108 | */ 109 | public boolean queryString(String element, long timestamp) 110 | { 111 | // byte[] byteElement = (element + "|" + String.valueOf(timestamp)) 112 | // .getBytes(Charset.forName("UTF-8")); 113 | byte[] byteElement = element.getBytes(Charset.forName("UTF-8")); 114 | boolean isExist = true; 115 | for (int i = 0; i < hashNum; i++) 116 | { 117 | int hash = MurmurHash3.murmurhash3_x86_32(byteElement, 0, 118 | byteElement.length, (int)(timestamp * this.hashNum + i)); 119 | int index = getIndex(hash); 120 | if (false == bs.get(index)) 121 | { 122 | isExist = false; 123 | break; 124 | } 125 | } 126 | 127 | return isExist; 128 | } 129 | 130 | /** 131 | * Query weather a given String element exists in the given time range 132 | * 133 | * @param element 134 | * @param timestamp 135 | * @return boolean true if the element exists in the given time range 136 | */ 137 | public boolean queryString(String element, long startTime, long endTime) 138 | { 
139 | boolean isExist = false; 140 | 141 | if (startTime > endTime || startTime < 0 || endTime < 0) 142 | { 143 | isExist = false; 144 | } 145 | else 146 | { 147 | for (long i = startTime; i <= endTime; i++) 148 | { 149 | if (queryString(element, i)) 150 | { 151 | isExist = true; 152 | break; 153 | } 154 | } 155 | } 156 | 157 | return isExist; 158 | } 159 | 160 | /** 161 | * Insert (element, timestamp) pair using the element's bytes to this Persistent Bloom filter. 162 | * @param element 163 | * @param timestamp 164 | * @return boolean 165 | */ 166 | public boolean insert(byte[] element, long timestamp) 167 | { 168 | boolean result = true; 169 | for (int i = 0; i < hashNum; i++) 170 | { 171 | int hash = MurmurHash3.murmurhash3_x86_32(element, 0, 172 | element.length, (int)(timestamp * this.hashNum + i)); 173 | int index = getIndex(hash); 174 | bs.set(index); 175 | } 176 | return result; 177 | } 178 | 179 | /** 180 | * Query whether an element appears at a time point using its bytes. 181 | * @param element 182 | * @param timestamp 183 | * @return boolean 184 | */ 185 | public boolean query(byte[] element, long timestamp) 186 | { 187 | boolean result = true; 188 | for (int i = 0; i < hashNum; i++) 189 | { 190 | int hash = MurmurHash3.murmurhash3_x86_32(element, 0, 191 | element.length, (int)(timestamp * this.hashNum + i)); 192 | int index = getIndex(hash); 193 | if (false == bs.get(index)) 194 | { 195 | result = false; 196 | break; 197 | } 198 | } 199 | return result; 200 | } 201 | 202 | /** 203 | * Query whether an element exists in a temporal range using its bytes. 204 | * @param element 205 | * @param startTime 206 | * @param endTime 207 | * @return boolean 208 | */ 209 | public boolean query(byte[] element, long startTime, long endTime) 210 | { 211 | boolean result = false; 212 | if (startTime > endTime || endTime < 0) 213 | { 214 | result = false; 215 | } 216 | else 217 | { 218 | startTime = startTime < 0 ? 
0:startTime; 219 | for (long i = startTime; i <= endTime; i++) 220 | { 221 | if (query(element, i)) 222 | { 223 | result = true; 224 | break; 225 | } 226 | } 227 | } 228 | return result; 229 | } 230 | 231 | private int getIndex(int hash) 232 | { 233 | int index = hash % bitNum; 234 | return index >= 0 ? index : -index; 235 | } 236 | 237 | public static void main(String[] args) 238 | { 239 | byte[] element = "guo".getBytes(); 240 | Beta0 b = new Beta0(100000, 6); 241 | b.insert(element, 100); 242 | System.out.println(b.query(element, 100)); 243 | System.out.println(b.query(element, 0, 99)); 244 | System.out.println(b.query(element, 101, 200)); 245 | System.out.println(b.query(element, 0, 100)); 246 | System.out.println(b.query(element, 100, 200)); 247 | System.out.println(b.query(element, 0, 200)); 248 | System.out.println(b.query(element, 200, 215)); 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /src/main/java/edu/ecnu/pbf/base/impl/Beta1.java: -------------------------------------------------------------------------------- 1 | package edu.ecnu.pbf.base.impl; 2 | 3 | import java.util.ArrayList; 4 | 5 | import edu.ecnu.pbf.CommonConstants; 6 | import edu.ecnu.pbf.base.BasicBloomFilter; 7 | import edu.ecnu.pbf.base.PersistentBloomFilter; 8 | import edu.ecnu.pbf.util.BinaryUtil2; 9 | 10 | public class Beta1 implements PersistentBloomFilter 11 | { 12 | private ArrayList bfArray; 13 | private Beta0 b0; // single bloom filter 14 | 15 | private int totalBitNum; 16 | private int levelNum; // L+1 17 | private int g; 18 | private int gLevel; 19 | private int maxT; 20 | 21 | public Beta1(int bitNum, int levelNum, int g) 22 | { 23 | b0 = new Beta0(bitNum, CommonConstants.K_MAX); 24 | totalBitNum = bitNum; 25 | 26 | this.gLevel = 1; 27 | int tempG = g; 28 | while ((tempG = tempG / 2) > 0) 29 | { 30 | this.gLevel++; 31 | } 32 | int bitNumTmp = bitNum; 33 | this.bfArray = new ArrayList(); 34 | int j = 2; 35 | for (int i = 
0; i < (1 << (levelNum - this.gLevel + 1)) - 1; i++) 36 | { 37 | this.bfArray.add(new BasicBloomFilter(bitNumTmp, CommonConstants.K_MAX)); 38 | totalBitNum += bitNumTmp; 39 | // if ((i + 2) % j == 0) 40 | // { 41 | // j = j << 1; 42 | // bitNumTmp = bitNumTmp > 100000 ? bitNumTmp/2:bitNumTmp; 43 | // } 44 | } 45 | 46 | this.levelNum = levelNum; 47 | this.g = g; 48 | this.maxT = CommonConstants.g[levelNum - 1] - 1; 49 | } 50 | 51 | public Beta1(int[] m, int[] k, int levelNum, int g) 52 | { 53 | this.gLevel = 1; 54 | int tempG = g; 55 | while ((tempG = tempG / 2) > 0) 56 | { 57 | this.gLevel++; 58 | } 59 | 60 | b0 = new Beta0(m[0], k[0]); 61 | totalBitNum = m[0]; 62 | 63 | this.bfArray = new ArrayList(); 64 | for (int i = 1; i < m.length; i++) 65 | { 66 | if (m[i] == 0) 67 | { 68 | bfArray.add(new BasicBloomFilter(1, 1)); 69 | } 70 | else 71 | { 72 | bfArray.add(new BasicBloomFilter(m[i], k[i])); 73 | totalBitNum += m[i]; 74 | } 75 | } 76 | 77 | this.levelNum = levelNum; 78 | this.g = g; 79 | this.maxT = CommonConstants.g[levelNum - 1] - 1; 80 | 81 | // System.out.println("total number of bits: " + this.totalBitNum); 82 | // System.out.println("total number of bf: " + this.bfArray.size()); 83 | } 84 | 85 | public boolean insertString(String element, long timestamp) 86 | { 87 | int index = 1; 88 | int start = 0; 89 | int end = this.maxT; 90 | for (int i = 0; i < levelNum - gLevel + 1; i++) 91 | { 92 | bfArray.get(index - 1).insertString(element); 93 | int mid = (start + end) / 2; 94 | if (mid < timestamp) 95 | { 96 | start = mid + 1; 97 | index = index * 2 + 1; 98 | } 99 | else 100 | { 101 | end = mid; 102 | index = index * 2; 103 | } 104 | } 105 | 106 | b0.insertString(element, timestamp); 107 | return true; 108 | } 109 | 110 | public boolean queryString(String element, long timestamp) 111 | { 112 | // TODO 113 | return false; 114 | } 115 | 116 | public boolean queryString(String element, long startTime, long endTime) 117 | { 118 | boolean result = false; 119 | 
ArrayList indexArray = getBinaryDecomposition((int) startTime, (int) endTime); 120 | for (int i = 0; i < indexArray.size(); i++) 121 | { 122 | int index = indexArray.get(i); 123 | if (bfArray.get(index - 1).queryString(element)) 124 | { 125 | if (i == 0 && index > bfArray.size() / 2 && !b0.queryString(element, startTime, 126 | startTime + this.g - startTime / this.g - 1)) 127 | { 128 | continue; 129 | } 130 | else if ((i == indexArray.size() - 1) 131 | && index > bfArray.size() / 2 132 | && !b0.queryString(element, endTime - endTime / this.g, endTime)) 133 | { 134 | continue; 135 | } 136 | result = true; 137 | break; 138 | } 139 | } 140 | return result; 141 | } 142 | 143 | public boolean insert(byte[] element, long timestamp) 144 | { 145 | boolean result = true; 146 | int index = 1; 147 | int start = 0; 148 | int end = this.maxT; 149 | for (int i = 0; i < levelNum - gLevel + 1; i++) 150 | { 151 | BasicBloomFilter bf = bfArray.get(index - 1); 152 | if (null != bf) 153 | { 154 | bf.insert(element); 155 | } 156 | int mid = (start + end) / 2; 157 | if (mid < timestamp) 158 | { 159 | start = mid + 1; 160 | index = index * 2 + 1; 161 | } 162 | else 163 | { 164 | end = mid; 165 | index = index * 2; 166 | } 167 | } 168 | 169 | b0.insert(element, timestamp); 170 | return result; 171 | } 172 | 173 | public boolean query(byte[] element, long timestamp) 174 | { 175 | // TODO 176 | return false; 177 | } 178 | 179 | public boolean query(byte[] element, long startTime, long endTime) 180 | { 181 | boolean result = false; 182 | ArrayList indexArray = getBinaryDecomposition((int) startTime, (int) endTime); 183 | for (int i = 0; i < indexArray.size(); i++) 184 | { 185 | int index = indexArray.get(i); 186 | BasicBloomFilter bf = bfArray.get(index - 1); 187 | if (null == bf) 188 | { 189 | // continue; 190 | result = true; 191 | break; 192 | } 193 | else if (bf.query(element)) 194 | { 195 | if (i == 0 && index > bfArray.size() / 2 && !b0.query(element, startTime, 196 | startTime + 
this.g - startTime % this.g - 1)) 197 | { 198 | continue; 199 | } 200 | else if ((i == indexArray.size() - 1) 201 | && index > bfArray.size() / 2 202 | && !b0.query(element, endTime - endTime % this.g, endTime)) 203 | { 204 | continue; 205 | } 206 | result = true; 207 | break; 208 | } 209 | } 210 | return result; 211 | } 212 | 213 | public ArrayList getBinaryDecomposition(int start, int end) 214 | { 215 | ArrayList result = new ArrayList(); 216 | for (int i = start; i <= end;) 217 | { 218 | int level = BinaryUtil2.getBinaryLevel(i, end); // level is from 1 219 | // to L+1 220 | int index = (1 << (this.levelNum - level)) + i / CommonConstants.g[level - 1]; 221 | while (index > bfArray.size()) 222 | { 223 | index = index / 2; 224 | } 225 | result.add(index); 226 | i = i + (1 << (level - 1)); 227 | } 228 | return result; 229 | } 230 | 231 | //////// Basic function//////// 232 | public ArrayList getBfArray() 233 | { 234 | return bfArray; 235 | } 236 | 237 | public Beta0 getB0() 238 | { 239 | return b0; 240 | } 241 | 242 | public int getLevelNum() 243 | { 244 | return levelNum; 245 | } 246 | 247 | /** 248 | * Get the total number of levels required according to the maxT. 
249 | * @param maxT start from 0 250 | * @return 251 | */ 252 | public static int getLevelNum(int maxT) 253 | { 254 | int result = 0; 255 | for (int i = 0; i < CommonConstants.g.length; i++) 256 | { 257 | if ((maxT + 1) <= CommonConstants.g[i]) 258 | { 259 | result = i + 1; 260 | break; 261 | } 262 | } 263 | return result; 264 | } 265 | 266 | public int getG() 267 | { 268 | return g; 269 | } 270 | 271 | public int getMaxT() 272 | { 273 | return maxT; 274 | } 275 | 276 | public int getTotalBitNum() 277 | { 278 | return this.totalBitNum; 279 | } 280 | 281 | public static void main(String[] args) 282 | { 283 | byte[] element = "guo".getBytes(); 284 | // int levelNum = Beta1.getLevelNum(86399); 285 | Beta1 b1 = new Beta1(10000, 4, 1); 286 | b1.insert(element, 4); 287 | 288 | System.out.println(b1.query(element, 2, 5)); 289 | System.out.println(b1.query(element, 2, 3)); 290 | System.out.println(b1.query("jin".getBytes(), 1, 6)); 291 | System.out.println(getLevelNum(3)); 292 | 293 | System.out.println("============"); 294 | ArrayList list = b1.getBinaryDecomposition(1, 4); 295 | for (int i = 0; i < list.size(); i++) 296 | { 297 | System.out.println(list.get(i)); 298 | } 299 | 300 | System.out.println(b1.getBinaryDecomposition(0, 0)); 301 | } 302 | 303 | } 304 | -------------------------------------------------------------------------------- /src/main/java/edu/ecnu/pbf/base/impl/Beta2.java: -------------------------------------------------------------------------------- 1 | package edu.ecnu.pbf.base.impl; 2 | 3 | import java.util.ArrayList; 4 | 5 | import edu.ecnu.pbf.base.PersistentBloomFilter; 6 | import edu.ecnu.pbf.element.DualTuple; 7 | import edu.ecnu.pbf.util.BinaryUtil2; 8 | 9 | /** 10 | * The leaf level is level 0, the top level is level L, where L=logT. 
11 | * 12 | * @author Jinwei 13 | * 14 | */ 15 | public class Beta2 implements PersistentBloomFilter 16 | { 17 | public static final int[] g = { 1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7, 18 | 1 << 8, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, 1 << 17, 19 | 1 << 18, 1 << 19, 1 << 20, 1 << 21, 1 << 22, 1 << 23, 1 << 24, 1 << 25, 1 << 26, 20 | 1 << 27, 1 << 28, 1 << 29, 1 << 30 }; 21 | 22 | private int bitNum; 23 | private int topLevel; // L 24 | private int[] hashNum; 25 | private int totalBitNum; 26 | 27 | private TemporalRangeBloomFilterV2[] rangeBloomFilter; 28 | 29 | /** 30 | * 31 | * @param bitNum the number of bits for each level 32 | * @param topLevel 33 | * @param hashNum 34 | */ 35 | public Beta2(int bitNum, int hashNum, int topLevel) 36 | { 37 | this.bitNum = bitNum; 38 | this.topLevel = topLevel; 39 | this.hashNum = new int[topLevel + 1]; 40 | for (int i = 0; i <= topLevel; i++) 41 | { 42 | this.hashNum[i] = hashNum; 43 | } 44 | 45 | this.rangeBloomFilter = new TemporalRangeBloomFilterV2[topLevel + 1]; 46 | for (int i = 0; i <= topLevel; i++) 47 | { 48 | rangeBloomFilter[i] = new TemporalRangeBloomFilterV2(this.bitNum, this.hashNum[i], i); 49 | } 50 | } 51 | 52 | public Beta2(int[] bitNum, int[] hashNum, int topLevel) 53 | { 54 | this.totalBitNum = 0; 55 | this.topLevel = topLevel; 56 | this.hashNum = hashNum; 57 | 58 | this.rangeBloomFilter = new TemporalRangeBloomFilterV2[topLevel + 1]; 59 | for (int i = 0; i <= topLevel; i++) 60 | { 61 | if (bitNum[i] == 0) 62 | { 63 | rangeBloomFilter[i] = null; 64 | 65 | } 66 | else 67 | { 68 | this.totalBitNum += bitNum[i]; 69 | rangeBloomFilter[i] = new TemporalRangeBloomFilterV2(bitNum[i], hashNum[i], i); 70 | // System.out.println("level " + i + ": " + bitNum[i]); 71 | } 72 | } 73 | // System.out.println("total bit num: " + totalBitNum); 74 | } 75 | 76 | /** 77 | * According to T, get the topLevel. 
78 | * 79 | * @param maxT 80 | * @return 81 | */ 82 | public static int getTopLevel(int maxT) 83 | { 84 | int result = 0; 85 | for (int i = 0; i < g.length; i++) 86 | { 87 | if ((maxT + 1) <= g[i]) 88 | { 89 | result = i; 90 | break; 91 | } 92 | } 93 | return result; 94 | } 95 | 96 | public boolean insertString(String element, long timestamp) 97 | { 98 | for (int i = 0; i <= topLevel; i++) 99 | { 100 | rangeBloomFilter[i].insertString(element, timestamp); 101 | } 102 | 103 | return true; 104 | } 105 | 106 | public boolean queryString(String element, long timestamp) 107 | { 108 | boolean result = false; 109 | if (rangeBloomFilter[0].queryString(element, timestamp)) 110 | { 111 | result = true; 112 | } 113 | return result; 114 | } 115 | 116 | public boolean queryString(String element, long startTime, long endTime) 117 | { 118 | boolean result = false; 119 | long startTimeTemp = startTime; 120 | long endTimeTemp = endTime; 121 | 122 | ArrayList dyadicResult = BinaryUtil2.getBinaryDecomposition((int) startTimeTemp, 123 | (int) endTimeTemp); 124 | 125 | for (int i = 0; i < dyadicResult.size(); i++) 126 | { 127 | int key = dyadicResult.get(i).getFirst(); 128 | int level = dyadicResult.get(i).getSecond(); 129 | if (rangeBloomFilter[level - 1].queryString(element, key)) 130 | { 131 | result = true; 132 | break; 133 | } 134 | } 135 | return result; 136 | } 137 | 138 | public boolean insert(byte[] element, long timestamp) 139 | { 140 | boolean result = true; 141 | for (int i = 0; i <= topLevel; i++) 142 | { 143 | if (null != rangeBloomFilter[i]) 144 | { 145 | rangeBloomFilter[i].insert(element, timestamp); 146 | } 147 | } 148 | return result; 149 | } 150 | 151 | public boolean query(byte[] element, long timestamp) 152 | { 153 | boolean result = false; 154 | if (rangeBloomFilter[0].query(element, timestamp)) 155 | { 156 | result = true; 157 | } 158 | return result; 159 | } 160 | 161 | public boolean query(byte[] element, long startTime, long endTime) 162 | { 163 | boolean 
result = false; 164 | long startTimeTemp = startTime; 165 | long endTimeTemp = endTime; 166 | 167 | ArrayList dyadicResult = BinaryUtil2.getBinaryDecomposition((int) startTimeTemp, 168 | (int) endTimeTemp); 169 | for (int i = 0; i < dyadicResult.size(); i++) 170 | { 171 | int key = dyadicResult.get(i).getFirst(); 172 | int level = dyadicResult.get(i).getSecond(); 173 | if (null == rangeBloomFilter[level - 1]) 174 | { 175 | // continue; 176 | result = true; // modified by jinwei (20171013) 177 | break; 178 | } 179 | else if (rangeBloomFilter[level - 1].query(element, key)) 180 | { 181 | result = true; 182 | break; 183 | } 184 | } 185 | return result; 186 | } 187 | 188 | public int getTotalBitNum() 189 | { 190 | return this.totalBitNum; 191 | } 192 | 193 | public static void main(String[] args) 194 | { 195 | Beta2 b = new Beta2(100000, 3, 10); 196 | b.insert("guo".getBytes(), 200); 197 | System.out.println(b.query("guo".getBytes(), 200)); 198 | System.out.println(b.query("guo".getBytes(), 100, 300)); 199 | System.out.println(b.query("guo".getBytes(), 201, 300)); 200 | System.out.println(b.query("guo".getBytes(), 100, 199)); 201 | System.out.println(b.query("jin".getBytes(), 1, 1000)); 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /src/main/java/edu/ecnu/pbf/base/impl/TemporalRangeBloomFilterV2.java: -------------------------------------------------------------------------------- 1 | package edu.ecnu.pbf.base.impl; 2 | 3 | import java.nio.charset.Charset; 4 | import java.util.ArrayList; 5 | import java.util.BitSet; 6 | 7 | import edu.ecnu.pbf.base.PersistentBloomFilter; 8 | import edu.ecnu.pbf.util.BinaryUtil; 9 | import edu.ecnu.pbf.util.MurmurHash3; 10 | 11 | /** 12 | * Optimize the query of TemporalRangeBloomFilter. 
13 | * 14 | * @author Jinwei 15 | * 16 | */ 17 | public class TemporalRangeBloomFilterV2 implements PersistentBloomFilter 18 | { 19 | private BitSet bs; 20 | private int bitNum; 21 | private int hashNum; 22 | private int g; 23 | private int level; 24 | private int queryNum; 25 | 26 | /** 27 | * Create a temporal range Bloom filter with the provided parameters. 28 | * 29 | * @param bitNum 30 | * the capacity of the Bloom filter 31 | * @param hashNum 32 | * the numbers of hash function used in Bloom filter 33 | * @param g 34 | * the size of temporal range used in the Bloom filter 35 | * 36 | */ 37 | public TemporalRangeBloomFilterV2(int bitNum, int hashNum, int level) 38 | { 39 | this.bitNum = bitNum; 40 | this.hashNum = hashNum; 41 | this.level = level; 42 | this.g = 1 << (this.level); 43 | this.queryNum = 0; 44 | bs = new BitSet(bitNum); 45 | } 46 | 47 | /** 48 | * Get the member variable used to store bits. 49 | * 50 | * @return BitSet 51 | */ 52 | public BitSet getBitSet() 53 | { 54 | return this.bs; 55 | } 56 | 57 | /** 58 | * Set the bit set used for Bloom filter. 59 | * 60 | * @param bs 61 | */ 62 | public void setBitSet(BitSet bs) 63 | { 64 | this.bs = bs; 65 | } 66 | 67 | /** 68 | * Get the number of bits in this Bloom filter. 69 | * 70 | * @return int 71 | */ 72 | public int getSize() 73 | { 74 | return this.bitNum; 75 | } 76 | 77 | /** 78 | * Get the number of hash function in this Bloom filter. 79 | * 80 | * @return int 81 | */ 82 | public int getHashNum() 83 | { 84 | return this.hashNum; 85 | } 86 | 87 | /** 88 | * Get the size of time range of this Bloom filter 89 | * 90 | * @return int 91 | */ 92 | public int getG() 93 | { 94 | return this.g; 95 | } 96 | 97 | /** 98 | * Get the query number of this Bloom filter 99 | * 100 | * @return int 101 | */ 102 | public int getQueryNum() 103 | { 104 | return this.queryNum; 105 | } 106 | 107 | /** 108 | * Insert a String element into the Bloom filter in accordance with the 109 | * timestamp. 
This element timestamp is mapped to a time range. 110 | * 111 | * @param element 112 | * @param timestamp 113 | */ 114 | public boolean insertString(String element, long timestamp) 115 | { 116 | boolean result = true; 117 | 118 | long rangeNum = getRange(timestamp); 119 | byte[] byteElement = element.getBytes(Charset.forName("UTF-8")); 120 | // byte[] byteElement = (element + "|" + String.valueOf(rangeNum)) 121 | // .getBytes(Charset.forName("UTF-8")); 122 | 123 | for (int i = 0; i < hashNum; i++) 124 | { 125 | // int hash = MurmurHash3.murmurhash3_x86_32(byteElement, 0, 126 | // byteElement.length, i); 127 | int hash = MurmurHash3.murmurhash3_x86_32(byteElement, 0, 128 | byteElement.length, (int)rangeNum * this.hashNum + i); 129 | int index = getIndex(hash); 130 | bs.set(index); 131 | } 132 | 133 | return result; 134 | } 135 | 136 | /** 137 | * Query weather a given String element exists in the Bloom filter in 138 | * accordance with the timestamp of element 139 | * 140 | * @param element 141 | * @param timestamp 142 | * @return boolean true if the element exists in the given time 143 | */ 144 | public boolean queryString(String element, long timestamp) 145 | { 146 | long rangeNum = getRange(timestamp); 147 | this.queryNum++; 148 | byte[] byteElement = element.getBytes(Charset.forName("UTF-8")); 149 | // byte[] byteElement = (element + "|" + String.valueOf(rangeNum)) 150 | // .getBytes(Charset.forName("UTF-8")); 151 | boolean isExist = true; 152 | for (int i = 0; i < hashNum; i++) 153 | { 154 | // int hash = MurmurHash3.murmurhash3_x86_32(byteElement, 0, 155 | // byteElement.length, i); 156 | int hash = MurmurHash3.murmurhash3_x86_32(byteElement, 0, 157 | byteElement.length, (int)rangeNum * this.hashNum + i); 158 | int index = getIndex(hash); 159 | if (false == bs.get(index)) 160 | { 161 | isExist = false; 162 | break; 163 | } 164 | } 165 | 166 | return isExist; 167 | } 168 | 169 | /** 170 | * Query weather a given String element exists in the given time range 171 
| * 172 | * @param element 173 | * @param timestamp 174 | * @return boolean true if the element exists in the given time range 175 | */ 176 | public boolean queryString(String element, long startTime, long endTime) 177 | { 178 | boolean isExist = false; 179 | 180 | if (startTime > endTime || startTime < 0 || endTime < 0) 181 | { 182 | isExist = false; 183 | } 184 | else 185 | { 186 | ArrayList startKeyArray = BinaryUtil.getStartKey(startTime, 187 | endTime, this.level); 188 | for (long startKey : startKeyArray) 189 | { 190 | if (queryString(element, startKey)) 191 | { 192 | isExist = true; 193 | break; 194 | } 195 | } 196 | } 197 | 198 | return isExist; 199 | } 200 | 201 | public boolean insert(byte[] element, long timestamp) 202 | { 203 | boolean result = true; 204 | long rangeNum = getRange(timestamp); 205 | 206 | for (int i = 0; i < hashNum; i++) 207 | { 208 | int hash = MurmurHash3.murmurhash3_x86_32(element, 0, 209 | element.length, (int)rangeNum * this.hashNum + i); 210 | int index = getIndex(hash); 211 | bs.set(index); 212 | } 213 | return result; 214 | } 215 | 216 | public boolean query(byte[] element, long timestamp) 217 | { 218 | boolean result = true; 219 | long rangeNum = getRange(timestamp); 220 | this.queryNum++; 221 | for (int i = 0; i < hashNum; i++) 222 | { 223 | int hash = MurmurHash3.murmurhash3_x86_32(element, 0, 224 | element.length, (int)rangeNum * this.hashNum + i); 225 | int index = getIndex(hash); 226 | if (false == bs.get(index)) 227 | { 228 | result = false; 229 | break; 230 | } 231 | } 232 | return result; 233 | } 234 | 235 | public boolean query(byte[] element, long startTime, long endTime) 236 | { 237 | boolean result = false; 238 | if (startTime > endTime || endTime < 0) 239 | { 240 | result = false; 241 | } 242 | else 243 | { 244 | startTime = startTime < 0 ? 
0:startTime; 245 | ArrayList startKeyArray = BinaryUtil.getStartKey(startTime, 246 | endTime, this.level); 247 | for (long startKey : startKeyArray) 248 | { 249 | if (query(element, startKey)) 250 | { 251 | result = true; 252 | break; 253 | } 254 | } 255 | } 256 | return result; 257 | } 258 | 259 | /** 260 | * Query a given String element with the given time range, and return the 261 | * ranges which may contain this element. 262 | * 263 | * @param element 264 | * @param startTime 265 | * @param endTime 266 | * @return ArrayList 267 | */ 268 | public ArrayList queryStringWithInfo(String element, long startTime, 269 | long endTime) 270 | { 271 | ArrayList array = new ArrayList(); 272 | 273 | if (startTime > endTime || startTime < 0 || endTime < 0) 274 | { 275 | } 276 | else 277 | { 278 | ArrayList startKeyArray = BinaryUtil.getStartKey(startTime, 279 | endTime, this.level); 280 | for (long startKey : startKeyArray) 281 | { 282 | if (queryString(element, startKey)) 283 | { 284 | array.add(startKey); 285 | } 286 | } 287 | } 288 | return array; 289 | } 290 | 291 | private int getIndex(int hash) 292 | { 293 | int index = hash % bitNum; 294 | return index >= 0 ? 
/**
 * Helpers for dyadic (power-of-two sized and aligned) key ranges.
 * <p>
 * Levels are 1-based: a range of level {@code L} contains
 * {@code 1 << (L - 1)} consecutive keys, i.e. {@code levelSize[L]} keys.
 */
public class BinaryUtil
{
    /** {@code bitMask[i]} has the {@code i} lowest bits cleared and all higher bits set. */
    public static final int[] bitMask = { 0xffffffff, 0xfffffffe, 0xfffffffc,
            0xfffffff8, 0xfffffff0, 0xffffffe0, 0xffffffc0, 0xffffff80,
            0xffffff00, 0xfffffe00, 0xfffffc00, 0xfffff800, 0xfffff000,
            0xffffe000, 0xffffc000, 0xffff8000, 0xffff0000, 0xfffe0000,
            0xfffc0000, 0xfff80000, 0xfff00000, 0xffe00000, 0xffc00000,
            0xff800000, 0xff000000, 0xfe000000, 0xfc000000, 0xf8000000,
            0xf0000000, 0xe0000000, 0xc0000000, 0x80000000, 0x00000000 };

    /** {@code levelSize[L]} is the number of keys in a level-{@code L} range; index 0 is a placeholder. */
    public static final int[] levelSize = { 0, 1 << 0, 1 << 1, 1 << 2, 1 << 3,
            1 << 4, 1 << 5, 1 << 6, 1 << 7, 1 << 8, 1 << 9, 1 << 10, 1 << 11,
            1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, 1 << 17, 1 << 18,
            1 << 19, 1 << 20, 1 << 21, 1 << 22, 1 << 23, 1 << 24, 1 << 25,
            1 << 26, 1 << 27, 1 << 28, 1 << 29, 1 << 30 };

    /**
     * Split {@code [start, end]} into consecutive dyadic ranges.
     *
     * @param start
     *            first key (inclusive)
     * @param end
     *            last key (inclusive)
     * @return a map from the first key of each range to its level; empty
     *         when {@code start > end}. The ranges tile {@code [start, end]}
     *         exactly and never extend past {@code end}.
     */
    public static HashMap<Integer, Integer> getBinaryDecomposition(int start,
            int end)
    {
        HashMap<Integer, Integer> resultMap = new HashMap<Integer, Integer>();
        // A long cursor keeps the loop finite when end is close to Integer.MAX_VALUE.
        long cursor = start;
        while (cursor <= end)
        {
            int level = getBinaryLevel((int) cursor, end);
            resultMap.put((int) cursor, level);
            cursor += 1L << (level - 1);
        }
        return resultMap;
    }

    /**
     * Get the level of the largest dyadic range that starts at
     * {@code startKey}, is aligned to its own size, and does not extend past
     * {@code endKey}.
     *
     * @param startKey
     *            first key of the range
     * @param endKey
     *            last key that may be covered (inclusive)
     * @return the 1-based level; always at least 1, also for
     *         {@code startKey >= endKey}
     */
    public static int getBinaryLevel(int startKey, int endKey)
    {
        if (startKey == endKey)
        {
            return 1;
        }

        int level;
        if (0 == startKey)
        {
            // Key 0 is aligned to every level, so only the span limits the size.
            long size = (long) endKey + 1;
            if (size < 1)
            {
                return 1;
            }
            // floor(log2(size)) + 1: the largest level whose size fits into the span.
            level = 64 - Long.numberOfLeadingZeros(size);
            // levelSize only describes levels up to levelSize.length - 1.
            level = Math.min(level, levelSize.length - 1);
        }
        else
        {
            // Alignment is bounded by the lowest set bit of startKey.
            level = Integer.numberOfTrailingZeros(startKey) + 1;
            // Shrink until the range ends at or before endKey (long math avoids overflow).
            while (level > 1
                    && (long) startKey + (1L << (level - 1)) > (long) endKey + 1)
            {
                level--;
            }
        }
        return level;
    }

    /**
     * Get starting time points of {@code level}'s dyadic ranges which cover
     * [startTime, endTime].
     *
     * @param startTime
     *            first timestamp (inclusive); the first key is
     *            {@code startTime} rounded toward zero to a multiple of the
     *            range size
     * @param endTime
     *            last timestamp (inclusive)
     * @param level
     *            0-based level; the range size is {@code 1 << level}
     * @return the start keys in ascending order; empty when {@code level} is
     *         outside {@code [0, 30]} or no key is {@code <= endTime}
     */
    public static ArrayList<Long> getStartKey(long startTime, long endTime,
            int level)
    {
        ArrayList<Long> startKey = new ArrayList<Long>();
        if (level < 0 || level > 30)
        {
            return startKey;
        }

        final long rangeSize = levelSize[level + 1];
        for (long key = (startTime / rangeSize) * rangeSize; key <= endTime; key += rangeSize)
        {
            startKey.add(key);
            if (key > Long.MAX_VALUE - rangeSize)
            {
                // The next step would overflow; there is no further key.
                break;
            }
        }
        return startKey;
    }

    public static void main(String[] args)
    {
        HashMap<Integer, Integer> map = getBinaryDecomposition(87296, 88319);
        System.out.println(map);

        long start = System.nanoTime();
        for (int i = 0; i < 10000000; i++)
        {
            getBinaryDecomposition(i * 3, i * 3 + 127);
        }

        long end = System.nanoTime();
        System.out.println("time: " + (end - start) / 1000);
    }
}
/**
 * Get the level of the dyadic range that starts at {@code startKey} when
 * decomposing {@code [startKey, endKey]}. Levels are 1-based; a level
 * {@code L} range holds {@code levelSize[L]} keys.
 *
 * @param startKey
 *            first key of the range
 * @param endKey
 *            last key of the interval being decomposed (inclusive)
 * @return the chosen level; 1 when {@code startKey == endKey}
 */
public static int getBinaryLevel(int startKey, int endKey)
{
    int level = 1;
    if (startKey == endKey)
    {
        level = 1;
    }
    else if (0 == startKey)
    {
        int size = endKey - startKey + 1;
        int start = 1;
        int end = 31;
        int middle = (start + end) / 2;
        // Binary search over levelSize[1..31] for size; stops early on an exact match.
        while (start < end)
        {
            middle = (start + end) / 2;
            if (levelSize[middle] == size)
            {
                break;
            }
            else if (levelSize[middle] < size)
            {
                start = middle + 1;
            }
            else
            {
                end = middle;
            }
        }
        // middle is the last probed index, not necessarily start/end.
        // When that entry is larger than size, step down one level.
        // NOTE(review): presumably this keeps the range from extending past
        // endKey (the plain "level = middle" variant is disabled below) - confirm.
        if (levelSize[middle] > size)
        {
            level = middle - 1;
        }
        else
        {
            level = middle;
        }
        // level = middle;
    }
    else
    {
        int start = 1;
        int end = 32;
        int middle = (start + end) / 2;
        // Binary search for the smallest index at which startKey has a set bit
        // among the bits that bitMask[middle] clears.
        while (start < end)
        {
            if (0 == (startKey & ~bitMask[middle]))
            {
                start = middle + 1;
            }
            else
            {
                end = middle;
            }
            middle = (start + end) / 2;
        }
        level = middle;
        // Shrink the level while a range of that size would end after endKey.
        while ((startKey + (1 << (level - 1))) > (endKey + 1))
        {
            level--;
        }
    }
    return level;
}
/**
 * Small numeric helpers used by the Bloom filter utilities.
 */
public class MathUtil
{
    /**
     * Compute the greatest common divisor with the Euclidean algorithm.
     *
     * @param a
     *            first value
     * @param b
     *            second value
     * @return the GCD; {@code gcd(a, 0)} is {@code a} and {@code gcd(0, b)}
     *         is {@code b}
     */
    public static int gcd(int a, int b)
    {
        while (b != 0)
        {
            int temp = a % b;
            a = b;
            b = temp;
        }
        return a;
    }

    /**
     * Pick a pseudo-random index in {@code [start, end]} (both inclusive).
     *
     * @param start
     *            smallest possible result
     * @param end
     *            largest possible result
     * @return a value between {@code start} and {@code end}
     */
    public static int getRandomIndex(int start, int end)
    {
        return start + (int) (Math.random() * (end - start + 1));
    }

    /**
     * Return {@code true} with a probability of roughly {@code percent} / 100.
     *
     * @param percent
     *            hit probability in percent; values {@code <= 0} never hit,
     *            values {@code >= 100} always hit
     * @return whether this draw is a hit
     */
    public static boolean isHitPercent(int percent)
    {
        int randomPercent = (int) (Math.random() * 100);
        return randomPercent < percent;
    }

    public static void main(String[] args)
    {
        for (int i = 0; i < 100; i++)
        {
            System.out.println(isHitPercent(2));
        }
        int[] a = getGeometricSequence(100, 4, 1.4);
        for (int i = 0; i < a.length; i++)
        {
            System.out.println(a[i]);
        }
        System.out.println(gcd(0, 100));
    }

    /**
     * Build an integer geometric sequence whose terms add up to roughly
     * {@code sum}. Each term is truncated to {@code int}, and the next term
     * is derived from the truncated one, so the total can fall below
     * {@code sum}.
     *
     * @param sum
     *            target total of the sequence
     * @param length
     *            number of terms
     * @param radio
     *            common ratio between consecutive terms
     * @return the sequence; for a ratio of exactly 1 every term is
     *         {@code sum / length}
     */
    public static int[] getGeometricSequence(int sum, int length, double radio)
    {
        int[] result = new int[length];
        int a;
        if (radio == 1D)
        {
            // The closed form below is 0/0 for a ratio of 1; the series is constant.
            a = (int) ((double) sum / length);
        }
        else
        {
            // First term of a geometric series: sum * (1 - r) / (1 - r^n).
            a = (int) ((double) sum * (1D - radio) / (1D - Math.pow(radio, length)));
        }
        for (int i = 0; i < length; i++)
        {
            result[i] = a;
            a = (int) (a * radio);
        }
        return result;
    }
}

8 | * This produces exactly the same hash values as the final C++ 9 | * version of MurmurHash3 and is thus suitable for producing the same hash values across 10 | * platforms. 11 | *

12 | * The 32 bit x86 version of this hash should be the fastest variant for relatively short keys like ids. 13 | * murmurhash3_x64_128 is a good choice for longer strings or if you need more than 32 bits of hash. 14 | *

15 | * Note - The x86 and x64 versions do _not_ produce the same results, as the 16 | * algorithms are optimized for their respective platforms. 17 | *

18 | * See http://github.com/yonik/java_util for future updates to this file. 19 | */ 20 | public final class MurmurHash3 { 21 | 22 | /** 128 bits of state */ 23 | public static final class LongPair { 24 | public long val1; 25 | public long val2; 26 | } 27 | 28 | public static final int fmix32(int h) { 29 | h ^= h >>> 16; 30 | h *= 0x85ebca6b; 31 | h ^= h >>> 13; 32 | h *= 0xc2b2ae35; 33 | h ^= h >>> 16; 34 | return h; 35 | } 36 | 37 | public static final long fmix64(long k) { 38 | k ^= k >>> 33; 39 | k *= 0xff51afd7ed558ccdL; 40 | k ^= k >>> 33; 41 | k *= 0xc4ceb9fe1a85ec53L; 42 | k ^= k >>> 33; 43 | return k; 44 | } 45 | 46 | /** Gets a long from a byte buffer in little endian byte order. */ 47 | public static final long getLongLittleEndian(byte[] buf, int offset) { 48 | return ((long)buf[offset+7] << 56) // no mask needed 49 | | ((buf[offset+6] & 0xffL) << 48) 50 | | ((buf[offset+5] & 0xffL) << 40) 51 | | ((buf[offset+4] & 0xffL) << 32) 52 | | ((buf[offset+3] & 0xffL) << 24) 53 | | ((buf[offset+2] & 0xffL) << 16) 54 | | ((buf[offset+1] & 0xffL) << 8) 55 | | ((buf[offset ] & 0xffL)); // no shift needed 56 | } 57 | 58 | 59 | /** Returns the MurmurHash3_x86_32 hash. 
*/ 60 | public static int murmurhash3_x86_32(byte[] data, int offset, int len, int seed) { 61 | 62 | final int c1 = 0xcc9e2d51; 63 | final int c2 = 0x1b873593; 64 | 65 | int h1 = seed; 66 | int roundedEnd = offset + (len & 0xfffffffc); // round down to 4 byte block 67 | 68 | for (int i=offset; i>> 17); // ROTL32(k1,15); 73 | k1 *= c2; 74 | 75 | h1 ^= k1; 76 | h1 = (h1 << 13) | (h1 >>> 19); // ROTL32(h1,13); 77 | h1 = h1*5+0xe6546b64; 78 | } 79 | 80 | // tail 81 | int k1 = 0; 82 | 83 | switch(len & 0x03) { 84 | case 3: 85 | k1 = (data[roundedEnd + 2] & 0xff) << 16; 86 | // fallthrough 87 | case 2: 88 | k1 |= (data[roundedEnd + 1] & 0xff) << 8; 89 | // fallthrough 90 | case 1: 91 | k1 |= (data[roundedEnd] & 0xff); 92 | k1 *= c1; 93 | k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15); 94 | k1 *= c2; 95 | h1 ^= k1; 96 | } 97 | 98 | // finalization 99 | h1 ^= len; 100 | 101 | // fmix(h1); 102 | h1 ^= h1 >>> 16; 103 | h1 *= 0x85ebca6b; 104 | h1 ^= h1 >>> 13; 105 | h1 *= 0xc2b2ae35; 106 | h1 ^= h1 >>> 16; 107 | 108 | return h1; 109 | } 110 | 111 | 112 | /** Returns the MurmurHash3_x86_32 hash of the UTF-8 bytes of the String without actually encoding 113 | * the string to a temporary buffer. This is more than 2x faster than hashing the result 114 | * of String.getBytes(). 115 | */ 116 | public static int murmurhash3_x86_32(CharSequence data, int offset, int len, int seed) { 117 | 118 | final int c1 = 0xcc9e2d51; 119 | final int c2 = 0x1b873593; 120 | 121 | int h1 = seed; 122 | 123 | int pos = offset; 124 | int end = offset + len; 125 | int k1 = 0; 126 | int k2 = 0; 127 | int shift = 0; 128 | int bits = 0; 129 | int nBytes = 0; // length in UTF8 bytes 130 | 131 | 132 | while (pos < end) { 133 | int code = data.charAt(pos++); 134 | if (code < 0x80) { 135 | k2 = code; 136 | bits = 8; 137 | 138 | /*** 139 | // optimized ascii implementation (currently slower!!! code size?) 
140 | if (shift == 24) { 141 | k1 = k1 | (code << 24); 142 | 143 | k1 *= c1; 144 | k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15); 145 | k1 *= c2; 146 | 147 | h1 ^= k1; 148 | h1 = (h1 << 13) | (h1 >>> 19); // ROTL32(h1,13); 149 | h1 = h1*5+0xe6546b64; 150 | 151 | shift = 0; 152 | nBytes += 4; 153 | k1 = 0; 154 | } else { 155 | k1 |= code << shift; 156 | shift += 8; 157 | } 158 | continue; 159 | ***/ 160 | 161 | } 162 | else if (code < 0x800) { 163 | k2 = (0xC0 | (code >> 6)) 164 | | ((0x80 | (code & 0x3F)) << 8); 165 | bits = 16; 166 | } 167 | else if (code < 0xD800 || code > 0xDFFF || pos>=end) { 168 | // we check for pos>=end to encode an unpaired surrogate as 3 bytes. 169 | k2 = (0xE0 | (code >> 12)) 170 | | ((0x80 | ((code >> 6) & 0x3F)) << 8) 171 | | ((0x80 | (code & 0x3F)) << 16); 172 | bits = 24; 173 | } else { 174 | // surrogate pair 175 | // int utf32 = pos < end ? (int) data.charAt(pos++) : 0; 176 | int utf32 = (int) data.charAt(pos++); 177 | utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF); 178 | k2 = (0xff & (0xF0 | (utf32 >> 18))) 179 | | ((0x80 | ((utf32 >> 12) & 0x3F))) << 8 180 | | ((0x80 | ((utf32 >> 6) & 0x3F))) << 16 181 | | (0x80 | (utf32 & 0x3F)) << 24; 182 | bits = 32; 183 | } 184 | 185 | 186 | k1 |= k2 << shift; 187 | 188 | // int used_bits = 32 - shift; // how many bits of k2 were used in k1. 
189 | // int unused_bits = bits - used_bits; // (bits-(32-shift)) == bits+shift-32 == bits-newshift 190 | 191 | shift += bits; 192 | if (shift >= 32) { 193 | // mix after we have a complete word 194 | 195 | k1 *= c1; 196 | k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15); 197 | k1 *= c2; 198 | 199 | h1 ^= k1; 200 | h1 = (h1 << 13) | (h1 >>> 19); // ROTL32(h1,13); 201 | h1 = h1*5+0xe6546b64; 202 | 203 | shift -= 32; 204 | // unfortunately, java won't let you shift 32 bits off, so we need to check for 0 205 | if (shift != 0) { 206 | k1 = k2 >>> (bits-shift); // bits used == bits - newshift 207 | } else { 208 | k1 = 0; 209 | } 210 | nBytes += 4; 211 | } 212 | 213 | } // inner 214 | 215 | // handle tail 216 | if (shift > 0) { 217 | nBytes += shift >> 3; 218 | k1 *= c1; 219 | k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15); 220 | k1 *= c2; 221 | h1 ^= k1; 222 | } 223 | 224 | // finalization 225 | h1 ^= nBytes; 226 | 227 | // fmix(h1); 228 | h1 ^= h1 >>> 16; 229 | h1 *= 0x85ebca6b; 230 | h1 ^= h1 >>> 13; 231 | h1 *= 0xc2b2ae35; 232 | h1 ^= h1 >>> 16; 233 | 234 | return h1; 235 | } 236 | 237 | 238 | /** Returns the MurmurHash3_x64_128 hash, placing the result in "out". */ 239 | public static void murmurhash3_x64_128(byte[] key, int offset, int len, int seed, LongPair out) { 240 | // The original algorithm does have a 32 bit unsigned seed. 241 | // We have to mask to match the behavior of the unsigned types and prevent sign extension. 
242 | long h1 = seed & 0x00000000FFFFFFFFL; 243 | long h2 = seed & 0x00000000FFFFFFFFL; 244 | 245 | final long c1 = 0x87c37b91114253d5L; 246 | final long c2 = 0x4cf5ad432745937fL; 247 | 248 | int roundedEnd = offset + (len & 0xFFFFFFF0); // round down to 16 byte block 249 | for (int i=offset; i 0 && result > kMax) 16 | { 17 | result = kMax; 18 | } 19 | return result; 20 | } 21 | 22 | public static void main(String[] args) 23 | { 24 | System.out.println(getOptimizedK(100000, 12500, 12)); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/edu/ecnu/pbf/util/PbfUtil.java: -------------------------------------------------------------------------------- 1 | package edu.ecnu.pbf.util; 2 | 3 | import java.util.BitSet; 4 | 5 | import org.apache.commons.math3.analysis.UnivariateFunction; 6 | import org.apache.commons.math3.analysis.solvers.BisectionSolver; 7 | 8 | import edu.ecnu.pbf.CommonConstants; 9 | import edu.ecnu.pbf.base.impl.TemporalRangeBloomFilterV2; 10 | 11 | public class PbfUtil 12 | { 13 | public static final double INVARIANT = Math.log(2) * Math.log(2); 14 | 15 | public static int[] getOptimizedMForBeta1(int m, int[] d, int[] f, int queryLength) 16 | { 17 | int[] optimizedM = null; 18 | if (d.length != f.length) 19 | { 20 | // TODO 21 | } 22 | else 23 | { 24 | double absoluteAccuracy = 0; 25 | // if (queryLength <= 16) 26 | // { 27 | // absoluteAccuracy = 1E-43; 28 | // } 29 | // else if (queryLength <= 32) 30 | // { 31 | // absoluteAccuracy = 1E-36; 32 | // } 33 | // else if (queryLength <= 64) 34 | // { 35 | // absoluteAccuracy = 1E-32; 36 | // } 37 | // else 38 | // { 39 | // absoluteAccuracy = 1E-30; 40 | // } 41 | absoluteAccuracy = 1E-93; // 93 for beta1, 54 for beta2 42 | optimizedM = new int[d.length]; 43 | LamdaFunction lamdaFunc = new LamdaFunction(m, d, f); 44 | BisectionSolver solver = new BisectionSolver(absoluteAccuracy); 45 | double lamda = solver.solve(40000, lamdaFunc, -1, 0); 46 
| // System.out.println("lamda: " + lamda); 47 | // System.out.println(lamda); 48 | for (int i = 0; i < optimizedM.length; i++) 49 | { 50 | if (f[i] == 0 || d[i] == 0) 51 | { 52 | optimizedM[i] = 0; 53 | } 54 | else 55 | { 56 | optimizedM[i] = (int) ((double) d[i] / INVARIANT 57 | * Math.log(1.0D - (double) f[i] * PbfUtil.INVARIANT / lamda / d[i])); 58 | } 59 | } 60 | } 61 | return optimizedM; 62 | } 63 | 64 | public static int[] getOptimizedM(int m, int[] d, int[] f, int queryLength, double acc) 65 | { 66 | int[] optimizedM = null; 67 | if (d.length != f.length) 68 | { 69 | // TODO 70 | } 71 | else 72 | { 73 | double absoluteAccuracy = 0; 74 | 75 | absoluteAccuracy = acc; // 93 for beta1, 54 for beta2 76 | optimizedM = new int[d.length]; 77 | LamdaFunction lamdaFunc = new LamdaFunction(m, d, f); 78 | BisectionSolver solver = new BisectionSolver(absoluteAccuracy); 79 | double lamda = solver.solve(40000, lamdaFunc, -1, 0); 80 | 81 | for (int i = 0; i < optimizedM.length; i++) 82 | { 83 | if (f[i] == 0 || d[i] == 0) 84 | { 85 | optimizedM[i] = 0; 86 | } 87 | else 88 | { 89 | optimizedM[i] = (int) ((double) d[i] / INVARIANT 90 | * Math.log(1.0D - (double) f[i] * PbfUtil.INVARIANT / lamda / d[i])); 91 | } 92 | } 93 | } 94 | return optimizedM; 95 | } 96 | 97 | public static int[] getOptimizedMForBeta2(int m, int[] d, int[] f, int queryLength) 98 | { 99 | int[] optimizedM = null; 100 | if (d.length != f.length) 101 | { 102 | // TODO 103 | } 104 | else 105 | { 106 | double absoluteAccuracy = 0; 107 | 108 | absoluteAccuracy = 1E-53; // 93 for beta1, 54 for beta2 109 | optimizedM = new int[d.length]; 110 | LamdaFunction lamdaFunc = new LamdaFunction(m, d, f); 111 | BisectionSolver solver = new BisectionSolver(absoluteAccuracy); 112 | double lamda = solver.solve(40000, lamdaFunc, -1, 0); 113 | 114 | for (int i = 0; i < optimizedM.length; i++) 115 | { 116 | if (f[i] == 0 || d[i] == 0) 117 | { 118 | optimizedM[i] = 0; 119 | } 120 | else 121 | { 122 | optimizedM[i] = (int) 
((double) d[i] / INVARIANT 123 | * Math.log(1.0D - (double) f[i] * PbfUtil.INVARIANT / lamda / d[i])); 124 | } 125 | } 126 | } 127 | return optimizedM; 128 | } 129 | 130 | public static int[] getOptimizedKForBeta1(int[] m, int[] d, int kMax) 131 | { 132 | int[] optimizedK = null; 133 | if (m.length != d.length) 134 | { 135 | // TODO 136 | } 137 | else 138 | { 139 | optimizedK = new int[m.length]; 140 | for (int i = 0; i < optimizedK.length; i++) 141 | { 142 | if (m[i] == 0 || d[i] == 0) 143 | { 144 | optimizedK[i] = 0; 145 | } 146 | else 147 | { 148 | optimizedK[i] = (int) ((double) m[i] / d[i] * Math.log(2) + 1) + 1; 149 | if (kMax > 0 && optimizedK[i] > kMax) 150 | { 151 | optimizedK[i] = kMax; 152 | } 153 | } 154 | } 155 | } 156 | return optimizedK; 157 | } 158 | 159 | /** 160 | * Get the total number of levels. 161 | * 162 | * @param maxT 163 | * from 0 164 | * @return 165 | */ 166 | public static int getLevelNum(int maxT) 167 | { 168 | int result = 0; 169 | for (int i = 0; i < CommonConstants.g.length; i++) 170 | { 171 | if ((maxT + 1) <= CommonConstants.g[i]) 172 | { 173 | result = i + 1; 174 | break; 175 | } 176 | } 177 | return result; 178 | } 179 | 180 | /** 181 | * Merge the BitSets of two consecutive beta2 pbfs into one. 182 | * Note: the structures of the two pbfs should be the same, i.e., hash numbers and level numbers. 
183 | * @param b1 184 | * @param b2 185 | * @return 186 | */ 187 | public static BitSet mergeBeta2(TemporalRangeBloomFilterV2 rbf1, TemporalRangeBloomFilterV2 rbf2) 188 | { 189 | BitSet bs1 = rbf1.getBitSet(); 190 | BitSet bs2 = rbf2.getBitSet(); 191 | int bitNumOfRbf1 = rbf1.getSize(); 192 | int bitNumOfRbf2 = rbf2.getSize(); 193 | int bitNum = MathUtil.gcd(bitNumOfRbf1, bitNumOfRbf2); 194 | BitSet bitSet = new BitSet(bitNum); 195 | for (int i = 0; i < bitNumOfRbf1; i++) 196 | { 197 | if (bs1.get(i) == true) 198 | { 199 | bitSet.set(i % bitNum); 200 | } 201 | } 202 | for (int i = 0; i < bitNumOfRbf2; i++) 203 | { 204 | if (bs2.get(i) == true) 205 | { 206 | bitSet.set(i % bitNum); 207 | } 208 | } 209 | return bitSet; 210 | } 211 | 212 | public static BitSet merge(BitSet bs1, int bitNumOfBs1, BitSet bs2, int bitNumOfBs2) 213 | { 214 | int bitNum = MathUtil.gcd(bitNumOfBs1, bitNumOfBs2); 215 | BitSet bitSet = new BitSet(bitNum); 216 | for (int i = 0; i < bitNumOfBs1; i++) 217 | { 218 | if (bs1.get(i) == true) 219 | { 220 | bitSet.set(i % bitNum); 221 | } 222 | } 223 | for (int i = 0; i < bitNumOfBs2; i++) 224 | { 225 | if (bs2.get(i) == true) 226 | { 227 | bitSet.set(i % bitNum); 228 | } 229 | } 230 | return bitSet; 231 | } 232 | 233 | public static void main(String[] args) 234 | { 235 | System.out.println(getLevelNum(127)); 236 | int m = 10000000; 237 | int[] d = {1000, 2000, 4000, 8000, 16000, 32000, 64000, 12800}; 238 | int[] f = {2, 2, 2, 2, 2, 2, 2, 1}; 239 | 240 | int[] mm = getOptimizedMForBeta1(m, d, f, 128); 241 | int sum = 0; 242 | 243 | for (int i = 0; i < mm.length; i++) 244 | { 245 | System.out.println(mm[i]); 246 | sum += mm[i]; 247 | } 248 | 249 | System.out.println(sum); 250 | 251 | } 252 | } 253 | 254 | class LamdaFunction implements UnivariateFunction 255 | { 256 | private int[] d; 257 | private int[] f; 258 | private long m; 259 | 260 | public LamdaFunction(long m, int[] d, int[] f) 261 | { 262 | this.m = m; 263 | this.d = d; 264 | this.f = f; 265 
| } 266 | 267 | public double value(double x) 268 | { 269 | double result = 0; 270 | for (int i = 0; i < d.length; i++) 271 | { 272 | if (f[i] == 0 || d[i] == 0) 273 | { 274 | 275 | } 276 | else 277 | { 278 | result = result + (double) d[i] 279 | * Math.log(1.0D - (double) f[i] * PbfUtil.INVARIANT / x / d[i]); 280 | } 281 | } 282 | result = result - (double) m * PbfUtil.INVARIANT; 283 | return result; 284 | } 285 | } -------------------------------------------------------------------------------- /src/main/java/edu/ecnu/pbf/util/PbfUtil2.java: -------------------------------------------------------------------------------- 1 | package edu.ecnu.pbf.util; 2 | 3 | import java.util.BitSet; 4 | 5 | import org.apache.commons.math3.analysis.UnivariateFunction; 6 | import org.apache.commons.math3.analysis.solvers.BisectionSolver; 7 | 8 | import edu.ecnu.pbf.CommonConstants; 9 | import edu.ecnu.pbf.base.impl.TemporalRangeBloomFilterV2; 10 | 11 | public class PbfUtil2 12 | { 13 | public static final double INVARIANT = Math.log(2) * Math.log(2); 14 | 15 | public static int[] getOptimizedMForBeta1(int m, int[] d, int[] f, int queryLength) 16 | { 17 | int[] optimizedM = null; 18 | if (d.length != f.length) 19 | { 20 | // TODO 21 | } 22 | else 23 | { 24 | double absoluteAccuracy = 0; 25 | if (queryLength <= 16) 26 | { 27 | absoluteAccuracy = 1E-43; 28 | } 29 | else if (queryLength <= 32) 30 | { 31 | absoluteAccuracy = 1E-36; 32 | } 33 | else if (queryLength <= 64) 34 | { 35 | absoluteAccuracy = 1E-32; 36 | } 37 | else 38 | { 39 | absoluteAccuracy = 1E-30; 40 | } 41 | optimizedM = new int[d.length]; 42 | LamdaFunction lamdaFunc = new LamdaFunction(m, d, f); 43 | BisectionSolver solver = new BisectionSolver(absoluteAccuracy); 44 | double lamda = solver.solve(20000, lamdaFunc, -1, 0); 45 | System.out.println("lamda: " + lamda); 46 | System.out.println(lamda); 47 | for (int i = 0; i < optimizedM.length; i++) 48 | { 49 | if (f[i] == 0 || d[i] == 0) 50 | { 51 | optimizedM[i] = 0; 52 | 
} 53 | else 54 | { 55 | optimizedM[i] = (int) ((double) d[i] / INVARIANT 56 | * Math.log(1.0D - (double) f[i] * PbfUtil2.INVARIANT / lamda / d[i])); 57 | } 58 | } 59 | } 60 | return optimizedM; 61 | } 62 | 63 | public static int[] getOptimizedKForBeta1(int[] m, int[] d, int kMax) 64 | { 65 | int[] optimizedK = null; 66 | if (m.length != d.length) 67 | { 68 | // TODO 69 | } 70 | else 71 | { 72 | optimizedK = new int[m.length]; 73 | for (int i = 0; i < optimizedK.length; i++) 74 | { 75 | if (m[i] == 0 || d[i] == 0) 76 | { 77 | optimizedK[i] = 0; 78 | } 79 | else 80 | { 81 | optimizedK[i] = (int) ((double) m[i] / d[i] * Math.log(2) + 1) + 1; 82 | if (kMax > 0 && optimizedK[i] > kMax) 83 | { 84 | optimizedK[i] = kMax; 85 | } 86 | } 87 | } 88 | } 89 | return optimizedK; 90 | } 91 | 92 | /** 93 | * Get the total number of levels. 94 | * 95 | * @param maxT 96 | * from 0 97 | * @return 98 | */ 99 | public static int getLevelNum(int maxT) 100 | { 101 | int result = 0; 102 | for (int i = 0; i < CommonConstants.g.length; i++) 103 | { 104 | if ((maxT + 1) <= CommonConstants.g[i]) 105 | { 106 | result = i + 1; 107 | break; 108 | } 109 | } 110 | return result; 111 | } 112 | 113 | /** 114 | * Merge the BitSets of two consecutive beta2 pbfs into one. 115 | * Note: the structures of the two pbfs should be the same, i.e., hash numbers and level numbers. 
116 | * @param b1 117 | * @param b2 118 | * @return 119 | */ 120 | public static BitSet mergeBeta2(TemporalRangeBloomFilterV2 rbf1, TemporalRangeBloomFilterV2 rbf2) 121 | { 122 | BitSet bs1 = rbf1.getBitSet(); 123 | BitSet bs2 = rbf2.getBitSet(); 124 | int bitNumOfRbf1 = rbf1.getSize(); 125 | int bitNumOfRbf2 = rbf2.getSize(); 126 | int bitNum = MathUtil.gcd(bitNumOfRbf1, bitNumOfRbf2); 127 | BitSet bitSet = new BitSet(bitNum); 128 | for (int i = 0; i < bitNumOfRbf1; i++) 129 | { 130 | if (bs1.get(i) == true) 131 | { 132 | bitSet.set(i % bitNum); 133 | } 134 | } 135 | for (int i = 0; i < bitNumOfRbf2; i++) 136 | { 137 | if (bs2.get(i) == true) 138 | { 139 | bitSet.set(i % bitNum); 140 | } 141 | } 142 | return bitSet; 143 | } 144 | 145 | public static BitSet merge(BitSet bs1, int bitNumOfBs1, BitSet bs2, int bitNumOfBs2) 146 | { 147 | int bitNum = MathUtil.gcd(bitNumOfBs1, bitNumOfBs2); 148 | BitSet bitSet = new BitSet(bitNum); 149 | for (int i = 0; i < bitNumOfBs1; i++) 150 | { 151 | if (bs1.get(i) == true) 152 | { 153 | bitSet.set(i % bitNum); 154 | } 155 | } 156 | for (int i = 0; i < bitNumOfBs2; i++) 157 | { 158 | if (bs2.get(i) == true) 159 | { 160 | bitSet.set(i % bitNum); 161 | } 162 | } 163 | return bitSet; 164 | } 165 | 166 | public static void main(String[] args) 167 | { 168 | System.out.println(getLevelNum(127)); 169 | int m = 10000000; 170 | int[] d = {1000, 2000, 4000, 8000, 16000, 32000, 64000, 12800}; 171 | int[] f = {2, 2, 2, 2, 2, 2, 2, 1}; 172 | 173 | int[] mm = getOptimizedMForBeta1(m, d, f, 128); 174 | int sum = 0; 175 | 176 | for (int i = 0; i < mm.length; i++) 177 | { 178 | System.out.println(mm[i]); 179 | sum += mm[i]; 180 | } 181 | 182 | System.out.println(sum); 183 | 184 | } 185 | } 186 | 187 | //class LamdaFunction implements UnivariateFunction 188 | //{ 189 | // private int[] d; 190 | // private int[] f; 191 | // private long m; 192 | // 193 | // public LamdaFunction(long m, int[] d, int[] f) 194 | // { 195 | // this.m = m; 196 | // this.d 
/**
 * Helpers for post-processing measurement results: averaging a window of
 * sorted values and truncating a double to a number of decimal places.
 */
public class ResultUtil
{
    /**
     * Sorts {@code resultArray} in place (natural ascending order) and
     * returns the mean of up to {@code number} consecutive elements starting
     * at index {@code start}.
     * <p>
     * When fewer than {@code number} elements remain after {@code start},
     * the mean is taken over the elements that actually exist instead of
     * being divided by the requested count.
     *
     * @param resultArray values to average; reordered by this call
     * @param start       first index of the window; negative values are treated as 0
     * @param number      requested window size; values below 1 are treated as 1
     * @return the mean of the window, or 0 when the window is empty
     */
    public static <T extends Number & Comparable<? super T>> double handleResult(
            ArrayList<T> resultArray, int start, int number)
    {
        Collections.sort(resultArray);

        int from = Math.max(start, 0);
        int requested = Math.max(number, 1);
        int available = resultArray.size() - from;
        if (available <= 0)
        {
            return 0;
        }

        // Clamp to what exists; also avoids the int overflow of "from + number - 1".
        int count = Math.min(requested, available);
        double sum = 0;
        for (int i = from; i < from + count; i++)
        {
            sum += resultArray.get(i).doubleValue();
        }
        return sum / count;
    }

    /**
     * Formats {@code input} in plain decimal notation, truncated (not
     * rounded) to at most {@code decimalPlace} fractional digits. No zero
     * padding is added, and the decimal point is omitted when no fractional
     * digit is kept.
     *
     * @param input        value to format
     * @param decimalPlace maximum number of fractional digits; negative
     *                     values are treated as 0
     * @return the truncated textual form; NaN and infinities are returned as
     *         produced by {@link String#valueOf(double)}
     */
    public static String handleDouble(double input, int decimalPlace)
    {
        String text = String.valueOf(input);
        if (text.indexOf('E') >= 0)
        {
            // Scientific notation would lose its exponent when cut at the
            // dot, so expand it to plain digits first.
            text = new java.math.BigDecimal(text).toPlainString();
        }

        int dot = text.indexOf('.');
        if (dot < 0)
        {
            return text;
        }

        String integerPart = text.substring(0, dot);
        String fraction = text.substring(dot + 1);
        int kept = Math.min(Math.max(decimalPlace, 0), fraction.length());
        if (kept == 0)
        {
            // Avoid a dangling "12." when no fractional digit is requested.
            return integerPart;
        }
        return integerPart + "." + fraction.substring(0, kept);
    }

    /** Small manual demo of {@link #handleDouble(double, int)}. */
    public static void main(String[] args)
    {
        double a = 12.234567812;
        System.out.println(handleDouble(a, 0));
        System.out.println(handleDouble(a, 1));
        System.out.println(handleDouble(a, 2));
        System.out.println(handleDouble(a, 3));
        System.out.println(handleDouble(a, 4));
        System.out.println(handleDouble(a, 8));
        System.out.println(handleDouble(a, 9));
        System.out.println(handleDouble(a, 10));
        System.out.println(handleDouble(a, 20));
    }
}
9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/test/java/edu/ecnu/pbf/Beta1Test.java: -------------------------------------------------------------------------------- 1 | package edu.ecnu.pbf; 2 | 3 | public class Beta1Test 4 | { 5 | 6 | } 7 | --------------------------------------------------------------------------------