├── lib └── alpn-boot-8.1.7.v20160121.jar ├── .gitignore ├── .travis.yml ├── run_tests.sh ├── src ├── main │ └── java │ │ └── com │ │ └── google │ │ └── cloud │ │ └── genomics │ │ └── utils │ │ ├── grpc │ │ ├── VariantEmitterStrategy.java │ │ ├── VariantCallUtils.java │ │ ├── VariantMergeStrategy.java │ │ ├── Example.java │ │ ├── MergeNonVariantSegmentsWithVariants.java │ │ ├── MergeNonVariantSegmentsWithSnps.java │ │ ├── ReadStreamIterator.java │ │ ├── VariantStreamIterator.java │ │ ├── GenomicsChannel.java │ │ └── GenomicsStreamIterator.java │ │ ├── CallSetUtils.java │ │ ├── ShardBoundary.java │ │ ├── RetryPolicy.java │ │ ├── VariantUtils.java │ │ ├── Contig.java │ │ ├── OfflineAuth.java │ │ ├── CredentialFactory.java │ │ └── GenomicsUtils.java └── test │ ├── java │ └── com │ │ └── google │ │ └── cloud │ │ └── genomics │ │ └── utils │ │ ├── OfflineAuthITCase.java │ │ ├── grpc │ │ ├── VariantCallUtilsTest.java │ │ ├── VariantMergeStrategyTestHelper.java │ │ ├── FaultyGenomicsServerITLongCase.java │ │ ├── MergeNonVariantSegmentsWithSnpsTest.java │ │ ├── MergeNonVariantSegmentsWithVariantsTest.java │ │ ├── MergeNonVariantSegmentsWithSnpsITCase.java │ │ ├── MergeAllVariantsAtSameSiteITCase.java │ │ ├── MergeNonVariantSegmentsWithVariantsITCase.java │ │ ├── ReadStreamIteratorITCase.java │ │ ├── TestHelper.java │ │ ├── FaultyGenomicsServerITCase.java │ │ ├── VariantStreamIteratorITCase.java │ │ ├── GenomicsStreamIteratorTest.java │ │ └── ReadUtilsTest.java │ │ ├── TestHelper.java │ │ ├── CallSetUtilsTest.java │ │ ├── OfflineAuthTest.java │ │ ├── GenomicsUtilsITCase.java │ │ ├── VariantUtilsTest.java │ │ ├── IntegrationTestHelper.java │ │ ├── ShardBoundaryTest.java │ │ ├── GenomicsFactoryITCase.java │ │ ├── GenomicsFactoryTest.java │ │ ├── ReadUtilsTest.java │ │ └── ContigTest.java │ └── resources │ └── com │ └── google │ └── cloud │ └── genomics │ └── utils │ └── conversion_test.sam ├── CONTRIBUTING.rst └── README.md /lib/alpn-boot-8.1.7.v20160121.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/googlegenomics/utils-java/master/lib/alpn-boot-8.1.7.v20160121.jar -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | target 3 | *.iml 4 | *.releaseBackup 5 | release.properties 6 | settings.xml 7 | .project 8 | .classpath 9 | .settings 10 | 11 | .Rproj.user 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | 3 | jdk: 4 | - oraclejdk8 5 | - openjdk8 6 | 7 | install: /bin/true # Don't run "gradle assemble" (which is the default) 8 | 9 | script: mvn test javadoc:javadoc 10 | 11 | after_success: 12 | - mvn clean cobertura:cobertura -Dcobertura.report.format=xml coveralls:cobertura 13 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2017 Google Inc. 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 6 | # in compliance with the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software distributed under the License 11 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing permissions and limitations under 13 | # the License. 14 | 15 | set -o nounset 16 | set -o errexit 17 | 18 | # Check that required variables are explicitly set. 
19 | GOOGLE_API_KEY="$GOOGLE_API_KEY" 20 | GOOGLE_APPLICATION_CREDENTIALS="$GOOGLE_APPLICATION_CREDENTIALS" 21 | TEST_PROJECT="$TEST_PROJECT" 22 | 23 | echo -e "\n\n\nRunning unit and integration tests." 24 | mvn test javadoc:javadoc verify 25 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/grpc/VariantEmitterStrategy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import com.google.genomics.v1.Variant; 17 | 18 | /** 19 | * Strategy pattern interface for returning variant results to the underlying computational framework. 20 | * 21 | */ 22 | public interface VariantEmitterStrategy { 23 | 24 | /** 25 | * Given a variant, emit it using the implementation of the underlying computational framework. 26 | * @param variant 27 | */ 28 | public void emit(Variant variant); 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/grpc/VariantCallUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import com.google.common.base.Function; 17 | import com.google.common.collect.Ordering; 18 | import com.google.genomics.v1.VariantCall; 19 | 20 | import java.util.Comparator; 21 | 22 | /** 23 | * Predicates and comparators for variant calls. 24 | * 25 | */ 26 | public class VariantCallUtils { 27 | 28 | /** 29 | * Comparator for sorting calls by call set name. 30 | */ 31 | public static final Comparator CALL_COMPARATOR = Ordering.natural().onResultOf( 32 | new Function() { 33 | @Override 34 | public String apply(VariantCall call) { 35 | return call.getCallSetName(); 36 | } 37 | }); 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/grpc/VariantMergeStrategy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. 
You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import com.google.genomics.v1.Variant; 17 | 18 | /** 19 | * Strategy pattern interface for variant merging logic. 20 | */ 21 | public interface VariantMergeStrategy { 22 | 23 | /** 24 | * Given a collection of variants and non-variant segments that overlap a genomic region, 25 | * emit their merged representation via the emitter. 26 | * 27 | * @param windowStart - use this to identify records that begin prior to the region we are computing, but overlap it 28 | * @param variants - the variants that overlap the region we are computing 29 | * @param emitter - the strategy instance to use to emit results 30 | */ 31 | public void merge(Long windowStart, Iterable variants, VariantEmitterStrategy emitter); 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/OfflineAuthITCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.google.cloud.genomics.utils; 17 | 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertNotNull; 20 | import static org.junit.Assert.assertNull; 21 | 22 | import org.junit.Test; 23 | import org.junit.runner.RunWith; 24 | import org.junit.runners.JUnit4; 25 | 26 | @RunWith(JUnit4.class) 27 | public class OfflineAuthITCase { 28 | 29 | @Test 30 | public void testOfflineAuthFromApplicationDefaultCredential() throws Exception { 31 | OfflineAuth auth = new OfflineAuth(); 32 | assertFalse(auth.hasApiKey()); 33 | assertFalse(auth.hasStoredCredential()); 34 | assertNotNull(auth.getCredential()); 35 | assertNotNull(auth.getCredentials()); 36 | assertNull(auth.getClientId()); 37 | assertNull(auth.getClientSecret()); 38 | assertNull(auth.getRefreshToken()); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/VariantCallUtilsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 
13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import static org.junit.Assert.assertTrue; 17 | 18 | import com.google.genomics.v1.VariantCall; 19 | 20 | import org.junit.Test; 21 | import org.junit.runner.RunWith; 22 | import org.junit.runners.JUnit4; 23 | 24 | @RunWith(JUnit4.class) 25 | public class VariantCallUtilsTest { 26 | 27 | @Test 28 | public void testCallComparator() { 29 | assertTrue(0 == VariantCallUtils.CALL_COMPARATOR.compare( 30 | VariantCall.newBuilder().setCallSetName("NA12883").build(), 31 | VariantCall.newBuilder().setCallSetName("NA12883").build())); 32 | 33 | assertTrue(0 > VariantCallUtils.CALL_COMPARATOR.compare( 34 | VariantCall.newBuilder().setCallSetName("NA12883").build(), 35 | VariantCall.newBuilder().setCallSetName("NA12884").build())); 36 | 37 | assertTrue(0 < VariantCallUtils.CALL_COMPARATOR.compare( 38 | VariantCall.newBuilder().setCallSetName("NA12884").build(), 39 | VariantCall.newBuilder().setCallSetName("NA12883").build())); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/TestHelper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2014 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 
13 | */ 14 | package com.google.cloud.genomics.utils; 15 | 16 | import com.google.api.services.genomics.model.Variant; 17 | import com.google.api.services.genomics.model.VariantCall; 18 | 19 | import java.util.Arrays; 20 | import java.util.List; 21 | 22 | public class TestHelper { 23 | 24 | public static VariantCall makeCall(String name, Integer... alleles) { 25 | return new VariantCall().setCallSetName(name).setGenotype(Arrays.asList(alleles)); 26 | } 27 | 28 | public static Variant makeSimpleVariant(VariantCall... calls) { 29 | return new Variant().setCalls(Arrays.asList(calls)); 30 | } 31 | 32 | public static Variant makeVariant(String referenceName, long start, long end, 33 | String referenceBases, List alternateBases, VariantCall... calls) { 34 | Variant variant = 35 | new Variant().setReferenceName(referenceName).setStart(start).setEnd(end) 36 | .setReferenceBases(referenceBases).setAlternateBases(alternateBases); 37 | if (null != calls) { 38 | variant.setCalls(Arrays.asList(calls)); 39 | } 40 | return variant; 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/CallSetUtilsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.google.cloud.genomics.utils; 17 | 18 | import com.google.api.services.genomics.model.CallSet; 19 | import com.google.common.collect.BiMap; 20 | 21 | import org.junit.Rule; 22 | import org.junit.Test; 23 | import org.junit.rules.ExpectedException; 24 | import org.junit.runner.RunWith; 25 | import org.junit.runners.JUnit4; 26 | 27 | import java.util.Arrays; 28 | import java.util.List; 29 | 30 | @RunWith(JUnit4.class) 31 | public class CallSetUtilsTest { 32 | 33 | @Rule 34 | public ExpectedException thrown = ExpectedException.none(); 35 | 36 | @Test 37 | public void testGetCallSetNameMapping() { 38 | thrown.expect(IllegalArgumentException.class); 39 | thrown.expectMessage("value already present: duplicate"); 40 | 41 | List callSets = Arrays.asList( 42 | new CallSet().setName("unique").setId("123-0"), 43 | new CallSet().setName("duplicate").setId("123-1"), 44 | new CallSet().setName("duplicate").setId("123-2") 45 | ); 46 | BiMap namesToIds = CallSetUtils.getCallSetNameMapping(callSets); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/VariantMergeStrategyTestHelper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 
13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import static org.junit.Assert.assertEquals; 17 | 18 | import com.google.genomics.v1.Variant; 19 | 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | 23 | public class VariantMergeStrategyTestHelper { 24 | 25 | public static class AccumulatingVariantEmitter implements VariantEmitterStrategy { 26 | private List results = new ArrayList(); 27 | 28 | @Override 29 | public void emit(Variant variant) { 30 | results.add(variant); 31 | } 32 | 33 | public List getVariants() { 34 | return results; 35 | } 36 | } 37 | 38 | public static void mergeTest(Long windowStart, List input, List expectedOutput, 39 | Class clazz) throws InstantiationException, IllegalAccessException { 40 | VariantMergeStrategy merger = clazz.newInstance(); 41 | VariantMergeStrategyTestHelper.AccumulatingVariantEmitter emitter = 42 | new VariantMergeStrategyTestHelper.AccumulatingVariantEmitter(); 43 | 44 | merger.merge(windowStart, input, emitter); 45 | List output = emitter.getVariants(); 46 | assertEquals(expectedOutput.size(), output.size()); 47 | 48 | for (int i = 0; i < expectedOutput.size(); i++) { 49 | assertEquals(expectedOutput.get(i), output.get(i)); 50 | } 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/grpc/Example.java: -------------------------------------------------------------------------------- 1 | package com.google.cloud.genomics.utils.grpc; 2 | 3 | import com.google.genomics.v1.ReferenceServiceV1Grpc; 4 | import com.google.genomics.v1.ReferenceServiceV1Grpc.ReferenceServiceV1BlockingStub; 5 | import com.google.genomics.v1.ReferenceSet; 6 | import com.google.genomics.v1.SearchReferenceSetsRequest; 7 | import com.google.genomics.v1.SearchReferenceSetsResponse; 8 | import com.google.genomics.v1.StreamVariantsRequest; 9 | import com.google.genomics.v1.StreamVariantsResponse; 10 | import 
com.google.genomics.v1.StreamingVariantServiceGrpc; 11 | import com.google.genomics.v1.StreamingVariantServiceGrpc.StreamingVariantServiceBlockingStub; 12 | 13 | import io.grpc.ManagedChannel; 14 | 15 | import java.util.Iterator; 16 | 17 | public class Example { 18 | 19 | public static void main(String[] args) throws Exception { 20 | ManagedChannel channel = GenomicsChannel.fromDefaultCreds(); 21 | 22 | // Regular RPC example: list all reference set assembly ids. 23 | ReferenceServiceV1BlockingStub refStub = 24 | ReferenceServiceV1Grpc.newBlockingStub(channel); 25 | SearchReferenceSetsRequest request = 26 | SearchReferenceSetsRequest.newBuilder().build(); 27 | SearchReferenceSetsResponse response = refStub.searchReferenceSets(request); 28 | for (ReferenceSet rs : response.getReferenceSetsList()) { 29 | System.out.println(rs.getAssemblyId()); 30 | } 31 | 32 | // Streaming RPC example: request the variants within BRCA1 for the Platinum Genomes variant set. 33 | StreamingVariantServiceBlockingStub varStub = 34 | StreamingVariantServiceGrpc.newBlockingStub(channel); 35 | StreamVariantsRequest varRequest = StreamVariantsRequest.newBuilder() 36 | .setVariantSetId("3049512673186936334") 37 | .setReferenceName("chr17") 38 | .setStart(41196311) 39 | .setEnd(41277499) 40 | .build(); 41 | 42 | try { 43 | Iterator iter = varStub.streamVariants(varRequest); 44 | while (iter.hasNext()) { 45 | StreamVariantsResponse varResponse = iter.next(); 46 | System.out.println("Response:"); 47 | System.out.println(varResponse.toString()); 48 | System.out.println(); 49 | } 50 | System.out.println("Done"); 51 | } finally { 52 | channel.shutdownNow(); 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/CallSetUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.google.cloud.genomics.utils; 17 | 18 | import com.google.api.services.genomics.model.CallSet; 19 | import com.google.common.base.Function; 20 | import com.google.common.collect.BiMap; 21 | import com.google.common.collect.HashBiMap; 22 | 23 | /** 24 | * A collection of utility methods for working with call sets. 25 | * 26 | */ 27 | public class CallSetUtils { 28 | 29 | /** 30 | * Given a collection of callsets, return all the names. 31 | */ 32 | public static final Function GET_NAMES = new Function() { 33 | @Override 34 | public String apply(CallSet c) { 35 | return c.getName(); 36 | } 37 | }; 38 | 39 | /** 40 | * Given a collection of callsets, return all the ids. 41 | */ 42 | public static final Function GET_IDS = new Function() { 43 | @Override 44 | public String apply(CallSet c) { 45 | return c.getId(); 46 | } 47 | }; 48 | 49 | /** 50 | * Create a bi-directional map of names to ids for a collection of callsets. 51 | * 52 | * As a side effect, this will throw an IllegalArgumentException when the collection of callsets 53 | * is malformed due to multiple is mapping to the same name. 
54 | * 55 | * @param callSets 56 | * @return the bi-directional map 57 | */ 58 | public static final BiMap getCallSetNameMapping(Iterable callSets) { 59 | BiMap idToName = HashBiMap.create(); 60 | for(CallSet callSet : callSets) { 61 | // Dev Note: Be sure to keep this map loading as id -> name since it ensures that 62 | // the values are unique. 63 | idToName.put(callSet.getId(), callSet.getName()); 64 | } 65 | return idToName.inverse(); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/FaultyGenomicsServerITLongCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import com.google.cloud.genomics.utils.ShardBoundary; 17 | import com.google.cloud.genomics.utils.ShardUtils; 18 | import com.google.common.collect.ImmutableList; 19 | import com.google.genomics.v1.StreamVariantsRequest; 20 | 21 | import org.junit.Test; 22 | import org.junit.runner.RunWith; 23 | import org.junit.runners.JUnit4; 24 | 25 | import java.io.IOException; 26 | import java.security.GeneralSecurityException; 27 | 28 | /** 29 | * This is a long-running test (~20 minutes) and will not be run by either surefire or failsafe by 30 | * default. 
31 | * 32 | * To run it: mvn -Dit.test=FaultyGenomicsServerITLongCase verify 33 | * 34 | */ 35 | @RunWith(JUnit4.class) 36 | public class FaultyGenomicsServerITLongCase extends FaultyGenomicsServerITCase { 37 | 38 | // Create one long stream. 39 | private static final ImmutableList requests = ShardUtils.getVariantRequests( 40 | PROTOTYPE, 1000000000L, "chrY:0:60032946"); 41 | private static final int EXPECTED_CHRY_NUM_VARIANTS = 5971309; 42 | 43 | @Test 44 | public void testOnePercentVariantFaults() throws IOException, GeneralSecurityException { 45 | VariantStreamIterator iter = 46 | VariantStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 47 | ShardBoundary.Requirement.STRICT, null); 48 | runRetryTest(iter, 0.01, EXPECTED_CHRY_NUM_VARIANTS); 49 | } 50 | 51 | @Test 52 | public void testFivePercentVariantFaults() throws IOException, GeneralSecurityException { 53 | VariantStreamIterator iter = 54 | VariantStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 55 | ShardBoundary.Requirement.STRICT, null); 56 | runRetryTest(iter, 0.05, EXPECTED_CHRY_NUM_VARIANTS); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/MergeNonVariantSegmentsWithSnpsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. 
See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import com.google.genomics.v1.Variant; 17 | 18 | import org.junit.Test; 19 | import org.junit.runner.RunWith; 20 | import org.junit.runners.JUnit4; 21 | 22 | import java.util.Arrays; 23 | 24 | @RunWith(JUnit4.class) 25 | public class MergeNonVariantSegmentsWithSnpsTest { 26 | 27 | @Test 28 | public void mergeVariants() throws Exception { 29 | Variant snp1 = TestHelper.makeVariant("chr7", 200010, "A", Arrays.asList("C"), "het-RA", "hom-AA").build(); 30 | Variant snp2 = TestHelper.makeVariant("chr7", 200019, "T", Arrays.asList("G"), "het-RA", "hom-AA").build(); 31 | Variant insert = TestHelper.makeVariant("chr7", 200010, "A", Arrays.asList("AC"), "het-RA", "hom-AA").build(); 32 | Variant blockRecord1 = TestHelper.makeBlockRecord("chr7", 199005, 202050, "A", TestHelper.EMPTY_ALT_LIST).build(); 33 | Variant blockRecord2 = TestHelper.makeBlockRecord("chr7", 200011, 202020, "C", Arrays.asList(VariantUtils.GATK_NON_VARIANT_SEGMENT_ALT)).build(); 34 | 35 | Variant expectedSnp1 = TestHelper.makeVariant("chr7", 200010, "A", Arrays.asList("C")) 36 | .addCalls(TestHelper.makeCall("het-RA-[C]", 0,1)) 37 | .addCalls(TestHelper.makeCall("hom-AA-[C]", 1,1)) 38 | .addCalls(TestHelper.makeCall("hom-RR-[]", 0,0)) 39 | .build(); 40 | 41 | Variant expectedSnp2 = TestHelper.makeVariant("chr7", 200019, "T", Arrays.asList("G")) 42 | .addCalls(TestHelper.makeCall("het-RA-[G]", 0,1)) 43 | .addCalls(TestHelper.makeCall("hom-AA-[G]", 1,1)) 44 | .addCalls(TestHelper.makeCall("hom-RR-[]", 0,0)) 45 | .addCalls(TestHelper.makeCall("hom-RR-[]", 0,0)) 46 | .build(); 47 | 48 | Variant expectedInsert = TestHelper.makeVariant("chr7", 200010, "A", Arrays.asList("AC")) 49 | .addCalls(TestHelper.makeCall("het-RA-[AC]", 0,1)) 50 | .addCalls(TestHelper.makeCall("hom-AA-[AC]", 1,1)) 51 | .build(); 52 | 53 | 
VariantMergeStrategyTestHelper.mergeTest(200000L, Arrays.asList(snp1, snp2, insert, blockRecord1, blockRecord2), 54 | Arrays.asList(expectedInsert, expectedSnp1, expectedSnp2), 55 | MergeNonVariantSegmentsWithSnps.class); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/MergeNonVariantSegmentsWithVariantsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 
13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import com.google.genomics.v1.Variant; 17 | 18 | import org.junit.Test; 19 | import org.junit.runner.RunWith; 20 | import org.junit.runners.JUnit4; 21 | 22 | import java.util.Arrays; 23 | 24 | @RunWith(JUnit4.class) 25 | public class MergeNonVariantSegmentsWithVariantsTest { 26 | 27 | @Test 28 | public void mergeVariants() throws Exception { 29 | Variant snp1 = TestHelper.makeVariant("chr7", 200010, "A", Arrays.asList("C"), "het-RA", "hom-AA").build(); 30 | Variant snp2 = TestHelper.makeVariant("chr7", 200019, "T", Arrays.asList("G"), "het-RA", "hom-AA").build(); 31 | Variant insert = TestHelper.makeVariant("chr7", 200010, "A", Arrays.asList("AC"), "het-RA", "hom-AA").build(); 32 | Variant blockRecord1 = TestHelper.makeBlockRecord("chr7", 199005, 202050, "A", TestHelper.EMPTY_ALT_LIST).build(); 33 | Variant blockRecord2 = TestHelper.makeBlockRecord("chr7", 200011, 202020, "C", Arrays.asList(VariantUtils.GATK_NON_VARIANT_SEGMENT_ALT)).build(); 34 | 35 | Variant expectedSnp1 = TestHelper.makeVariant("chr7", 200010, "A", Arrays.asList("C")) 36 | .addCalls(TestHelper.makeCall("het-RA-[C]", 0,1)) 37 | .addCalls(TestHelper.makeCall("hom-AA-[C]", 1,1)) 38 | .addCalls(TestHelper.makeCall("hom-RR-[]", 0,0)) 39 | .build(); 40 | 41 | Variant expectedSnp2 = TestHelper.makeVariant("chr7", 200019, "T", Arrays.asList("G")) 42 | .addCalls(TestHelper.makeCall("het-RA-[G]", 0,1)) 43 | .addCalls(TestHelper.makeCall("hom-AA-[G]", 1,1)) 44 | .addCalls(TestHelper.makeCall("hom-RR-[]", 0,0)) 45 | .addCalls(TestHelper.makeCall("hom-RR-[]", 0,0)) 46 | .build(); 47 | 48 | Variant expectedInsert = TestHelper.makeVariant("chr7", 200010, "A", Arrays.asList("AC")) 49 | .addCalls(TestHelper.makeCall("het-RA-[AC]", 0,1)) 50 | .addCalls(TestHelper.makeCall("hom-AA-[AC]", 1,1)) 51 | .addCalls(TestHelper.makeCall("hom-RR-[]", 0,0)) 52 | .build(); 53 | 54 | VariantMergeStrategyTestHelper.mergeTest(200000L, Arrays.asList(snp1, snp2, 
insert, blockRecord1, blockRecord2), 55 | Arrays.asList(expectedInsert, expectedSnp1, expectedSnp2), 56 | MergeNonVariantSegmentsWithVariants.class); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/OfflineAuthTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.google.cloud.genomics.utils; 17 | 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertNotNull; 20 | import static org.junit.Assert.assertNull; 21 | import static org.junit.Assert.assertThat; 22 | import static org.junit.Assert.assertTrue; 23 | 24 | import com.google.api.client.auth.oauth2.Credential; 25 | import com.google.api.client.googleapis.auth.oauth2.GoogleCredential; 26 | import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport; 27 | import com.google.api.client.json.jackson2.JacksonFactory; 28 | 29 | import org.hamcrest.CoreMatchers; 30 | import org.junit.Test; 31 | import org.junit.runner.RunWith; 32 | import org.junit.runners.JUnit4; 33 | 34 | import java.io.ByteArrayOutputStream; 35 | import java.io.ObjectOutputStream; 36 |
/**
 * Unit tests for {@link OfflineAuth}: API-key construction and Java serializability.
 */
@RunWith(JUnit4.class)
public class OfflineAuthTest {

  /**
   * An {@code OfflineAuth} built from an API key reports the key and exposes no stored
   * credential parts.
   */
  @Test
  public void testOfflineAuthFromApiKey() throws Exception {
    OfflineAuth auth = new OfflineAuth("xyz");
    assertTrue(auth.hasApiKey());
    assertFalse(auth.hasStoredCredential());
    assertNull(auth.getClientId());
    assertNull(auth.getClientSecret());
    assertNull(auth.getRefreshToken());

    // Depending upon the environment in which these unit tests are run, the
    // Application Default Credentials may or may not be available.
    try {
      Credential cred = auth.getCredential();
      assertNotNull(cred);
      assertNotNull(auth.getCredentials());
    } catch (Exception e) {
      // Only the "no default credentials" failure is acceptable here; assertion failures are
      // Errors and therefore are not caught by this handler.
      assertThat(e.getMessage(),
          CoreMatchers.containsString("Unable to get application default credentials."));
    }
  }

  /** An API-key based {@code OfflineAuth} can be written with Java serialization. */
  @Test
  public void testOfflineAuth_apiKeyIsSerializable() throws Exception {
    assertSerializable(new OfflineAuth("xyz"));
  }

  /** A credential based {@code OfflineAuth} can be written with Java serialization. */
  @Test
  public void testOfflineAuth_credentialIsSerializable() throws Exception {
    Credential credential = new GoogleCredential.Builder()
        .setJsonFactory(JacksonFactory.getDefaultInstance())
        .setTransport(GoogleNetHttpTransport.newTrustedTransport())
        .setClientSecrets("theClientId", "theClientSecret")
        .build()
        .setRefreshToken("theRefreshToken");
    assertSerializable(new OfflineAuth(credential));
  }

  /**
   * Writes {@code auth} to an in-memory {@link ObjectOutputStream}.
   *
   * This mimics the serialization flow used by Dataflow pipelines. The stream is opened with
   * try-with-resources so that it is flushed and closed even when {@code writeObject} throws.
   *
   * @param auth the object expected to be serializable
   * @throws Exception if the object (or anything it references) cannot be serialized
   */
  private static void assertSerializable(OfflineAuth auth) throws Exception {
    try (ObjectOutputStream oos = new ObjectOutputStream(new ByteArrayOutputStream())) {
      oos.writeObject(auth);
    }
  }
}
85 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/grpc/MergeNonVariantSegmentsWithVariants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License.
13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import com.google.common.collect.Lists; 17 | import com.google.genomics.v1.Variant; 18 | import com.google.genomics.v1.Variant.Builder; 19 | 20 | import java.util.Collections; 21 | import java.util.Iterator; 22 | import java.util.LinkedList; 23 | import java.util.List; 24 | 25 | /** 26 | * This strategy converts data with non-variant segments (such as data that was in 27 | * source format Genome VCF (gVCF) or Complete Genomics) to variant-only data occurring 28 | * within the genomic window with calls from non-variant-segments merged into the 29 | * variants with which they overlap. 30 | * 31 | * Dealing with ambiguous data: 32 | * If a particular sample has both a variant and one or more non-variant segments that overlap it, 33 | * all ambiguous calls are returned to be disambiguated or flagged further downstream. 34 | */ 35 | public class MergeNonVariantSegmentsWithVariants implements VariantMergeStrategy { 36 | 37 | @Override 38 | public void merge(Long windowStart, Iterable variants, VariantEmitterStrategy emitter) { 39 | // The sort order is critical here so that candidate overlapping reference matching blocks 40 | // occur prior to any variants they may overlap. 41 | List records = Lists.newArrayList(variants); // Get a modifiable list. 42 | Collections.sort(records, VariantUtils.NON_VARIANT_SEGMENT_COMPARATOR); 43 | 44 | // The upper bound on potential overlaps is the sample size plus the number of 45 | // block records that occur between actual variants. 46 | List blockRecords = new LinkedList<>(); 47 | 48 | for (Variant record : records) { 49 | if (!VariantUtils.IS_NON_VARIANT_SEGMENT.apply(record)) { 50 | if (record.getStart() < windowStart) { 51 | // This is a variant that begins before our window. Skip it. 
52 | continue; 53 | } 54 | Builder updatedRecord = Variant.newBuilder(record); 55 | for (Iterator iterator = blockRecords.iterator(); iterator.hasNext();) { 56 | Variant blockRecord = iterator.next(); 57 | if (VariantUtils.isOverlapping(blockRecord, record)) { 58 | updatedRecord.addAllCalls(blockRecord.getCallsList()); 59 | } else { 60 | // Remove the current element from the iterator and the list since it is 61 | // left of the genomic region we are currently working on due to our sort. 62 | iterator.remove(); 63 | } 64 | } 65 | // Emit this variant and move on (no need to hang onto it in memory). 66 | emitter.emit(updatedRecord.build()); 67 | } else { 68 | blockRecords.add(record); 69 | } 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | How to contribute 2 | =================================== 3 | 4 | First of all, thank you for contributing! 5 | 6 | The mailing list 7 | ---------------- 8 | 9 | For general questions or if you are having trouble getting started, try the 10 | `Google Genomics Discuss mailing list `_. 11 | It's a good way to sync up with other people who use googlegenomics including the core developers. You can subscribe 12 | by sending an email to ``google-genomics-discuss+subscribe@googlegroups.com`` or just post using 13 | the `web forum page `_. 14 | 15 | Local development 16 | ----------------- 17 | 18 | With Maven you can locally install a SNAPSHOT version of the code, to use from other projects 19 | directly without having to wait for the Maven repository. Use: 20 | 21 | ```` 22 | mvn install 23 | ```` 24 | 25 | to run the full tests and do a local install. You can also use 26 | 27 | ```` 28 | mvn install -DskipITs 29 | ```` 30 | 31 | to run only the unit tests and do a local install. This is faster. 
32 | 33 | 34 | Submitting issues 35 | ----------------- 36 | 37 | If you are encountering a bug in the code or have a feature request in mind - file away! 38 | 39 | 40 | Submitting a pull request 41 | ------------------------- 42 | 43 | If you are ready to contribute code, GitHub provides a nice `overview on how to create a pull request 44 | <https://help.github.com/articles/creating-a-pull-request>`_. 45 | 46 | Some general rules to follow: 47 | 48 | * Do your work in `a fork <https://help.github.com/articles/fork-a-repo>`_ of this repo. 49 | * Create a branch for each update that you're working on. 50 | These branches are often called "feature" or "topic" branches. Any changes 51 | that you push to your feature branch will automatically be shown in the pull request. 52 | * Keep your pull requests as small as possible. Large pull requests are hard to review. 53 | Try to break up your changes into self-contained and incremental pull requests. 54 | * The first line of commit messages should be a short (<80 character) summary, 55 | followed by an empty line and then any details that you want to share about the commit. 56 | * Please try to follow the existing syntax style. 57 | 58 | When you submit or change your pull request, the Travis build system will automatically run tests. 59 | If your pull request fails to pass tests, review the test log, make changes and 60 | then push them to your feature branch to be tested again. 61 | 62 | 63 | Contributor License Agreements 64 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 65 | 66 | All pull requests are welcome. Before we can submit them though, there is a legal hurdle we have to jump. 67 | You'll need to fill out either the individual or corporate Contributor License Agreement 68 | (CLA). 69 | 70 | * If you are an individual writing original source code and you're sure you 71 | own the intellectual property, then you'll need to sign an `individual CLA 72 | <https://developers.google.com/open-source/cla/individual>`_. 73 | * If you work for a company that wants to allow you to contribute your work, 74 | then you'll need to sign a `corporate CLA 75 | <https://developers.google.com/open-source/cla/corporate>`_.
76 | 77 | Follow either of the two links above to access the appropriate CLA and 78 | instructions for how to sign and return it. Once we receive it, we'll be able to 79 | accept your pull requests. 80 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/grpc/MergeNonVariantSegmentsWithSnps.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import com.google.common.collect.Lists; 17 | import com.google.genomics.v1.Variant; 18 | import com.google.genomics.v1.Variant.Builder; 19 | 20 | import java.util.Collections; 21 | import java.util.Iterator; 22 | import java.util.LinkedList; 23 | import java.util.List; 24 | 25 | /** 26 | * This strategy converts data with non-variant segments (such as data that was in 27 | * source format Genome VCF (gVCF) or Complete Genomics) to variant-only data occurring 28 | * within the genomic window with calls from non-variant-segments merged into the SNPs 29 | * with which they overlap. Indels and structural variants are left as-is. 
30 | * 31 | * Dealing with ambiguous data: 32 | * If a particular sample has both a SNP and one or more non-variant segments that overlap it, 33 | * all ambiguous calls are returned to be disambiguated or flagged further downstream. 34 | */ 35 | public class MergeNonVariantSegmentsWithSnps implements VariantMergeStrategy { 36 | 37 | @Override 38 | public void merge(Long windowStart, Iterable variants, VariantEmitterStrategy emitter) { 39 | // The sort order is critical here so that candidate overlapping reference matching blocks 40 | // occur prior to any variants they may overlap. 41 | List records = Lists.newArrayList(variants); // Get a modifiable list. 42 | Collections.sort(records, VariantUtils.NON_VARIANT_SEGMENT_COMPARATOR); 43 | 44 | // The upper bound on potential overlaps is the sample size plus the number of 45 | // block records that occur between actual variants. 46 | List blockRecords = new LinkedList<>(); 47 | 48 | for (Variant record : records) { 49 | if (!VariantUtils.IS_NON_VARIANT_SEGMENT.apply(record)) { 50 | if (record.getStart() < windowStart) { 51 | // This is a variant that begins before our window. Skip it. 52 | continue; 53 | } 54 | Builder updatedRecord = Variant.newBuilder(record); 55 | if (VariantUtils.IS_SNP.apply(record)) { 56 | for (Iterator iterator = blockRecords.iterator(); iterator.hasNext();) { 57 | Variant blockRecord = iterator.next(); 58 | if (VariantUtils.isOverlapping(blockRecord, record)) { 59 | updatedRecord.addAllCalls(blockRecord.getCallsList()); 60 | } else { 61 | // Remove the current element from the iterator and the list since it is 62 | // left of the genomic region we are currently working on due to our sort. 63 | iterator.remove(); 64 | } 65 | } 66 | } 67 | // Emit this variant and move on (no need to hang onto it in memory). 
68 | emitter.emit(updatedRecord.build()); 69 | } else { 70 | blockRecords.add(record); 71 | } 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/ShardBoundary.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils; 15 | 16 | import com.google.api.client.util.Strings; 17 | import com.google.common.base.Preconditions; 18 | import com.google.common.base.Predicate; 19 | import com.google.genomics.v1.Read; 20 | import com.google.genomics.v1.Variant; 21 | 22 | import java.util.regex.Pattern; 23 | 24 | /** 25 | * By default cluster compute jobs working with sharded data from the Genomics API will 26 | * see any records that span a shard boundary in both shards. In some cases this is 27 | * desired; in others it is not. It just depends upon the particular analysis. 28 | */ 29 | public class ShardBoundary { 30 | 31 | /** 32 | * Enum for shard boundary requirement. 33 | */ 34 | public enum Requirement { 35 | /** 36 | * Use OVERLAPS if data overlapping the start of the shard should be returned. 37 | */ 38 | OVERLAPS, 39 | /** 40 | * Use STRICT if data overlapping the start of the shard should be excluded. 
41 | */ 42 | STRICT, 43 | } 44 | 45 | private static final Pattern READ_FIELD_PATTERN = Pattern.compile(".*\\p{Punct}alignment\\p{Punct}.*"); 46 | private static final Pattern VARIANT_FIELD_PATTERN = Pattern.compile(".*\\p{Punct}start\\p{Punct}.*"); 47 | 48 | /** 49 | * Predicate expressing the logic for which variants should and should not be included in the shard. 50 | * 51 | * @param start The start position of the shard. 52 | * @return Whether the variant would be included in a strict shard boundary. 53 | */ 54 | public static Predicate getStrictVariantPredicate(final long start, String fields) { 55 | Preconditions 56 | .checkArgument(Strings.isNullOrEmpty(fields) 57 | || VARIANT_FIELD_PATTERN.matcher(fields).matches(), 58 | "Insufficient fields requested in partial response. At a minimum " 59 | + "include 'variants(start)' to enforce a strict shard boundary."); 60 | return new Predicate() { 61 | @Override 62 | public boolean apply(Variant variant) { 63 | return variant.getStart() >= start; 64 | } 65 | }; 66 | } 67 | 68 | /** 69 | * Predicate expressing the logic for which reads should and should not be included in the shard. 70 | * 71 | * @param start The start position of the shard. 72 | * @return Whether the read would be included in a strict shard boundary. 73 | */ 74 | public static Predicate getStrictReadPredicate(final long start, final String fields) { 75 | Preconditions 76 | .checkArgument(Strings.isNullOrEmpty(fields) 77 | || READ_FIELD_PATTERN.matcher(fields).matches(), 78 | "Insufficient fields requested in partial response. 
At a minimum " 79 | + "include 'alignments(alignment)' to enforce a strict shard boundary."); 80 | return new Predicate() { 81 | @Override 82 | public boolean apply(Read read) { 83 | return read.getAlignment().getPosition().getPosition() >= start; 84 | } 85 | }; 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/GenomicsUtilsITCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.google.cloud.genomics.utils; 17 | 18 | import static org.hamcrest.MatcherAssert.assertThat; 19 | import static org.junit.Assert.assertEquals; 20 | 21 | import com.google.common.collect.Iterables; 22 | 23 | import org.hamcrest.CoreMatchers; 24 | import org.hamcrest.collection.IsIterableContainingInOrder; 25 | import org.junit.Test; 26 | import org.junit.runner.RunWith; 27 | import org.junit.runners.JUnit4; 28 |
/**
 * Integration tests for {@link GenomicsUtils}, checked against the identifiers and names that
 * {@link IntegrationTestHelper} publishes for the Platinum Genomes dataset.
 */
@RunWith(JUnit4.class)
public class GenomicsUtilsITCase {

  /** The dataset's read group set ids include all of the known ones. */
  @Test
  public void testGetReadGroupSetIds() throws Exception {
    assertThat(
        GenomicsUtils.getReadGroupSetIds(
            IntegrationTestHelper.PLATINUM_GENOMES_DATASET,
            IntegrationTestHelper.getAuthFromApiKey()),
        CoreMatchers.allOf(
            CoreMatchers.hasItems(IntegrationTestHelper.PLATINUM_GENOMES_READGROUPSETS)));
  }

  /** The first known read group set resolves to the expected reference set id. */
  @Test
  public void testGetReferenceSetIdForReadGroupSet() throws Exception {
    assertEquals(
        IntegrationTestHelper.PLATINUM_GENOMES_REFERENCE_SET_ID,
        GenomicsUtils.getReferenceSetId(
            IntegrationTestHelper.PLATINUM_GENOMES_READGROUPSETS[0],
            IntegrationTestHelper.getAuthFromApiKey()));
  }

  /** The dataset's variant set ids include the known variant set. */
  @Test
  public void testGetVariantSetIds() throws Exception {
    assertThat(
        GenomicsUtils.getVariantSetIds(
            IntegrationTestHelper.PLATINUM_GENOMES_DATASET,
            IntegrationTestHelper.getAuthFromApiKey()),
        CoreMatchers.allOf(
            CoreMatchers.hasItems(IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET)));
  }

  /** The call sets, mapped to their names, match the known names exactly and in order. */
  @Test
  public void testGetCallSets() throws Exception {
    assertThat(
        Iterables.transform(
            GenomicsUtils.getCallSets(
                IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET,
                IntegrationTestHelper.getAuthFromApiKey()),
            CallSetUtils.GET_NAMES),
        IsIterableContainingInOrder.contains(
            IntegrationTestHelper.PLATINUM_GENOMES_CALLSET_NAMES));
  }

  /** The call set names include all of the known names (order is not checked). */
  @Test
  public void testGetCallSetsNames() throws Exception {
    assertThat(
        GenomicsUtils.getCallSetsNames(
            IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET,
            IntegrationTestHelper.getAuthFromApiKey()),
        CoreMatchers.allOf(
            CoreMatchers.hasItems(IntegrationTestHelper.PLATINUM_GENOMES_CALLSET_NAMES)));
  }

  /** The variant set's reference bounds include the known bounds (API key auth). */
  @Test
  public void testGetReferenceBounds() throws Exception {
    assertThat(
        GenomicsUtils.getReferenceBounds(
            IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET,
            IntegrationTestHelper.getAuthFromApiKey()),
        CoreMatchers.allOf(
            CoreMatchers.hasItems(IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET_BOUNDS)));
  }

  /** Same bounds check, authenticating with the Application Default Credential instead. */
  @Test
  public void testGetReferenceBoundsApplicationDefaultCredential() throws Exception {
    assertThat(
        GenomicsUtils.getReferenceBounds(
            IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET,
            IntegrationTestHelper.getAuthFromApplicationDefaultCredential()),
        CoreMatchers.allOf(
            CoreMatchers.hasItems(IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET_BOUNDS)));
  }

}
82 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/RetryPolicy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2014 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License.
13 | */ 14 | package com.google.cloud.genomics.utils; 15 | 16 | import com.google.api.services.genomics.GenomicsRequest; 17 | 18 | import java.io.IOException; 19 | import java.io.Serializable; 20 | 21 | /** 22 | * An object describing when to retry if a request fails. 23 | */ 24 | public abstract class RetryPolicy implements Serializable { 25 | 26 | /** 27 | * An instance is instantiated each time a request is made to the API and is consulted only if 28 | * the request failed. 29 | */ 30 | public abstract class Instance { 31 | 32 | /** 33 | * Should we retry the request or not? 34 | */ 35 | public abstract boolean shouldRetry(GenomicsRequest request, IOException e); 36 | } 37 | 38 | public static RetryPolicy defaultPolicy() { 39 | return nAttempts(3); 40 | } 41 | 42 | public static RetryPolicy alwaysRetry() { 43 | return constant(true); 44 | } 45 | 46 | public static RetryPolicy neverRetry() { 47 | return constant(false); 48 | } 49 | 50 | private static RetryPolicy constant(final boolean retry) { 51 | return new RetryPolicy() { 52 | 53 | private final Instance instance = 54 | new Instance() { 55 | @Override public boolean shouldRetry(GenomicsRequest genomicsRequest, IOException e) { 56 | return retry; 57 | } 58 | }; 59 | 60 | @Override public Instance createInstance() { 61 | return instance; 62 | } 63 | }; 64 | } 65 | 66 | /** 67 | * Retry requests up to {@code n} times. 68 | */ 69 | public static RetryPolicy nAttempts(final int n) { 70 | return new RetryPolicy() { 71 | @Override public Instance createInstance() { 72 | return new Instance() { 73 | 74 | private int count = 0; 75 | 76 | @Override public boolean shouldRetry(GenomicsRequest genomicsRequest, IOException e) { 77 | return count++ < n; 78 | } 79 | }; 80 | } 81 | }; 82 | } 83 | 84 | public abstract Instance createInstance(); 85 | 86 | /** 87 | * Execute the given {@link GenomicsRequest} and return the response. 
88 | * 89 | * This method begins by creating an {@code RetryPolicy.Instance} and then attempts to execute the 90 | * request by invoking {@link GenomicsRequest#execute}. If the request fails with an 91 | * {@link IOException}, the {@link RetryPolicy.Instance#shouldRetry} method is consulted to 92 | * determine if the request should be retried. 93 | * 94 | * @param request The {@code GenomicsRequest} to execute. 95 | * @return The response from executing the request. 96 | * @throws IOException if executing the request fails and {@code this} RetryPolicy decides not to 97 | * retry the request. 98 | */ 99 | public final , D> D execute(C request) throws IOException { 100 | for (Instance instance = createInstance(); true;) { 101 | try { 102 | return request.execute(); 103 | } catch (IOException e) { 104 | if (!instance.shouldRetry(request, e)) { 105 | throw e; 106 | } 107 | } 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/VariantUtilsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 
13 | */ 14 | package com.google.cloud.genomics.utils; 15 | 16 | import static org.junit.Assert.assertFalse; 17 | import static org.junit.Assert.assertTrue; 18 | 19 | import com.google.api.services.genomics.model.VariantCall; 20 | 21 | import org.junit.Test; 22 | import org.junit.runner.RunWith; 23 | import org.junit.runners.JUnit4; 24 | 25 | import java.util.Arrays; 26 | import java.util.List; 27 | 28 | @RunWith(JUnit4.class) 29 | public class VariantUtilsTest { 30 | 31 | private List emptyAlt = Arrays.asList(); 32 | 33 | @Test 34 | public void testIsVariant() { 35 | // SNPs 36 | assertFalse(VariantUtils.IS_NON_VARIANT_SEGMENT.apply(TestHelper.makeVariant("chr7", 200000, 37 | 200001, "A", Arrays.asList("C"), (VariantCall[]) null))); 38 | 39 | // Insertions 40 | assertFalse(VariantUtils.IS_NON_VARIANT_SEGMENT.apply(TestHelper.makeVariant("chr7", 200000, 41 | 200001, "A", Arrays.asList("AC"), (VariantCall[]) null))); 42 | 43 | // Deletions NOTE: These are all the same mutation, just encoded in different ways. 
44 | assertFalse(VariantUtils.IS_NON_VARIANT_SEGMENT.apply(TestHelper.makeVariant("chr7", 200000, 45 | 200001, "CAG", Arrays.asList("C"), (VariantCall[]) null))); 46 | assertFalse(VariantUtils.IS_NON_VARIANT_SEGMENT.apply(TestHelper.makeVariant("chr7", 200000, 47 | 200001, "AG", emptyAlt, (VariantCall[]) null))); 48 | assertFalse(VariantUtils.IS_NON_VARIANT_SEGMENT.apply(TestHelper.makeVariant("chr7", 200000, 49 | 200001, "AG", null, (VariantCall[]) null))); 50 | 51 | // Multi-allelic sites 52 | assertFalse(VariantUtils.IS_NON_VARIANT_SEGMENT.apply(TestHelper.makeVariant("chr7", 200000, 53 | 200001, "A", Arrays.asList("C", "AC"), (VariantCall[]) null))); 54 | assertFalse(VariantUtils.IS_NON_VARIANT_SEGMENT.apply(TestHelper.makeVariant("chr7", 200000, 55 | 200001, "A", Arrays.asList("C", "G"), (VariantCall[]) null))); 56 | 57 | // Non-Variant Block Records 58 | assertTrue(VariantUtils.IS_NON_VARIANT_SEGMENT.apply(TestHelper.makeVariant("chr7", 200000, 59 | 200001, "A", emptyAlt, (VariantCall[]) null))); 60 | assertTrue(VariantUtils.IS_NON_VARIANT_SEGMENT.apply(TestHelper.makeVariant("chr7", 200000, 61 | 200001, "A", null, (VariantCall[]) null))); 62 | assertTrue(VariantUtils.IS_NON_VARIANT_SEGMENT.apply(TestHelper.makeVariant("chr7", 200000, 63 | 200001, "A", Arrays.asList(VariantUtils.GATK_NON_VARIANT_SEGMENT_ALT), (VariantCall[]) null))); 64 | 65 | // A variant with a alternate. 
66 | assertFalse(VariantUtils.IS_NON_VARIANT_SEGMENT.apply(TestHelper.makeVariant("chr7", 200000, 67 | 200001, "A", Arrays.asList("T", VariantUtils.GATK_NON_VARIANT_SEGMENT_ALT), (VariantCall[]) null))); 68 | } 69 | 70 | @Test 71 | public void testIsSNP() { 72 | assertTrue(VariantUtils.IS_SNP.apply(TestHelper.makeVariant("chr7", 200000, 200001, "A", 73 | Arrays.asList("C"), (VariantCall[]) null))); 74 | // Deletion 75 | assertFalse(VariantUtils.IS_SNP.apply(TestHelper.makeVariant("chr7", 200000, 200001, "CA", 76 | Arrays.asList("C"), (VariantCall[]) null))); 77 | // Insertion 78 | assertFalse(VariantUtils.IS_SNP.apply(TestHelper.makeVariant("chr7", 200000, 200001, "C", 79 | Arrays.asList("CA"), (VariantCall[]) null))); 80 | 81 | // SNP and Insertion 82 | assertFalse(VariantUtils.IS_SNP.apply(TestHelper.makeVariant("chr7", 200000, 200001, "C", 83 | Arrays.asList("A", "CA"), (VariantCall[]) null))); 84 | 85 | // Block Records 86 | assertFalse(VariantUtils.IS_SNP.apply(TestHelper.makeVariant("chr7", 200000, 200001, "A", 87 | emptyAlt, (VariantCall[]) null))); 88 | assertFalse(VariantUtils.IS_SNP.apply(TestHelper.makeVariant("chr7", 200000, 200001, "A", null, 89 | (VariantCall[]) null))); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/MergeNonVariantSegmentsWithSnpsITCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. 
See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import static org.junit.Assert.assertEquals; 17 | 18 | import com.google.cloud.genomics.utils.IntegrationTestHelper; 19 | import com.google.cloud.genomics.utils.ShardBoundary; 20 | import com.google.cloud.genomics.utils.ShardUtils; 21 | import com.google.common.collect.ImmutableList; 22 | import com.google.genomics.v1.StreamVariantsRequest; 23 | import com.google.genomics.v1.StreamVariantsResponse; 24 | import com.google.genomics.v1.Variant; 25 | 26 | import org.junit.Test; 27 | import org.junit.runner.RunWith; 28 | import org.junit.runners.JUnit4; 29 | 30 | import java.util.Arrays; 31 | import java.util.Iterator; 32 | 33 | /** 34 | * To run the test: mvn -Dit.test=MergeNonVariantSegmentsWithSnpsITCase verify 35 | * 36 | * Use the following API explorer link or BigQuery query to see the data used in this test. 37 | * https://developers.google.com/apis-explorer/#p/genomics/v1/genomics.variants.stream?fields=variants(alternateBases%252Ccalls(callSetName%252Cgenotype)%252Cend%252CreferenceBases%252CreferenceName%252Cstart)&_h=2&resource=%257B%250A++%2522variantSetId%2522%253A+%25223049512673186936334%2522%252C%250A++%2522referenceName%2522%253A+%2522chr17%2522%252C%250A++%2522start%2522%253A+%252241204796%2522%252C%250A++%2522end%2522%253A+%252241204797%2522%250A%257D& 38 | * 39 | SELECT 40 | reference_name, 41 | start, 42 | end, 43 | reference_bases, 44 | GROUP_CONCAT(alternate_bases) WITHIN RECORD AS alternate_bases, 45 | call.call_set_name, 46 | GROUP_CONCAT(STRING(call.genotype)) WITHIN call AS genotype, 47 | FROM 48 | [genomics-public-data:platinum_genomes.variants] 49 | WHERE 50 | reference_name CONTAINS 'chr17' 51 | AND start <= 41204796 and end >= 41204797 52 | ORDER BY 53 | call.call_set_name, 54 | start, 55 | alternate_bases 56 | * 57 | */ 58 |
@RunWith(JUnit4.class)
public class MergeNonVariantSegmentsWithSnpsITCase {

  /** Request template: the Platinum Genomes variant set, billed to the test project. */
  public static final StreamVariantsRequest PROTOTYPE = StreamVariantsRequest.newBuilder()
      .setVariantSetId(IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET)
      .setProjectId(IntegrationTestHelper.getTEST_PROJECT())
      .build();

  /**
   * Streams the records overlapping chr17:41198773-41198774 and checks that
   * {@link MergeNonVariantSegmentsWithSnps} merges reference-matching calls into the SNP only,
   * leaving the insertion at the same site with just its own calls.
   *
   * NOTE(review): the API explorer link and BigQuery query in the class comment use position
   * 41204796, while this test requests 41198773 - confirm which one is intended.
   */
  @Test
  public void testMerge() throws Exception {
    ImmutableList<StreamVariantsRequest> requests =
        ShardUtils.getVariantRequests(PROTOTYPE,
            100, "chr17:41198773:41198774");
    assertEquals(1, requests.size());
    StreamVariantsRequest request = requests.get(0);

    // OVERLAPS keeps records that begin before the shard start, which is how the non-variant
    // segments covering this site reach the merge strategy.
    Iterator<StreamVariantsResponse> iter =
        VariantStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(),
            request,
            ShardBoundary.Requirement.OVERLAPS,
            "variants(alternateBases,calls(callSetName,genotype),end,referenceBases,referenceName,start)");

    // Platinum genomes has both a snp and an insertion at this genomic site.
    Variant expectedSnp = TestHelper.makeVariant("chr17", 41198773, 41198774, "C", Arrays.asList("A"))
        .addCalls(TestHelper.makeCall("NA12878", 0, 1))
        .addCalls(TestHelper.makeCall("NA12877", 0, 0))
        .addCalls(TestHelper.makeCall("NA12889", 0, 0))
        .addCalls(TestHelper.makeCall("NA12891", 0, 0))
        .addCalls(TestHelper.makeCall("NA12892", 0, 0))
        .addCalls(TestHelper.makeCall("NA12890", 0, 0))
        .build();

    Variant expectedInsertion = TestHelper.makeVariant("chr17", 41198773, 41198774, "C", Arrays.asList("CA"))
        .addCalls(TestHelper.makeCall("NA12892", 0, 1))
        .addCalls(TestHelper.makeCall("NA12878", 0, 1))
        .build();

    // Only the first streamed response is merged.
    VariantMergeStrategyTestHelper.mergeTest(request.getStart(), iter.next().getVariantsList(),
        Arrays.asList(expectedSnp, expectedInsertion),
        MergeNonVariantSegmentsWithSnps.class);
  }
}
99 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/VariantUtils.java:
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils; 15 | 16 | import com.google.api.services.genomics.model.Variant; 17 | import com.google.api.services.genomics.model.VariantCall; 18 | import com.google.common.base.Function; 19 | import com.google.common.base.Predicate; 20 | import com.google.common.base.Predicates; 21 | import com.google.common.collect.Iterables; 22 | import com.google.common.collect.Ordering; 23 | 24 | import java.util.Comparator; 25 | import java.util.List; 26 | 27 | public class VariantUtils { 28 | 29 | public static final String GATK_NON_VARIANT_SEGMENT_ALT = ""; 30 | 31 | /** 32 | * Determine whether the variant has any values in alternate bases. 33 | */ 34 | public static final Predicate HAS_ALTERNATE = new Predicate() { 35 | @Override 36 | public boolean apply(Variant variant) { 37 | List alternateBases = variant.getAlternateBases(); 38 | return !(null == alternateBases || alternateBases.isEmpty()); 39 | } 40 | }; 41 | 42 | public static final Predicate LENGTH_IS_1 = Predicates.compose(Predicates.equalTo(1), 43 | new Function() { 44 | @Override 45 | public Integer apply(String string) { 46 | return string.length(); 47 | } 48 | }); 49 | 50 | /** 51 | * Determine whether the variant is a SNP. 
52 | */ 53 | public static final Predicate IS_SNP = Predicates.and(HAS_ALTERNATE, 54 | new Predicate() { 55 | @Override 56 | public boolean apply(Variant variant) { 57 | return LENGTH_IS_1.apply(variant.getReferenceBases()) 58 | && Iterables.all(variant.getAlternateBases(), LENGTH_IS_1); 59 | } 60 | }); 61 | 62 | /** 63 | * Determine whether the variant is a non-variant segment (a.k.a. non-variant block record). 64 | * 65 | * For Complete Genomics data and gVCFs such as Platinum Genomes, we wind up with zero alternates 66 | * (the missing value indicator "." in the VCF ALT field gets converted to null). See 67 | * https://sites.google.com/site/gvcftools/home/about-gvcf for more detail. 68 | */ 69 | public static final Predicate IS_NON_VARIANT_SEGMENT_WITH_MISSING_ALT = Predicates.and( 70 | Predicates.not(HAS_ALTERNATE), new Predicate() { 71 | @Override 72 | public boolean apply(Variant variant) { 73 | // The same deletion can be specified as [CAG -> C] or [AG -> null], so double check that 74 | // the reference bases are also of length 1 when there are no alternates. 75 | return LENGTH_IS_1.apply(variant.getReferenceBases()); 76 | } 77 | }); 78 | 79 | /** 80 | * Determine whether the variant is a non-variant segment (a.k.a. non-variant block record). 81 | * 82 | * For data processed by GATK the value of ALT is "<NON_REF>". See 83 | * https://www.broadinstitute.org/gatk/guide/article?id=4017 for more detail. 84 | */ 85 | public static final Predicate IS_NON_VARIANT_SEGMENT_WITH_GATK_ALT = Predicates.and( 86 | HAS_ALTERNATE, new Predicate() { 87 | @Override 88 | public boolean apply(Variant variant) { 89 | return Iterables.all(variant.getAlternateBases(), 90 | Predicates.equalTo(GATK_NON_VARIANT_SEGMENT_ALT)); 91 | } 92 | }); 93 | 94 | /** 95 | * Determine whether the variant is a non-variant segment (a.k.a. non-variant block record). 
96 | */ 97 | public static final Predicate IS_NON_VARIANT_SEGMENT = Predicates.or( 98 | IS_NON_VARIANT_SEGMENT_WITH_MISSING_ALT, IS_NON_VARIANT_SEGMENT_WITH_GATK_ALT); 99 | 100 | 101 | /** 102 | * Comparator for sorting calls by call set name. 103 | */ 104 | public static final Comparator CALL_COMPARATOR = Ordering.natural().onResultOf( 105 | new Function() { 106 | @Override 107 | public String apply(VariantCall call) { 108 | return call.getCallSetName(); 109 | } 110 | }); 111 | } 112 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/MergeAllVariantsAtSameSiteITCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 
13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import static org.junit.Assert.assertEquals; 17 | 18 | import com.google.cloud.genomics.utils.IntegrationTestHelper; 19 | import com.google.cloud.genomics.utils.ShardBoundary; 20 | import com.google.cloud.genomics.utils.ShardUtils; 21 | import com.google.common.collect.ImmutableList; 22 | import com.google.genomics.v1.StreamVariantsRequest; 23 | import com.google.genomics.v1.StreamVariantsResponse; 24 | import com.google.genomics.v1.Variant; 25 | import com.google.protobuf.ListValue; 26 | 27 | import org.junit.Test; 28 | import org.junit.runner.RunWith; 29 | import org.junit.runners.JUnit4; 30 | 31 | import java.util.Arrays; 32 | import java.util.HashMap; 33 | import java.util.Iterator; 34 | import java.util.Map; 35 | 36 | /** 37 | * To run the test: mvn -Dit.test=MergeAllVariantsAtSameSiteITCase verify 38 | * 39 | * Use the following API explorer link or BigQuery query to see the data used in this test. 40 | * https://developers.google.com/apis-explorer/#p/genomics/v1/genomics.variants.stream?fields=variants(alternateBases%252Ccalls(callSetName%252Cgenotype)%252Cend%252CreferenceBases%252CreferenceName%252Cstart)&_h=2&resource=%257B%250A++%2522variantSetId%2522%253A+%25223049512673186936334%2522%252C%250A++%2522referenceName%2522%253A+%2522chr17%2522%252C%250A++%2522start%2522%253A+%252241204796%2522%252C%250A++%2522end%2522%253A+%252241204797%2522%250A%257D& 41 | * 42 | SELECT 43 | reference_name, 44 | start, 45 | end, 46 | reference_bases, 47 | GROUP_CONCAT(alternate_bases) WITHIN RECORD AS alternate_bases, 48 | call.call_set_name, 49 | GROUP_CONCAT(STRING(call.genotype)) WITHIN call AS genotype, 50 | FROM 51 | [genomics-public-data:platinum_genomes.variants] 52 | WHERE 53 | reference_name CONTAINS 'chr17' 54 | AND start <= 41204796 and end >= 41204797 55 | ORDER BY 56 | call.call_set_name, 57 | start, 58 | alternate_bases 59 | * 60 | */ 61 | @RunWith(JUnit4.class) 62 | public class 
MergeAllVariantsAtSameSiteITCase { 63 | 64 | public static final StreamVariantsRequest PROTOTYPE = StreamVariantsRequest.newBuilder() 65 | .setVariantSetId(IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET) 66 | .setProjectId(IntegrationTestHelper.getTEST_PROJECT()) 67 | .build(); 68 | 69 | @Test 70 | public void testMerge() throws Exception { 71 | ImmutableList requests = 72 | ShardUtils.getVariantRequests(PROTOTYPE, 73 | 100L, "chr17:41198773:41198774"); 74 | assertEquals(1, requests.size()); 75 | 76 | Iterator iter = 77 | VariantStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 78 | requests.get(0), 79 | ShardBoundary.Requirement.OVERLAPS, 80 | "variants(alternateBases,calls(callSetName,genotype),end,referenceBases,referenceName,start)"); 81 | 82 | Map emptyInfo = new HashMap(); 83 | emptyInfo.put(MergeAllVariantsAtSameSite.OVERLAPPING_CALLSETS_FIELD, ListValue.newBuilder().build()); 84 | 85 | // Platinum genomes has both a snp and an insertion at this genomic site 86 | // but merging the calls together yields an ambiguous result. 
87 | Variant expectedOutput1 = TestHelper.makeVariant("chr17", 41198773, 41198774, "C", Arrays.asList("A", "CA")) 88 | .addCalls(TestHelper.makeCall("NA12878", 0, 1)) // ambiguous 89 | .addCalls(TestHelper.makeCall("NA12892", 0, 2)) // ambiguous 90 | .addCalls(TestHelper.makeCall("NA12878", 0, 2)) // ambiguous 91 | .addCalls(TestHelper.makeCall("NA12877", 0, 0)) 92 | .addCalls(TestHelper.makeCall("NA12889", 0, 0)) 93 | .addCalls(TestHelper.makeCall("NA12891", 0, 0)) 94 | .addCalls(TestHelper.makeCall("NA12892", 0, 0)) // ambiguous 95 | .addCalls(TestHelper.makeCall("NA12890", 0, 0)) 96 | .putAllInfo(emptyInfo) 97 | .build(); 98 | 99 | VariantMergeStrategyTestHelper.mergeTest(requests.get(0).getStart(), iter.next().getVariantsList(), 100 | Arrays.asList(expectedOutput1), 101 | MergeAllVariantsAtSameSite.class); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/MergeNonVariantSegmentsWithVariantsITCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2016 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 
13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import static org.junit.Assert.assertEquals; 17 | 18 | import com.google.cloud.genomics.utils.IntegrationTestHelper; 19 | import com.google.cloud.genomics.utils.ShardBoundary; 20 | import com.google.cloud.genomics.utils.ShardUtils; 21 | import com.google.common.collect.ImmutableList; 22 | import com.google.genomics.v1.StreamVariantsRequest; 23 | import com.google.genomics.v1.StreamVariantsResponse; 24 | import com.google.genomics.v1.Variant; 25 | 26 | import org.junit.Test; 27 | import org.junit.runner.RunWith; 28 | import org.junit.runners.JUnit4; 29 | 30 | import java.util.Arrays; 31 | import java.util.Iterator; 32 | 33 | /** 34 | * To run the test: mvn -Dit.test=MergeNonVariantSegmentsWithVariantsITCase verify 35 | * 36 | * Use the following API explorer link or BigQuery query to see the data used in this test. 37 | * https://developers.google.com/apis-explorer/#p/genomics/v1/genomics.variants.stream?fields=variants(alternateBases%252Ccalls(callSetName%252Cgenotype)%252Cend%252CreferenceBases%252CreferenceName%252Cstart)&_h=2&resource=%257B%250A++%2522variantSetId%2522%253A+%25223049512673186936334%2522%252C%250A++%2522referenceName%2522%253A+%2522chr17%2522%252C%250A++%2522start%2522%253A+%252241204796%2522%252C%250A++%2522end%2522%253A+%252241204797%2522%250A%257D& 38 | * 39 | SELECT 40 | reference_name, 41 | start, 42 | end, 43 | reference_bases, 44 | GROUP_CONCAT(alternate_bases) WITHIN RECORD AS alternate_bases, 45 | call.call_set_name, 46 | GROUP_CONCAT(STRING(call.genotype)) WITHIN call AS genotype, 47 | FROM 48 | [genomics-public-data:platinum_genomes.variants] 49 | WHERE 50 | reference_name CONTAINS 'chr17' 51 | AND start <= 41204796 and end >= 41204797 52 | ORDER BY 53 | call.call_set_name, 54 | start, 55 | alternate_bases 56 | * 57 | */ 58 | @RunWith(JUnit4.class) 59 | public class MergeNonVariantSegmentsWithVariantsITCase { 60 | 61 | public static final StreamVariantsRequest 
PROTOTYPE = StreamVariantsRequest.newBuilder() 62 | .setVariantSetId(IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET) 63 | .setProjectId(IntegrationTestHelper.getTEST_PROJECT()) 64 | .build(); 65 | 66 | @Test 67 | public void testMerge() throws Exception { 68 | ImmutableList requests = 69 | ShardUtils.getVariantRequests(PROTOTYPE, 70 | 100, "chr17:41198773:41198774"); 71 | assertEquals(1, requests.size()); 72 | 73 | Iterator iter = 74 | VariantStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 75 | requests.get(0), 76 | ShardBoundary.Requirement.OVERLAPS, 77 | "variants(alternateBases,calls(callSetName,genotype),end,referenceBases,referenceName,start)"); 78 | 79 | // Platinum genomes has both a snp and an insertion at this genomic site. 80 | Variant expectedOutput1 = TestHelper.makeVariant("chr17", 41198773, 41198774, "C", Arrays.asList("A")) 81 | .addCalls(TestHelper.makeCall("NA12878", 0, 1)) 82 | .addCalls(TestHelper.makeCall("NA12877", 0, 0)) 83 | .addCalls(TestHelper.makeCall("NA12889", 0, 0)) 84 | .addCalls(TestHelper.makeCall("NA12891", 0, 0)) 85 | .addCalls(TestHelper.makeCall("NA12892", 0, 0)) 86 | .addCalls(TestHelper.makeCall("NA12890", 0, 0)) 87 | .build(); 88 | 89 | Variant expectedOutput2 = TestHelper.makeVariant("chr17", 41198773, 41198774, "C", Arrays.asList("CA")) 90 | .addCalls(TestHelper.makeCall("NA12892", 0, 1)) // ambiguous 91 | .addCalls(TestHelper.makeCall("NA12878", 0, 1)) // ambiguous 92 | .addCalls(TestHelper.makeCall("NA12877", 0, 0)) 93 | .addCalls(TestHelper.makeCall("NA12889", 0, 0)) 94 | .addCalls(TestHelper.makeCall("NA12891", 0, 0)) 95 | .addCalls(TestHelper.makeCall("NA12892", 0, 0)) // ambiguous 96 | .addCalls(TestHelper.makeCall("NA12890", 0, 0)) 97 | .build(); 98 | 99 | VariantMergeStrategyTestHelper.mergeTest(requests.get(0).getStart(), iter.next().getVariantsList(), 100 | Arrays.asList(expectedOutput1, expectedOutput2), 101 | MergeNonVariantSegmentsWithVariants.class); 102 | 
} 103 | } 104 | -------------------------------------------------------------------------------- /src/test/resources/com/google/cloud/genomics/utils/conversion_test.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.0 SO:coordinate 2 | @SQ SN:chr1 LN:101 3 | @SQ SN:chr2 LN:101 4 | @SQ SN:chr3 LN:101 5 | @SQ SN:chr4 LN:101 6 | @SQ SN:chr5 LN:101 7 | @SQ SN:chr6 LN:101 8 | @SQ SN:chr7 LN:202 9 | @SQ SN:chr8 LN:202 10 | @RG ID:0 SM:Hi,Momma! LB:whatever PU:me PL:ILLUMINA 11 | SL-XAV:1:1:0:764#0/1 89 chr1 1 255 101M * 0 0 TTCATGCTGANGCNCTCTTACGATCGTACAGATGCAAATATTAACANNCNTTNAAGNNCANNNNNNNNNCAATACAATANTAGAGTACGTNAACACTCCAN &/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0& RG:Z:0 NN:Z:Hello 12 | SL-XAV:1:1:0:1668#0/2 153 chr2 1 255 101M * 0 0 CATCTCTACANGCGCGTCCTACCAGACGCGCTTCCGATCTGAGAGCATACTTTTCATTGGATTCCAGCACAACTCCATTTTTGATCCACTNGACACCTTTN (/,'-/'0////(1'&&1&&&&'2''-6/,/3-33653.6:1'.86/-++32.-4864653/5/583/346423203+28888644446688456/4880& RG:Z:0 NN:Z:Goodbye 13 | SL-XAV:1:1:0:1914#0/2 153 chr3 1 255 101M * 0 0 CGTATGCGCTNTTTATGTCGCCCACAGTGCCTAGTATAGCCCCTGCTAATAAAAAGAGATGAATACGTTTACTTAAAAAACTGAAACTAGNAATGTGCAAN (0,7&&*/*0*,)10/).-*&.&*/6669.&-337599;3,&,6/.,5::999987893+387020775777547999::668997448:::9;999::0& RG:Z:0 14 | SL-XAV:1:1:0:1639#0/2 153 chr4 1 255 101M * 0 0 CGTGATACCANCTCATGTTCACAGCCAAAGCCTGAAGCTGTCTATTATATTTCTCAACCATAAACTTTTGCCTCAGGCATCCGCAGAATGNTTTGCAGCCN '.&.&&'.0+01'2(1'(''-)','+0041/.+032;:867115/5267-.0/)-5.&-26200224,,0+0/0275/5605688::646875568882*& RG:Z:0 15 | SL-XAV:1:1:0:68#0/2 137 chr5 1 255 101M * 0 0 NTCTCATTTANAAATGGTTATAAAAACATTTATGCTGAAAAGGTGAAGTTCATTAATGAACAGGCTGACTGTCTCACTATCGCGTTCGCANGACGTTATCT &1<<999;;;;<<<87579:556972789977444.'.023.&,7621/54.49.)/53055-22--''+(.'-))6-168/(3&&0(<).))*&&&&&'0 RG:Z:0 16 | SL-XAV:1:1:0:700#0/2 137 chr6 1 255 101M * 0 0 
NAATTGTTCTNAGTTTCTCGGTTTATGTGCTCTTCCAGGTGGGTAACACAATAATGGCCTTCCAGATCGTAAGAGCGACGTGTGTTGCACNAGTGTCGATC &0::887::::6/646::838388811/679:87640&./2+/-4/28:3,536/4''&&.78/(/554/./02*)*',-(57()&.6(6:(0601'/(,* RG:Z:0 17 | SL-XAV:1:1:0:1721#0/1 83 chr7 1 255 101M = 102 40 CAACAGAAGGNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCGAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 18 | SL-XAV:1:1:0:105#0/2 403 chr7 1 255 101M = 102 79 CACATCGTGANTCTTACAATCTGCGGTTTCAGATGTGGAGCGATGTGTGAGAGATTGAGCAACTGATCTGAAAAGCAGACACAGCTATTCNTAAGATGACN /))3--/&*()&)&&+'++.'-&,(.))'4,)&'&&,')8,&&*'.&*0'225/&)3-8//)*,5-*).7851453583.3568526:863688:::85.& RG:Z:0 19 | SL-XAV:1:1:0:1721#0/2 163 chr7 102 255 101M = 1 -40 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 20 | SL-XAV:1:1:0:105#0/2 147 chr8 1 255 101M = 102 79 CACATCGTGANTCTTACAATCTGCGGTTTCAGATGTGGAGCGATGTGTGAGAGATTGAGCAACTGATCTGAAAAGCAGACACAGCTATTCNTAAGATGACN /))3--/&*()&)&&+'++.'-&,(.))'4,)&'&&,')8,&&*'.&*0'225/&)3-8//)*,5-*).7851453583.3568526:863688:::85.& RG:Z:0 21 | SL-XAV:1:1:0:105#0/1 99 chr8 102 255 101M = 1 -79 NCAGGTTCAANTGTGCAGCCCNTTTTGAGAGATNNNNNNNNTGNNCTGNAANANNGACACAGCTATTCCTAAGATGACAAGATCAGANAANAAGTCAAGCA &06665578::41.*/7577/&/77403-324.&&&&&&&&/.&&..&&.0&&&&',:9:/-/(55002020+3'12+2/&.2-&//&),&*&&&&&&&51 RG:Z:0 22 | SL-XAV:1:1:0:1300#0/1 77 * 0 0 * * 0 0 NAAACACAAGNNANAGTCTTANCNGCTATTCCNNNNNNNNNCTNNNCTNAGNANNACATACAACAGTATCCACACAAGTGTACTCGTNCANACATGTGAAC &*5535)*-,,&.&.*-1)*,&'&)&1&&.,)&&&&&&&&&)0&&&0'&&&&.&&*2'/4''0/**&)&,'-&*,&,&&&.0.&)&&&**&,.&&&')&&) RG:Z:0 23 | SL-XAV:1:1:0:1300#0/2 141 * 0 0 * * 0 0 NGATCATGGANGACTCTCCCCATCCCCCGCTCCAGCGCTCAGTTATATGCCTAGCCTCGGACACGTCACCAACATCTCACGCACTCTGCANAGTCTCTCAC 
&&'+''3*&-/)/1'26/*-2-/542-*&-&/'/*/&-'&)-')&.'-/&&2+122*'&+,(/-&)((,/-,,.'2(2'+)/&/&-66-&&/16&)&*&'3 RG:Z:0 24 | SL-XAV:1:1:0:1639#0/1 101 * 0 0 * chr1 1 0 NCCCTCTCAGNNTNTCTGCCANANCCTTAAGCNNNNNNNNNTANNNCTNAANCNNAAACTTTTGCCTCAGGCATCCGCAGAATGTTTNTCNGCCTATATCG &1::::::64/&/&0:3.280&/&087881,/&&&&&&&&&..&&&..&,,&-&&,265341-)/5680&-.5552-25/322/42/&)&&).421&-&-/ RG:Z:0 25 | SL-XAV:1:1:0:1668#0/1 101 * 0 0 * chr2 1 0 NATAGCATACNNTNCATTGGANTNCAGCACAANNNNNNNNNTGNNNCANTNNANNCCTTTGAGATCGGAAGAGCGGTTCAGCAGGAANNCNCAGACCGATC &1988998890&0&.8863//&.&.0-2875.&&&&&&&&&.)&&&..&.&&.&&.5782-2+262)&-0-0510*.332-2.-,0*&&*&'.&-2-)0., RG:Z:0 26 | SL-XAV:1:1:0:1914#0/1 101 * 0 0 * chr3 1 0 NTTTTTCTCCNNCNGTGCCTANTNTAGCCCCTNNNNNNNNNAANNNATNANNANNTTTACTTAAAAAACTGAAACTAGTAATGTGCANNANATCGNAAGAG &0::::<<;90&/&.244760&,&.414798/&&&&&&&&&00&&&0.&/&&-&&.4475687363504.&.557/.*)65.&/*./&&.&.+*)&..).& RG:Z:0 27 | SL-XAV:1:1:0:68#0/1 581 * 0 0 * chr4 1 0 NAATATTCATNNGNTCAGCCTNTNCATTAATTNNNNNNNNNTTNNNATNATNANNTTTTTTATAACCATTTATAAATGAGAGAGATCNTANCACAATATCA &0<<::::: requests = 57 | ShardUtils.getReadRequests(Collections.singletonList(PROTOTYPE), 58 | 100L, REFERENCES); 59 | assertEquals(1, requests.size()); 60 | 61 | Iterator iter = 62 | ReadStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 63 | requests.get(0), 64 | ShardBoundary.Requirement.OVERLAPS, null); 65 | 66 | assertTrue(iter.hasNext()); 67 | StreamReadsResponse readResponse = iter.next(); 68 | assertEquals(63, readResponse.getAlignmentsList().size()); 69 | assertFalse(iter.hasNext()); 70 | 71 | iter = ReadStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 72 | requests.get(0), 73 | ShardBoundary.Requirement.STRICT, null); 74 | 75 | assertTrue(iter.hasNext()); 76 | readResponse = iter.next(); 77 | assertEquals(2, readResponse.getAlignmentsList().size()); 78 | assertFalse(iter.hasNext()); 79 | } 80 | 81 | @Test 82 | public void 
testPartialResponses() throws IOException, GeneralSecurityException { 83 | ImmutableList requests = 84 | ShardUtils.getReadRequests(Collections.singletonList(PROTOTYPE), 85 | 100L, REFERENCES); 86 | assertEquals(1, requests.size()); 87 | 88 | Iterator iter = 89 | ReadStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 90 | requests.get(0), 91 | ShardBoundary.Requirement.STRICT, "alignments(alignment)"); 92 | 93 | assertTrue(iter.hasNext()); 94 | StreamReadsResponse readResponse = iter.next(); 95 | List reads = readResponse.getAlignmentsList(); 96 | assertEquals(2, reads.size()); 97 | assertFalse(iter.hasNext()); 98 | 99 | assertEquals("chr13", reads.get(0).getAlignment().getPosition().getReferenceName()); 100 | assertEquals(33628135, reads.get(0).getAlignment().getPosition().getPosition()); 101 | assertTrue(Strings.isNullOrEmpty(reads.get(0).getAlignedSequence())); 102 | } 103 | 104 | @Test 105 | public void testPartialResponsesInsufficientFields() throws IOException, GeneralSecurityException { 106 | thrown.expect(IllegalArgumentException.class); 107 | thrown.expectMessage(containsString("Insufficient fields requested in partial response. 
" 108 | + "At a minimum include 'alignments(alignment)' to enforce a strict shard boundary.")); 109 | 110 | ImmutableList requests = 111 | ShardUtils.getReadRequests(Collections.singletonList(PROTOTYPE), 112 | 100L, REFERENCES); 113 | assertEquals(1, requests.size()); 114 | 115 | Iterator iter = 116 | ReadStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 117 | requests.get(0), 118 | ShardBoundary.Requirement.STRICT, "alignments(alignedSequence)"); 119 | } 120 | } 121 | 122 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/ShardBoundaryTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 
13 | */ 14 | package com.google.cloud.genomics.utils; 15 | 16 | import static org.hamcrest.Matchers.containsString; 17 | import static org.junit.Assert.assertEquals; 18 | import static org.junit.Assert.assertThat; 19 | 20 | import com.google.common.base.Predicate; 21 | import com.google.common.collect.Iterables; 22 | import com.google.common.collect.Lists; 23 | import com.google.genomics.v1.LinearAlignment; 24 | import com.google.genomics.v1.Position; 25 | import com.google.genomics.v1.Read; 26 | import com.google.genomics.v1.Variant; 27 | 28 | import org.hamcrest.CoreMatchers; 29 | import org.junit.Rule; 30 | import org.junit.Test; 31 | import org.junit.rules.ExpectedException; 32 | import org.junit.runner.RunWith; 33 | import org.junit.runners.JUnit4; 34 | 35 | import java.util.Arrays; 36 | import java.util.List; 37 | 38 | @RunWith(JUnit4.class) 39 | public class ShardBoundaryTest { 40 | @Rule 41 | public ExpectedException thrown = ExpectedException.none(); 42 | 43 | @Test 44 | public void testGetStrictVariantPredicate() { 45 | long start = 1000L; 46 | long end = 2000L; // This is not used, but its the "extent" mentioned below. 
47 | 48 | Variant overlapStartWithinExtent = Variant.newBuilder().setStart(900L).setEnd(1005L).build(); 49 | Variant overlapStartExtent = Variant.newBuilder().setStart(999L).setEnd(5000L).build(); 50 | Variant atStartWithinExtent = Variant.newBuilder().setStart(1000L).setEnd(1002L).build(); 51 | Variant atStartOverlapExtent = Variant.newBuilder().setStart(1000L).setEnd(5000L).build(); 52 | Variant beyondStartWithinExtent = Variant.newBuilder().setStart(1500L).setEnd(1502L).build(); 53 | Variant beyondOverlapExtent = Variant.newBuilder().setStart(1500L).setEnd(5000L).build(); 54 | Variant[] variants = new Variant[] { overlapStartWithinExtent, overlapStartExtent, atStartWithinExtent, 55 | atStartOverlapExtent, beyondStartWithinExtent, beyondOverlapExtent }; 56 | 57 | Predicate shardPredicate = ShardBoundary.getStrictVariantPredicate(start, null); 58 | List filteredVariants = Lists.newArrayList(Iterables.filter(Arrays.asList(variants), shardPredicate)); 59 | assertEquals(4, filteredVariants.size()); 60 | assertThat(filteredVariants, CoreMatchers.allOf(CoreMatchers.hasItems(atStartWithinExtent, 61 | atStartOverlapExtent, beyondStartWithinExtent, beyondOverlapExtent))); 62 | } 63 | 64 | @Test 65 | public void testGetStrictVariantPredicateInsufficientFields() { 66 | thrown.expect(IllegalArgumentException.class); 67 | thrown.expectMessage(containsString("Insufficient fields requested in partial response. 
" 68 | + "At a minimum include 'variants(start)' to enforce a strict shard boundary.")); 69 | ShardBoundary.getStrictVariantPredicate(123, "variants(alternate_bases)"); 70 | } 71 | 72 | static Read readHelper(int start, int end) { 73 | Position position = Position.newBuilder().setPosition(start).build(); 74 | LinearAlignment alignment = LinearAlignment.newBuilder().setPosition(position).build(); 75 | return Read.newBuilder().setAlignment(alignment).setFragmentLength(end-start).build(); 76 | } 77 | 78 | @Test 79 | public void testGetStrictReadPredicate() { 80 | long start = 1000L; 81 | long end = 2000L; // This is not used, but its the "extent" mentioned below. 82 | 83 | Read overlapStartWithinExtent = readHelper(900,1005); 84 | Read overlapStartExtent = readHelper(999, 5000); 85 | Read atStartWithinExtent = readHelper(1000, 1002); 86 | Read atStartOverlapExtent = readHelper(1000, 5000); 87 | Read beyondStartWithinExtent = readHelper(1500, 1502); 88 | Read beyondOverlapExtent = readHelper(1500, 5000); 89 | Read[] reads = new Read[] { overlapStartWithinExtent, overlapStartExtent, atStartWithinExtent, 90 | atStartOverlapExtent, beyondStartWithinExtent, beyondOverlapExtent }; 91 | 92 | Predicate shardPredicate = ShardBoundary.getStrictReadPredicate(start, null); 93 | List filteredReads = Lists.newArrayList(Iterables.filter(Arrays.asList(reads), shardPredicate)); 94 | assertEquals(4, filteredReads.size()); 95 | assertThat(filteredReads, CoreMatchers.allOf(CoreMatchers.hasItems(atStartWithinExtent, 96 | atStartOverlapExtent, beyondStartWithinExtent, beyondOverlapExtent))); 97 | } 98 | 99 | @Test 100 | public void testGetStrictReadPredicateInsufficientFields() { 101 | thrown.expect(IllegalArgumentException.class); 102 | thrown.expectMessage(containsString("Insufficient fields requested in partial response. 
" 103 | + "At a minimum include 'alignments(alignment)' to enforce a strict shard boundary.")); 104 | ShardBoundary.getStrictReadPredicate(123, "alignments(alignedSequence)"); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/GenomicsFactoryITCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2014 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.google.cloud.genomics.utils; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.fail; 20 | 21 | import com.google.api.client.http.HttpRequest; 22 | import com.google.api.client.http.HttpResponseException; 23 | import com.google.api.client.http.HttpTransport; 24 | import com.google.api.client.http.LowLevelHttpRequest; 25 | import com.google.api.client.http.LowLevelHttpResponse; 26 | import com.google.api.client.testing.http.HttpTesting; 27 | import com.google.api.client.testing.http.MockHttpTransport; 28 | import com.google.api.client.testing.http.MockLowLevelHttpRequest; 29 | import com.google.api.client.testing.http.MockLowLevelHttpResponse; 30 | import com.google.api.services.genomics.Genomics; 31 | 32 | import org.junit.Test; 33 | import org.junit.runner.RunWith; 34 | import org.junit.runners.JUnit4; 35 | 36 | import java.io.IOException; 37 | 38 | @RunWith(JUnit4.class) 39 | public class GenomicsFactoryITCase { 40 | 41 | @Test 42 | public void testBackendErrorRetries() throws Exception { 43 | 44 | HttpTransport transport = new MockHttpTransport() { 45 | @Override 46 | public LowLevelHttpRequest buildRequest(String method, String url) throws IOException { 47 | return new MockLowLevelHttpRequest() { 48 | @Override 49 | public LowLevelHttpResponse execute() throws IOException { 50 | MockLowLevelHttpResponse response = new MockLowLevelHttpResponse(); 51 | response.setStatusCode(500); 52 | return response; 53 | } 54 | }; 55 | } 56 | }; 57 | 58 | GenomicsFactory genomicsFactory = 59 | GenomicsFactory.builder("test_client").setHttpTransport(transport).build(); 60 | Genomics genomics = genomicsFactory.fromApiKey("xyz"); 61 | 62 | HttpRequest request = 63 | genomics.getRequestFactory().buildGetRequest(HttpTesting.SIMPLE_GENERIC_URL); 64 | try { 65 | request.execute(); 66 | fail("this request should not have succeeded"); 67 | } catch (HttpResponseException e) { 68 | } 69 | 70 | assertEquals(1, 
genomicsFactory.initializedRequestsCount()); 71 | assertEquals(6, genomicsFactory.unsuccessfulResponsesCount()); 72 | assertEquals(0, genomicsFactory.ioExceptionsCount()); 73 | } 74 | 75 | @Test 76 | public void testIOExceptionRetries() throws Exception { 77 | 78 | HttpTransport transport = new MockHttpTransport() { 79 | @Override 80 | public LowLevelHttpRequest buildRequest(String method, String url) throws IOException { 81 | return new MockLowLevelHttpRequest() { 82 | @Override 83 | public LowLevelHttpResponse execute() throws IOException { 84 | throw new IOException(); 85 | } 86 | }; 87 | } 88 | }; 89 | 90 | GenomicsFactory genomicsFactory = 91 | GenomicsFactory.builder("test_client").setHttpTransport(transport).build(); 92 | Genomics genomics = genomicsFactory.fromApiKey("xyz"); 93 | 94 | HttpRequest request = 95 | genomics.getRequestFactory().buildGetRequest(HttpTesting.SIMPLE_GENERIC_URL); 96 | try { 97 | request.execute(); 98 | fail("this request should not have succeeded"); 99 | } catch (IOException e) { 100 | } 101 | 102 | assertEquals(1, genomicsFactory.initializedRequestsCount()); 103 | assertEquals(0, genomicsFactory.unsuccessfulResponsesCount()); 104 | assertEquals(6, genomicsFactory.ioExceptionsCount()); 105 | } 106 | 107 | @Test 108 | public void testUserErrorRetries() throws Exception { 109 | 110 | HttpTransport transport = new MockHttpTransport() { 111 | @Override 112 | public LowLevelHttpRequest buildRequest(String method, String url) throws IOException { 113 | return new MockLowLevelHttpRequest() { 114 | @Override 115 | public LowLevelHttpResponse execute() throws IOException { 116 | MockLowLevelHttpResponse response = new MockLowLevelHttpResponse(); 117 | response.setStatusCode(404); 118 | return response; 119 | } 120 | }; 121 | } 122 | }; 123 | 124 | GenomicsFactory genomicsFactory = 125 | GenomicsFactory.builder("test_client").setHttpTransport(transport).build(); 126 | Genomics genomics = genomicsFactory.fromApiKey("xyz"); 127 | 128 | 
HttpRequest request = 129 | genomics.getRequestFactory().buildGetRequest(HttpTesting.SIMPLE_GENERIC_URL); 130 | try { 131 | request.execute(); 132 | fail("this request should not have succeeded"); 133 | } catch (HttpResponseException e) { 134 | } 135 | 136 | assertEquals(1, genomicsFactory.initializedRequestsCount()); 137 | assertEquals(1, genomicsFactory.unsuccessfulResponsesCount()); 138 | assertEquals(0, genomicsFactory.ioExceptionsCount()); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/GenomicsFactoryTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2014 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.google.cloud.genomics.utils; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import com.google.api.client.googleapis.json.GoogleJsonResponseException; 21 | import com.google.api.client.http.HttpRequest; 22 | import com.google.api.client.testing.http.HttpTesting; 23 | import com.google.api.services.genomics.Genomics; 24 | import com.google.api.services.genomics.GenomicsScopes; 25 | import com.google.api.services.storage.Storage; 26 | import com.google.api.services.storage.StorageScopes; 27 | import com.google.common.collect.Lists; 28 | 29 | import org.junit.Test; 30 | import org.junit.runner.RunWith; 31 | import org.junit.runners.JUnit4; 32 | 33 | @RunWith(JUnit4.class) 34 | public class GenomicsFactoryTest { 35 | 36 | @Test 37 | public void testBasic() throws Exception { 38 | GenomicsFactory genomicsFactory = GenomicsFactory.builder("test_client").build(); 39 | 40 | Genomics genomics = genomicsFactory.fromApiKey("xyz"); 41 | assertEquals(0, genomicsFactory.initializedRequestsCount()); 42 | 43 | // TODO: Mock out more of this test if it becomes a problem 44 | try { 45 | genomics.operations().get("operations/123").execute(); 46 | } catch (GoogleJsonResponseException e) { 47 | // Expected 48 | } 49 | assertEquals(1, genomicsFactory.initializedRequestsCount()); 50 | assertEquals(1, genomicsFactory.unsuccessfulResponsesCount()); 51 | assertEquals(0, genomicsFactory.ioExceptionsCount()); 52 | 53 | try { 54 | genomics.readgroupsets().get("123").execute(); 55 | } catch (GoogleJsonResponseException e) { 56 | // Expected 57 | } 58 | assertEquals(2, genomicsFactory.initializedRequestsCount()); 59 | assertEquals(2, genomicsFactory.unsuccessfulResponsesCount()); 60 | assertEquals(0, genomicsFactory.ioExceptionsCount()); 61 | } 62 | 63 | @Test 64 | public void testFromOfflineAuth() throws Exception { 65 | GenomicsFactory genomicsFactory = GenomicsFactory.builder("test_client").build(); 66 | OfflineAuth auth = new OfflineAuth("xyz"); 67 | 
Genomics genomics = genomicsFactory.fromOfflineAuth(auth); 68 | 69 | assertEquals(0, genomicsFactory.initializedRequestsCount()); 70 | 71 | try { 72 | genomics.operations().get("operations/123").execute(); 73 | } catch (GoogleJsonResponseException e) { 74 | // Expected 75 | } 76 | assertEquals(1, genomicsFactory.initializedRequestsCount()); 77 | 78 | try { 79 | genomics.operations().get("operations/123").execute(); 80 | } catch (GoogleJsonResponseException e) { 81 | // Expected 82 | } 83 | assertEquals(2, genomicsFactory.initializedRequestsCount()); 84 | } 85 | 86 | @Test 87 | public void testCustomBuilder() throws Exception { 88 | GenomicsFactory factory = GenomicsFactory.builder("test_client") 89 | .setScopes(Lists.newArrayList(StorageScopes.DEVSTORAGE_READ_ONLY, GenomicsScopes.GENOMICS)) 90 | .build(); 91 | 92 | Storage storage = factory.fromApiKey(new Storage.Builder( 93 | factory.getHttpTransport(), factory.getJsonFactory(), null), "xyz").build(); 94 | assertEquals(0, factory.initializedRequestsCount()); 95 | 96 | try { 97 | storage.buckets().get("123").execute(); 98 | } catch (GoogleJsonResponseException e) { 99 | // Expected 100 | } 101 | assertEquals(1, factory.initializedRequestsCount()); 102 | 103 | Genomics genomics = factory.fromApiKey("abc"); 104 | try { 105 | genomics.operations().get("operations/123").execute(); 106 | } catch (GoogleJsonResponseException e) { 107 | // Expected 108 | } 109 | assertEquals(2, factory.initializedRequestsCount()); 110 | } 111 | 112 | @Test 113 | public void testDefaultTimeoutConfiguration() throws Exception { 114 | GenomicsFactory genomicsFactory = GenomicsFactory.builder("test_client").build(); 115 | 116 | Genomics genomics = genomicsFactory.fromApiKey("xyz"); 117 | HttpRequest request = 118 | genomics.getRequestFactory().buildGetRequest(HttpTesting.SIMPLE_GENERIC_URL); 119 | assertEquals(20000, request.getConnectTimeout()); 120 | assertEquals(20000, request.getReadTimeout()); 121 | assertEquals(5, 
request.getNumberOfRetries()); 122 | } 123 | 124 | @Test 125 | public void testTimeoutConfiguration() throws Exception { 126 | GenomicsFactory genomicsFactory = 127 | GenomicsFactory.builder("test_client").setConnectTimeout(42).setReadTimeout(7) 128 | .setNumberOfRetries(9).build(); 129 | 130 | Genomics genomics = genomicsFactory.fromApiKey("xyz"); 131 | HttpRequest request = 132 | genomics.getRequestFactory().buildGetRequest(HttpTesting.SIMPLE_GENERIC_URL); 133 | assertEquals(42, request.getConnectTimeout()); 134 | assertEquals(7, request.getReadTimeout()); 135 | assertEquals(9, request.getNumberOfRetries()); 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/grpc/ReadStreamIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 
13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import com.google.cloud.genomics.utils.OfflineAuth; 17 | import com.google.cloud.genomics.utils.ShardBoundary; 18 | import com.google.cloud.genomics.utils.ShardBoundary.Requirement; 19 | import com.google.common.base.Predicate; 20 | import com.google.genomics.v1.Read; 21 | import com.google.genomics.v1.StreamReadsRequest; 22 | import com.google.genomics.v1.StreamReadsResponse; 23 | import com.google.genomics.v1.StreamingReadServiceGrpc; 24 | import com.google.genomics.v1.StreamingReadServiceGrpc.StreamingReadServiceBlockingStub; 25 | 26 | import io.grpc.ManagedChannel; 27 | 28 | import java.io.IOException; 29 | import java.security.GeneralSecurityException; 30 | import java.util.Iterator; 31 | import java.util.List; 32 | 33 | /** 34 | * An iterator for streaming genomic reads via gRPC with shard boundary semantics. 35 | * 36 | * This class integrates complex retry logic, which upon encountering a failure 37 | * will resume the stream at the last known valid start position, skipping over any 38 | * data that was already returned to the client prior to the failure. 39 | */ 40 | public class ReadStreamIterator 41 | extends 42 | GenomicsStreamIterator { 43 | 44 | /** 45 | * Create a stream iterator that can enforce shard boundary semantics. 46 | * 47 | * @param auth The OfflineAuth to use for the request. 48 | * @param request The request for the shard of data. 49 | * @param shardBoundary The shard boundary semantics to enforce. 50 | * @param fields Which fields to include in a partial response or null for all. 
51 | * @throws IOException if creating the channel from the OfflineAuth fails. 52 | * @throws GeneralSecurityException if creating the channel from the OfflineAuth fails. 53 | */ 54 | public static ReadStreamIterator enforceShardBoundary(OfflineAuth auth, 55 | StreamReadsRequest request, Requirement shardBoundary, String fields) throws IOException, 56 | GeneralSecurityException { 57 | return ReadStreamIterator.enforceShardBoundary(GenomicsChannel.fromOfflineAuth(auth, fields), request, 58 | shardBoundary, fields); 59 | } 60 | 61 | /** 62 | * Create a stream iterator that can enforce shard boundary semantics. 63 | * 64 | * @param channel The ManagedChannel. 65 | * @param request The request for the shard of data. 66 | * @param shardBoundary The shard boundary semantics to enforce. Only STRICT installs a client-side predicate (built from the request start); any other value results in no filtering. 67 | * @param fields Used to check whether the specified fields would meet the minimum required 68 | * fields for the shard boundary predicate, if applicable. 69 | */ 70 | public static ReadStreamIterator enforceShardBoundary(ManagedChannel channel, 71 | StreamReadsRequest request, Requirement shardBoundary, String fields) { 72 | Predicate shardPredicate = 73 | (ShardBoundary.Requirement.STRICT == shardBoundary) ? ShardBoundary 74 | .getStrictReadPredicate(request.getStart(), fields) : null; 75 | return new ReadStreamIterator(channel, request, shardPredicate); 76 | } 77 | 78 | /** 79 | * Create a stream iterator. 80 | * 81 | * @param channel The ManagedChannel. 82 | * @param request The request for the shard of data. 83 | * @param shardPredicate A predicate used to client-side filter results returned (e.g., enforce a 84 | * shard boundary) or null for no filtering. 
85 | */ 86 | public ReadStreamIterator(ManagedChannel channel, StreamReadsRequest request, 87 | Predicate shardPredicate) { 88 | super(channel, request, shardPredicate); 89 | } 90 | /* Creates the blocking stub for the streaming read service on the given channel. */ 91 | @Override 92 | StreamingReadServiceBlockingStub createStub(ManagedChannel channel) { 93 | return StreamingReadServiceGrpc.newBlockingStub(channel); 94 | } 95 | /* Opens the server-side stream of read responses for the given request. */ 96 | @Override 97 | Iterator createIteratorFromStub(StreamReadsRequest request) { 98 | return stub.streamReads(request); 99 | } 100 | 101 | @Override 102 | long getRequestStart(StreamReadsRequest request) { 103 | return request.getStart(); 104 | } 105 | /* The start of a read is taken from the position of its alignment. */ 106 | @Override 107 | long getDataItemStart(Read dataItem) { 108 | return dataItem.getAlignment().getPosition().getPosition(); 109 | } 110 | 111 | @Override 112 | String getDataItemId(Read dataItem) { 113 | return dataItem.getId(); 114 | } 115 | /* Copies the original request, replacing only its start. */ 116 | @Override 117 | StreamReadsRequest getRevisedRequest(long updatedStart) { 118 | return StreamReadsRequest.newBuilder(originalRequest).setStart(updatedStart).build(); 119 | } 120 | 121 | @Override 122 | List getDataList(StreamReadsResponse response) { 123 | return response.getAlignmentsList(); 124 | } 125 | /* Returns a copy of the response whose alignments are replaced by dataList. 
*/ 126 | @Override 127 | StreamReadsResponse buildResponse(StreamReadsResponse response, Iterable dataList) { 128 | return StreamReadsResponse.newBuilder(response).clearAlignments().addAllAlignments(dataList) 129 | .build(); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/ReadUtilsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2014 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.google.cloud.genomics.utils; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import com.google.api.services.genomics.model.CigarUnit; 21 | import com.google.api.services.genomics.model.LinearAlignment; 22 | import com.google.api.services.genomics.model.Position; 23 | import com.google.api.services.genomics.model.Read; 24 | import com.google.common.collect.Lists; 25 | 26 | import org.junit.Test; 27 | import org.junit.runner.RunWith; 28 | import org.junit.runners.JUnit4; 29 | 30 | import htsjdk.samtools.SAMFileHeader; 31 | import htsjdk.samtools.SAMRecord; 32 | import htsjdk.samtools.SamReader; 33 | import htsjdk.samtools.SamReaderFactory; 34 | 35 | import java.io.File; 36 | import java.io.IOException; 37 | import java.util.List; 38 | 39 | @RunWith(JUnit4.class) 40 | public class ReadUtilsTest { 41 | 42 | @Test 43 | public void testGetCigarString() throws Exception { 44 | Read read = new Read(); 45 | assertEquals(null, ReadUtils.getCigarString(read)); 46 | 47 | List cigar = Lists.newArrayList( 48 | new CigarUnit().setOperation("ALIGNMENT_MATCH").setOperationLength(100L), 49 | new CigarUnit().setOperation("CLIP_SOFT").setOperationLength(3L)); 50 | read.setAlignment(new LinearAlignment().setCigar(cigar)); 51 | assertEquals("100M3S", ReadUtils.getCigarString(read)); 52 | } 53 | 54 | @Test 55 | public void testGetFlags() throws Exception { 56 | Read read = new Read(); 57 | 58 | // Read unmapped (4) 59 | assertEquals(4, ReadUtils.getFlags(read)); 60 | 61 | // All conditions false 62 | 
Position position = new Position().setPosition(1L); 63 | read.setAlignment(new LinearAlignment().setPosition(position)).setNextMatePosition(position); 64 | assertEquals(0, ReadUtils.getFlags(read)); 65 | 66 | // Read paired (1) + Proper Pair (2) + Read and Mate unmapped (12) + 67 | // First in pair (64) + Secondary (256) + Duplicate (1024) + Supplementary (2048) 68 | read = new Read(); 69 | read.setNumberReads(2); 70 | read.setProperPlacement(true); 71 | read.setReadNumber(0); 72 | read.setSecondaryAlignment(true); 73 | read.setDuplicateFragment(true); 74 | read.setSupplementaryAlignment(true); 75 | 76 | assertEquals(3407, ReadUtils.getFlags(read)); 77 | } 78 | 79 | @Test 80 | public void testConversion() { 81 | SAMRecord record = new SAMRecord(null); 82 | record.setReferenceName("chr20"); 83 | record.setAlignmentStart(1); 84 | record.setCigarString(String.format("%dM", 10)); 85 | record.setMateReferenceName("chr20"); 86 | record.setMateAlignmentStart(100); 87 | record.setReadPairedFlag(true); 88 | record.setFirstOfPairFlag(true); 89 | record.setMateNegativeStrandFlag(true); 90 | 91 | Read read = ReadUtils.makeRead(record); 92 | assertEquals((long)0, (long)read.getAlignment().getPosition().getPosition()); 93 | assertEquals(1, read.getAlignment().getCigar().size()); 94 | assertEquals("chr20", read.getAlignment().getPosition().getReferenceName()); 95 | assertEquals(0, (int)read.getReadNumber()); 96 | assertEquals((long)99, (long)read.getNextMatePosition().getPosition()); 97 | assertEquals("chr20", read.getNextMatePosition().getReferenceName()); 98 | assertEquals(true, read.getNextMatePosition().getReverseStrand()); 99 | } 100 | @Test 101 | public void testByteArrayAttributes() { 102 | // Client code of SamRecord can pass anything to setAttribute including 103 | // byte[] (which doesn't have toString defined). This verifies 104 | // we handle that case correctly. 
105 | SAMRecord record = new SAMRecord(null); 106 | record.setReferenceName("chr20"); 107 | record.setAlignmentStart(1); 108 | record.setCigarString(String.format("%dM", 10)); 109 | String s = "123456"; 110 | record.setAttribute("FZ", s.getBytes()); 111 | 112 | Read read = ReadUtils.makeRead(record); 113 | assertEquals((long)0, (long)read.getAlignment().getPosition().getPosition()); 114 | assertEquals(1, read.getAlignment().getCigar().size()); 115 | assertEquals("chr20", read.getAlignment().getPosition().getReferenceName()); 116 | assertEquals(s, read.getInfo().get("FZ").get(0)); 117 | } 118 | 119 | @Test 120 | public void SamToReadToSamTest() throws IOException { 121 | String filePath = "src/test/resources/com/google/cloud/genomics/utils/conversion_test.sam"; 122 | File samInput = new File(filePath); 123 | SamReader reads = SamReaderFactory.makeDefault().open(samInput); 124 | SAMFileHeader header = reads.getFileHeader(); 125 | 126 | int numReads = 0; 127 | for (SAMRecord sam : reads){ 128 | Read read = ReadUtils.makeRead(sam); 129 | SAMRecord newSam = ReadUtils.makeSAMRecord(read, header ); 130 | assertEquals(newSam.getSAMString(), sam.getSAMString()); 131 | numReads++; 132 | } 133 | assertEquals(19, numReads);//sanity check to make sure we actually read the file 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/grpc/VariantStreamIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. 
You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import com.google.cloud.genomics.utils.OfflineAuth; 17 | import com.google.cloud.genomics.utils.ShardBoundary; 18 | import com.google.cloud.genomics.utils.ShardBoundary.Requirement; 19 | import com.google.common.base.Predicate; 20 | import com.google.genomics.v1.StreamVariantsRequest; 21 | import com.google.genomics.v1.StreamVariantsResponse; 22 | import com.google.genomics.v1.StreamingVariantServiceGrpc; 23 | import com.google.genomics.v1.StreamingVariantServiceGrpc.StreamingVariantServiceBlockingStub; 24 | import com.google.genomics.v1.Variant; 25 | 26 | import io.grpc.ManagedChannel; 27 | 28 | import java.io.IOException; 29 | import java.security.GeneralSecurityException; 30 | import java.util.Iterator; 31 | import java.util.List; 32 | 33 | /** 34 | * An iterator for streaming genomic variants via gRPC with shard boundary semantics. 35 | * 36 | * This class integrates complex retry logic, which upon encountering a failure 37 | * will resume the stream at the last known valid start position, skipping over any 38 | * data that was already returned to the client prior to the failure. 39 | */ 40 | public class VariantStreamIterator 41 | extends 42 | GenomicsStreamIterator { 43 | 44 | /** 45 | * Create a stream iterator that can enforce shard boundary semantics. 46 | * 47 | * @param auth The OfflineAuth to use for the request. 48 | * @param request The request for the shard of data. 49 | * @param shardBoundary The shard boundary semantics to enforce. 
50 | * @param fields Which fields to include in a partial response or null for all. 51 | * @throws IOException if creating the channel from the OfflineAuth fails. 52 | * @throws GeneralSecurityException if creating the channel from the OfflineAuth fails. 53 | */ 54 | public static VariantStreamIterator enforceShardBoundary(OfflineAuth auth, 55 | StreamVariantsRequest request, Requirement shardBoundary, String fields) throws IOException, 56 | GeneralSecurityException { 57 | return VariantStreamIterator.enforceShardBoundary(GenomicsChannel.fromOfflineAuth(auth, fields), 58 | request, shardBoundary, fields); 59 | } 60 | 61 | /** 62 | * Create a stream iterator that can enforce shard boundary semantics. 63 | * 64 | * @param channel The ManagedChannel. 65 | * @param request The request for the shard of data. 66 | * @param shardBoundary The shard boundary semantics to enforce. Only STRICT installs a client-side predicate (built from the request start); any other value results in no filtering. 67 | * @param fields Used to check whether the specified fields would meet the minimum required 68 | * fields for the shard boundary predicate, if applicable. 69 | */ 70 | public static VariantStreamIterator enforceShardBoundary(ManagedChannel channel, 71 | StreamVariantsRequest request, Requirement shardBoundary, String fields) { 72 | Predicate shardPredicate; 73 | if(ShardBoundary.Requirement.STRICT == shardBoundary) { 74 | shardPredicate = ShardBoundary.getStrictVariantPredicate(request.getStart(), fields); 75 | } else { 76 | shardPredicate = null; 77 | } 78 | 79 | return new VariantStreamIterator(channel, request, shardPredicate); 80 | } 81 | 82 | /** 83 | * Create a stream iterator. 84 | * 85 | * @param channel The ManagedChannel. 86 | * @param request The request for the shard of data. 87 | * @param shardPredicate A predicate used to client-side filter results returned (e.g., enforce a 88 | * shard boundary and/or limit to SNPs only) or null for no filtering. 
89 | */ 90 | public VariantStreamIterator(ManagedChannel channel, StreamVariantsRequest request, 91 | Predicate shardPredicate) { 92 | super(channel, request, shardPredicate); 93 | } 94 | /* Creates the blocking stub for the streaming variant service on the given channel. */ 95 | @Override 96 | StreamingVariantServiceBlockingStub createStub(ManagedChannel channel) { 97 | return StreamingVariantServiceGrpc.newBlockingStub(channel); 98 | } 99 | /* Opens the server-side stream of variant responses for the given request. */ 100 | @Override 101 | Iterator createIteratorFromStub(StreamVariantsRequest request) { 102 | return stub.streamVariants(request); 103 | } 104 | 105 | @Override 106 | long getRequestStart(StreamVariantsRequest request) { 107 | return request.getStart(); 108 | } 109 | 110 | @Override 111 | long getDataItemStart(Variant dataItem) { 112 | return dataItem.getStart(); 113 | } 114 | 115 | @Override 116 | String getDataItemId(Variant dataItem) { 117 | return dataItem.getId(); 118 | } 119 | /* Copies the original request, replacing only its start. */ 120 | @Override 121 | StreamVariantsRequest getRevisedRequest(long updatedStart) { 122 | return StreamVariantsRequest.newBuilder(originalRequest).setStart(updatedStart).build(); 123 | } 124 | 125 | @Override 126 | List getDataList(StreamVariantsResponse response) { 127 | return response.getVariantsList(); 128 | } 129 | /* Returns a copy of the response whose variants are replaced by dataList. 
*/ 130 | @Override 131 | StreamVariantsResponse buildResponse(StreamVariantsResponse response, Iterable dataList) { 132 | return StreamVariantsResponse.newBuilder(response).clearVariants().addAllVariants(dataList) 133 | .build(); 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/TestHelper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2014 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. 
You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import static org.junit.Assert.assertEquals; 17 | 18 | import com.google.api.client.util.ExponentialBackOff; 19 | import com.google.common.base.Function; 20 | import com.google.common.base.Joiner; 21 | import com.google.common.collect.ImmutableList; 22 | import com.google.common.collect.ImmutableMap; 23 | import com.google.common.collect.Lists; 24 | import com.google.genomics.v1.LinearAlignment; 25 | import com.google.genomics.v1.Position; 26 | import com.google.genomics.v1.Read; 27 | import com.google.genomics.v1.Variant; 28 | import com.google.genomics.v1.VariantCall; 29 | import com.google.protobuf.Message; 30 | 31 | import java.util.Arrays; 32 | import java.util.HashSet; 33 | import java.util.List; 34 | import java.util.Map; 35 | import java.util.Set; 36 | import java.util.UUID; 37 | 38 | public class TestHelper { 39 | 40 | public static final List EMPTY_ALT_LIST = ImmutableList.of(); 41 | 42 | public static final Map> CALL_MAP = ImmutableMap.>builder() 43 | .put("hom-NN", Arrays.asList(-1, -1)) 44 | .put("het-NR", Arrays.asList(-1, 0)) 45 | .put("hom-RR", Arrays.asList(0, 0)) 46 | .put("het-RA", Arrays.asList(0, 1)) 47 | .put("hom-AA", Arrays.asList(1, 1)) 48 | .put("het-AA", Arrays.asList(1, 2)) 49 | .build(); 50 | 51 | public static VariantCall.Builder makeCall(String name, String callType) { 52 | return VariantCall.newBuilder() 53 | .setCallSetName(name) 54 | .addAllGenotype(CALL_MAP.get(callType)); 55 | } 56 | 57 | public static VariantCall.Builder 
makeCall(String name, Integer... gt) { 58 | return VariantCall.newBuilder() 59 | .setCallSetName(name) 60 | .addAllGenotype(Arrays.asList(gt)); 61 | } 62 | 63 | public static Variant.Builder makeVariant(String chr, long start, long end, String ref, List alts) { 64 | return Variant.newBuilder() 65 | .setReferenceName(chr) 66 | .setStart(start) 67 | .setEnd(end) 68 | .setReferenceBases(ref) 69 | .addAllAlternateBases(alts); 70 | } 71 | 72 | public static Variant.Builder makeVariant(String chr, long start, String ref, String... alts) { 73 | return Variant.newBuilder() 74 | .addFilter("PASS") 75 | .setReferenceName(chr) 76 | .setStart(start) 77 | .setEnd(start + ref.length()) 78 | .setReferenceBases(ref) 79 | .addAllAlternateBases(Arrays.asList(alts)); 80 | } 81 | 82 | public static Variant.Builder makeVariant(String chr, long start, String ref, List alts, String... callTypes) { 83 | List calls = Lists.newArrayList(); 84 | for (String callType : callTypes) { 85 | calls.add(TestHelper.makeCall(Joiner.on('-').join(callType, alts), callType).build()); 86 | } 87 | return makeVariant(chr, start, start + ref.length(), ref, alts) 88 | .addAllCalls(calls); 89 | } 90 | 91 | public static Variant.Builder makeBlockRecord(String chr, long start, long end, String ref, List alts) { 92 | return makeVariant(chr, start, end, ref, alts) 93 | .addCalls(TestHelper.makeCall(Joiner.on('-').join("hom-RR", alts), "hom-RR")); 94 | } 95 | 96 | public static Variant makeVariant(long start, long end) { 97 | return Variant.newBuilder() 98 | .setId(UUID.randomUUID().toString()) 99 | .setStart(start) 100 | .setEnd(end) 101 | .build(); 102 | } 103 | 104 | public static Read makeRead(long start, long end) { 105 | Position position = Position.newBuilder().setPosition(start).build(); 106 | LinearAlignment alignment = LinearAlignment.newBuilder().setPosition(position).build(); 107 | return Read.newBuilder() 108 | .setId(UUID.randomUUID().toString()) 109 | .setAlignment(alignment) 110 | 
.setFragmentLength((int) (end - start)) 111 | .build(); 112 | } 113 | 114 | public static void consumeStreamTest(final GenomicsStreamIterator iter, int expectedNumItems) { 115 | // Tweak the backoff to be static instead of exponential since we are possibly injecting 116 | // fake faults. Also note that this is used by both unit and integration tests. 117 | iter.backoff = 118 | new ExponentialBackOff.Builder().setInitialIntervalMillis(50).setMultiplier(1).build(); 119 | 120 | Function getId = new Function() { 121 | @Override 122 | public String apply(Message m) { 123 | return iter.getDataItemId(m); 124 | } 125 | }; 126 | 127 | Set uniqueReceivedIds = new HashSet(expectedNumItems); 128 | int numItemsReceived = 0; 129 | while (iter.hasNext()) { 130 | List items = iter.getDataList(iter.next()); 131 | numItemsReceived += items.size(); 132 | System.out.println("Received so far: " + numItemsReceived); 133 | uniqueReceivedIds.addAll(Lists.transform(items, getId)); 134 | } 135 | assertEquals("confirm that we received all the data we expected", expectedNumItems, 136 | uniqueReceivedIds.size()); 137 | assertEquals("confirm that all data received is unique", uniqueReceivedIds.size(), 138 | numItemsReceived); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/FaultyGenomicsServerITCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. 
You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | 15 | package com.google.cloud.genomics.utils.grpc; 16 | 17 | import com.google.cloud.genomics.utils.IntegrationTestHelper; 18 | import com.google.cloud.genomics.utils.ShardBoundary; 19 | import com.google.cloud.genomics.utils.ShardUtils; 20 | import com.google.common.collect.ImmutableList; 21 | import com.google.genomics.v1.StreamVariantsRequest; 22 | import com.google.genomics.v1.StreamVariantsResponse; 23 | import com.google.genomics.v1.StreamingVariantServiceGrpc; 24 | 25 | import org.junit.AfterClass; 26 | import org.junit.BeforeClass; 27 | import org.junit.Test; 28 | import org.junit.runner.RunWith; 29 | import org.junit.runners.JUnit4; 30 | 31 | import io.grpc.ManagedChannel; 32 | import io.grpc.Server; 33 | import io.grpc.Status; 34 | import io.grpc.inprocess.InProcessChannelBuilder; 35 | import io.grpc.inprocess.InProcessServerBuilder; 36 | import io.grpc.stub.StreamObserver; 37 | 38 | import java.io.IOException; 39 | import java.security.GeneralSecurityException; 40 | import java.util.Random; 41 | 42 | @RunWith(JUnit4.class) 43 | public class FaultyGenomicsServerITCase { 44 | public static final String SERVER_NAME = "integrationTest"; 45 | public static final StreamVariantsRequest PROTOTYPE = StreamVariantsRequest.newBuilder() 46 | .setProjectId(IntegrationTestHelper.getTEST_PROJECT()) 47 | .setVariantSetId(IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET) 48 | .build(); 49 | 50 | protected static Server server; 51 | protected static ManagedChannel genomicsChannel; 52 | 53 | // Variable accessed by both the InProcess 
Server executor threads and the test thread. 54 | protected static volatile double faultPercentage = 0.0; 55 | 56 | /** 57 | * Starts the in-process server that calls the real service. 58 | * 59 | * @throws GeneralSecurityException 60 | * @throws IOException 61 | */ 62 | @BeforeClass 63 | public static void startServer() throws IOException, GeneralSecurityException { 64 | try { 65 | server = InProcessServerBuilder.forName(SERVER_NAME) 66 | .addService(new VariantsIntegrationServerImpl()) 67 | .build().start(); 68 | } catch (IOException ex) { 69 | throw new RuntimeException(ex); 70 | } 71 | genomicsChannel = GenomicsChannel.fromOfflineAuth(IntegrationTestHelper.getAuthFromApplicationDefaultCredential()); 72 | } 73 | 74 | @AfterClass 75 | public static void stopServer() { 76 | server.shutdownNow(); 77 | } 78 | 79 | protected static class VariantsIntegrationServerImpl extends 80 | StreamingVariantServiceGrpc.StreamingVariantServiceImplBase { 81 | final Random random = new Random(); 82 | 83 | @Override 84 | public void streamVariants(StreamVariantsRequest request, 85 | final StreamObserver responseObserver) { 86 | 87 | StreamingVariantServiceGrpc.newStub(genomicsChannel).streamVariants(request, 88 | new StreamObserver() { 89 | private boolean injectedError; 90 | 91 | @Override 92 | public void onNext(StreamVariantsResponse response) { 93 | if (injectedError) { 94 | return; 95 | } 96 | double rand = random.nextDouble(); 97 | if (faultPercentage > rand) { 98 | responseObserver.onError(Status.UNAVAILABLE.withDescription("injected fault") 99 | .asRuntimeException()); 100 | injectedError = true; 101 | // TODO: this works to cancel the call, but investigate other options 102 | throw new RuntimeException("cancel the call"); 103 | } 104 | responseObserver.onNext(response); 105 | } 106 | 107 | @Override 108 | public void onError(Throwable t) { 109 | if (injectedError) { 110 | return; 111 | } 112 | responseObserver.onError(t); 113 | } 114 | 115 | @Override 116 | public void 
onCompleted() { 117 | if (injectedError) { 118 | return; 119 | } 120 | responseObserver.onCompleted(); 121 | } 122 | }); 123 | } 124 | } 125 | 126 | public ManagedChannel createChannel() { 127 | return InProcessChannelBuilder.forName(SERVER_NAME).build(); 128 | } 129 | 130 | public void runRetryTest(final GenomicsStreamIterator iter, double percentage, int expectedNumItems) { 131 | FaultyGenomicsServerITCase.faultPercentage = percentage; 132 | TestHelper.consumeStreamTest(iter, expectedNumItems); 133 | } 134 | 135 | @Test 136 | public void testVariantRetries() { 137 | ImmutableList requests = 138 | ShardUtils.getVariantRequests(PROTOTYPE, 139 | 1000000000L, IntegrationTestHelper.PLATINUM_GENOMES_BRCA1_REFERENCES); 140 | VariantStreamIterator iter = VariantStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 141 | ShardBoundary.Requirement.STRICT, null); 142 | // Dev Note: this data currently comes back as 20 separate lists but this is controlled server-side. 143 | // We're using a pretty high fault rate here (25%) to ensure we see a few faults during each test run. 144 | runRetryTest(iter, 0.25, IntegrationTestHelper.PLATINUM_GENOMES_BRCA1_EXPECTED_NUM_VARIANTS); 145 | } 146 | 147 | } 148 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/grpc/GenomicsChannel.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. 
See the License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.genomics.utils.grpc;

import com.google.auth.oauth2.GoogleCredentials;
import com.google.cloud.genomics.utils.CredentialFactory;
import com.google.cloud.genomics.utils.OfflineAuth;
import com.google.common.base.Strings;

import io.grpc.ClientInterceptor;
import io.grpc.ManagedChannel;
import io.grpc.Metadata;
import io.grpc.auth.ClientAuthInterceptor;
import io.grpc.netty.GrpcSslContexts;
import io.grpc.netty.NegotiationType;
import io.grpc.netty.NettyChannelBuilder;
import io.grpc.stub.MetadataUtils;

import java.io.IOException;
import java.security.GeneralSecurityException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;

import javax.net.ssl.SSLException;

/**
 * A convenience class for creating gRPC channels to the Google Genomics API.
 */
public class GenomicsChannel {
  private static final String GENOMICS_ENDPOINT = "genomics.googleapis.com";
  private static final String GENOMICS_SCOPE = "https://www.googleapis.com/auth/genomics";
  private static final String PARTIAL_RESPONSE_HEADER = "X-Goog-FieldMask";

  /**
   * Produces daemon threads for the per-channel auth executor. The executor created in
   * {@link #fromCreds} is never shut down explicitly, so a non-daemon worker thread could keep the
   * JVM alive after the caller is otherwise finished.
   */
  private static final ThreadFactory AUTH_THREAD_FACTORY = new ThreadFactory() {
    @Override
    public Thread newThread(Runnable task) {
      Thread thread = Executors.defaultThreadFactory().newThread(task);
      thread.setDaemon(true);
      return thread;
    }
  };

  /**
   * Builds a TLS channel to the Genomics endpoint on port 443 with the given interceptors.
   *
   * @param interceptors The client interceptors to install on the channel.
   * @return The ManagedChannel.
   * @throws SSLException if the client SSL context cannot be built.
   */
  private static ManagedChannel getGenomicsManagedChannel(List<ClientInterceptor> interceptors)
      throws SSLException {
    // Java 8's implementation of GCM ciphers is extremely slow. Therefore we disable
    // them here.
    List<String> defaultCiphers = GrpcSslContexts.forClient().ciphers(null).build().cipherSuites();
    List<String> performantCiphers = new ArrayList<>();
    for (String cipher : defaultCiphers) {
      if (!cipher.contains("GCM")) {
        performantCiphers.add(cipher);
      }
    }

    return NettyChannelBuilder.forAddress(GENOMICS_ENDPOINT, 443)
        .negotiationType(NegotiationType.TLS)
        .sslContext(GrpcSslContexts.forClient().ciphers(performantCiphers).build())
        .intercept(interceptors)
        .build();
  }

  /**
   * Create a new gRPC channel to the Google Genomics API, using the provided credentials for auth.
   *
   * @param creds The credential.
   * @param fields Which fields to return in the partial response, or null for none.
   * @return The ManagedChannel.
   * @throws SSLException if the client SSL context cannot be built.
   */
  public static ManagedChannel fromCreds(GoogleCredentials creds, String fields) throws SSLException {
    List<ClientInterceptor> interceptors = new ArrayList<>();
    interceptors.add(new ClientAuthInterceptor(creds.createScoped(Arrays.asList(GENOMICS_SCOPE)),
        Executors.newSingleThreadExecutor(AUTH_THREAD_FACTORY)));
    if (!Strings.isNullOrEmpty(fields)) {
      // Request a partial response by attaching the field mask header to every call.
      Metadata headers = new Metadata();
      Metadata.Key<String> partialResponseHeader =
          Metadata.Key.of(PARTIAL_RESPONSE_HEADER, Metadata.ASCII_STRING_MARSHALLER);
      headers.put(partialResponseHeader, fields);
      interceptors.add(MetadataUtils.newAttachHeadersInterceptor(headers));
    }
    return getGenomicsManagedChannel(interceptors);
  }

  /**
   * Create a new gRPC channel to the Google Genomics API, using the application default credentials
   * for auth.
   *
   * @return The ManagedChannel.
   * @throws SSLException if the client SSL context cannot be built.
   * @throws IOException declared by the signature; propagated from obtaining the credentials.
   */
  public static ManagedChannel fromDefaultCreds() throws SSLException, IOException {
    return fromDefaultCreds(null);
  }

  /**
   * Create a new gRPC channel to the Google Genomics API, using the application default credentials
   * for auth.
   *
   * @param fields Which fields to return in the partial response, or null for none.
   * @return The ManagedChannel.
   * @throws SSLException if the client SSL context cannot be built.
   * @throws IOException declared by the signature; propagated from obtaining the credentials.
   */
  public static ManagedChannel fromDefaultCreds(String fields) throws SSLException, IOException {
    return fromCreds(CredentialFactory.getApplicationDefaultCredentials(), fields);
  }

  /**
   * Create a new gRPC channel to the Google Genomics API, using OfflineAuth or the application
   * default credentials.
   *
   * This library works with both the older and newer support for OAuth2 clients.
   *
   * https://developers.google.com/identity/protocols/application-default-credentials
   *
   * @param auth The OfflineAuth object.
   * @return The ManagedChannel.
   * @throws IOException if channel setup fails (includes SSLException from the SSL context).
   * @throws GeneralSecurityException part of the declared signature, kept for compatibility.
   */
  public static ManagedChannel fromOfflineAuth(OfflineAuth auth)
      throws IOException, GeneralSecurityException {
    return fromOfflineAuth(auth, null);
  }

  /**
   * Create a new gRPC channel to the Google Genomics API, using either OfflineAuth
   * or the application default credentials.
   *
   * This library will work with both the newer and older versions of OAuth2 client-side support.
   *
   * https://developers.google.com/identity/protocols/application-default-credentials
   *
   * @param auth The OfflineAuth object.
   * @param fields Which fields to return in the partial response, or null for none.
   * @return The ManagedChannel.
   * @throws IOException if channel setup fails (includes SSLException from the SSL context).
   * @throws GeneralSecurityException part of the declared signature, kept for compatibility.
   */
  public static ManagedChannel fromOfflineAuth(OfflineAuth auth, String fields)
      throws IOException, GeneralSecurityException {
    return fromCreds(auth.getCredentials(), fields);
  }
}
-------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/Contig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2014 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.google.cloud.genomics.utils; 17 | 18 | import static com.google.common.base.Objects.equal; 19 | import static com.google.common.collect.Lists.newArrayList; 20 | import static java.util.Objects.hash; 21 | import static java.util.Objects.requireNonNull; 22 | 23 | import com.google.api.client.util.Preconditions; 24 | import com.google.common.base.Function; 25 | import com.google.common.base.Splitter; 26 | import com.google.common.collect.Iterables; 27 | import com.google.common.collect.Lists; 28 | import com.google.genomics.v1.StreamReadsRequest; 29 | import com.google.genomics.v1.StreamVariantsRequest; 30 | 31 | import java.io.Serializable; 32 | import java.util.ArrayList; 33 | import java.util.List; 34 | 35 | /** 36 | * A Contig is a contiguous region of the genome.
37 | */ 38 | public class Contig implements Serializable { 39 | 40 | private static final long serialVersionUID = -1730387112193404207L; 41 | 42 | public final String referenceName; 43 | public final long start; 44 | public final long end; 45 | 46 | public Contig(String referenceName, long start, long end) { 47 | this.referenceName = requireNonNull(referenceName); 48 | this.start = start; 49 | this.end = end; 50 | } 51 | @Override 52 | public int hashCode() { 53 | return hash(referenceName, start, end); 54 | } 55 | 56 | @Override 57 | public boolean equals(Object obj) { 58 | if (!(obj instanceof Contig)) { 59 | return false; 60 | } 61 | Contig c = (Contig) obj; 62 | return equal(referenceName, c.referenceName) && equal(start, c.start) && equal(end, c.end); 63 | } 64 | 65 | @Override 66 | public String toString() { 67 | return referenceName + ':' + start + ':' + end; 68 | } 69 | 70 | 71 | /** 72 | * Parse the list of Contigs expressed in the string argument. 73 | * 74 | * The common use case is to parse the value of a command line parameter. 75 | * 76 | * @param contigsArgument - a string expressing the specified contiguous region(s) of the genome. 
77 | * The format is chromosome:start:end[,chromosome:start:end] 78 | * @return a list of Contig objects 79 | */ 80 | public static Iterable parseContigsFromCommandLine(String contigsArgument) { 81 | return Iterables.transform(Splitter.on(",").split(contigsArgument), 82 | new Function() { 83 | @Override 84 | public Contig apply(String contigString) { 85 | ArrayList contigInfo = newArrayList(Splitter.on(":").split(contigString)); 86 | Long start = Long.valueOf(contigInfo.get(1)); 87 | Long end = Long.valueOf(contigInfo.get(2)); 88 | Preconditions.checkArgument(start <= end, 89 | "Contig coordinates are incorrectly specified: start " + start + " is greater than end " + end); 90 | return new Contig(contigInfo.get(0), start, end); 91 | } 92 | }); 93 | } 94 | 95 | // The following methods have package scope and are helpers for ShardUtils. For sharded Contigs, 96 | // the ShardUtils methods should be used to ensure that shards are shuffled all together before 97 | // being returned to clients. 98 | List getShards(long numberOfBasesPerShard) { 99 | double shardCount = (end - start) / (double) numberOfBasesPerShard; 100 | List shards = Lists.newArrayList(); 101 | for (int i = 0; i < shardCount; i++) { 102 | long shardStart = start + (i * numberOfBasesPerShard); 103 | long shardEnd = Math.min(end, shardStart + numberOfBasesPerShard); 104 | 105 | shards.add(new Contig(referenceName, shardStart, shardEnd)); 106 | } 107 | return shards; 108 | } 109 | 110 | /** 111 | * Construct a StreamVariantsRequest for the Contig. 112 | * @param variantSetId 113 | * @return the request object 114 | */ 115 | @Deprecated 116 | public StreamVariantsRequest getStreamVariantsRequest(String variantSetId) { 117 | return StreamVariantsRequest.newBuilder() 118 | .setVariantSetId(variantSetId) 119 | .setReferenceName(referenceName) 120 | .setStart(start) 121 | .setEnd(end) 122 | .build(); 123 | } 124 | 125 | /** 126 | * Construct a StreamReadsRequest for the Contig. 
127 | * @param readGroupSetId 128 | * @return the request object 129 | */ 130 | @Deprecated 131 | public StreamReadsRequest getStreamReadsRequest(String readGroupSetId) { 132 | return StreamReadsRequest.newBuilder() 133 | .setReadGroupSetId(readGroupSetId) 134 | .setReferenceName(referenceName) 135 | .setStart(start) 136 | .setEnd(end) 137 | .build(); 138 | } 139 | 140 | /** 141 | * Construct a StreamVariantsRequest for the Contig using a prototype using a prototype request. 142 | * 143 | * @param prototype A partially filled in request object. 144 | * @return the request object 145 | */ 146 | public StreamVariantsRequest getStreamVariantsRequest(StreamVariantsRequest prototype) { 147 | return StreamVariantsRequest.newBuilder(prototype) 148 | .setReferenceName(referenceName) 149 | .setStart(start) 150 | .setEnd(end) 151 | .build(); 152 | } 153 | 154 | /** 155 | * Construct a StreamReadsRequest for the Contig using a prototype request. 156 | * 157 | * @param prototype A partially filled in request object. 158 | * @return the request object 159 | */ 160 | public StreamReadsRequest getStreamReadsRequest(StreamReadsRequest prototype) { 161 | return StreamReadsRequest.newBuilder(prototype) 162 | .setReferenceName(referenceName) 163 | .setStart(start) 164 | .setEnd(end) 165 | .build(); 166 | } 167 | } 168 | 169 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/VariantStreamIteratorITCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. 
You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import static org.hamcrest.Matchers.containsString; 17 | import static org.junit.Assert.assertEquals; 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertTrue; 20 | 21 | import com.google.cloud.genomics.utils.IntegrationTestHelper; 22 | import com.google.cloud.genomics.utils.ShardBoundary; 23 | import com.google.cloud.genomics.utils.ShardUtils; 24 | import com.google.common.base.Strings; 25 | import com.google.common.collect.ImmutableList; 26 | import com.google.genomics.v1.StreamVariantsRequest; 27 | import com.google.genomics.v1.StreamVariantsResponse; 28 | import com.google.genomics.v1.Variant; 29 | 30 | import org.junit.Rule; 31 | import org.junit.Test; 32 | import org.junit.rules.ExpectedException; 33 | import org.junit.runner.RunWith; 34 | import org.junit.runners.JUnit4; 35 | 36 | import java.io.IOException; 37 | import java.security.GeneralSecurityException; 38 | import java.util.Iterator; 39 | import java.util.List; 40 | 41 | @RunWith(JUnit4.class) 42 | public class VariantStreamIteratorITCase { 43 | public static final StreamVariantsRequest PROTOTYPE = StreamVariantsRequest.newBuilder() 44 | .setVariantSetId(IntegrationTestHelper.PLATINUM_GENOMES_VARIANTSET) 45 | .setProjectId(IntegrationTestHelper.getTEST_PROJECT()) 46 | .build(); 47 | 48 | @Rule 49 | public ExpectedException thrown = ExpectedException.none(); 50 | 51 | @Test 52 | public void testBasic() throws IOException, GeneralSecurityException { 53 | ImmutableList 
requests = 54 | ShardUtils.getVariantRequests(PROTOTYPE, 55 | 100L, IntegrationTestHelper.PLATINUM_GENOMES_KLOTHO_REFERENCES); 56 | assertEquals(1, requests.size()); 57 | 58 | Iterator iter = 59 | VariantStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 60 | requests.get(0), 61 | ShardBoundary.Requirement.OVERLAPS, null); 62 | 63 | assertTrue(iter.hasNext()); 64 | StreamVariantsResponse variantResponse = iter.next(); 65 | List variants = variantResponse.getVariantsList(); 66 | // This includes the klotho SNP and three non-variant segments which overlap it. 67 | assertEquals(4, variants.size()); 68 | assertFalse(iter.hasNext()); 69 | 70 | iter = 71 | VariantStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 72 | requests.get(0), 73 | ShardBoundary.Requirement.STRICT, null); 74 | 75 | assertTrue(iter.hasNext()); 76 | variantResponse = iter.next(); 77 | // This includes only the klotho SNP. 
78 | assertEquals(1, variantResponse.getVariantsList().size()); 79 | assertFalse(iter.hasNext()); 80 | } 81 | 82 | @Test 83 | public void testEmptyRegion() throws IOException, GeneralSecurityException { 84 | ImmutableList requests = 85 | ShardUtils.getVariantRequests(PROTOTYPE, 86 | 100L, "chrDoesNotExist:100:200"); 87 | assertEquals(1, requests.size()); 88 | 89 | Iterator iter = 90 | VariantStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 91 | requests.get(0), 92 | ShardBoundary.Requirement.OVERLAPS, null); 93 | assertFalse(iter.hasNext()); 94 | 95 | iter = 96 | VariantStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 97 | requests.get(0), 98 | ShardBoundary.Requirement.STRICT, null); 99 | assertFalse(iter.hasNext()); 100 | } 101 | 102 | @Test 103 | public void testPartialResponses() throws IOException, GeneralSecurityException { 104 | ImmutableList requests = 105 | ShardUtils.getVariantRequests(PROTOTYPE, 106 | 100L, IntegrationTestHelper.PLATINUM_GENOMES_KLOTHO_REFERENCES); 107 | assertEquals(1, requests.size()); 108 | 109 | Iterator iter = 110 | VariantStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 111 | requests.get(0), 112 | ShardBoundary.Requirement.STRICT, "variants(reference_name,start)"); 113 | 114 | assertTrue(iter.hasNext()); 115 | StreamVariantsResponse variantResponse = iter.next(); 116 | List variants = variantResponse.getVariantsList(); 117 | // This includes only the klotho SNP. 
118 | assertEquals(1, variants.size()); 119 | assertFalse(iter.hasNext()); 120 | 121 | assertEquals("chr13", variants.get(0).getReferenceName()); 122 | assertEquals(33628137, variants.get(0).getStart()); 123 | assertTrue(Strings.isNullOrEmpty(variants.get(0).getReferenceBases())); 124 | } 125 | 126 | @Test 127 | public void testPartialResponsesInsufficientFields() throws IOException, GeneralSecurityException { 128 | thrown.expect(IllegalArgumentException.class); 129 | thrown.expectMessage(containsString("Insufficient fields requested in partial response. " 130 | + "At a minimum include 'variants(start)' to enforce a strict shard boundary.")); 131 | 132 | ImmutableList requests = 133 | ShardUtils.getVariantRequests(PROTOTYPE, 134 | 100L, IntegrationTestHelper.PLATINUM_GENOMES_KLOTHO_REFERENCES); 135 | assertEquals(1, requests.size()); 136 | 137 | Iterator iter = 138 | VariantStreamIterator.enforceShardBoundary(IntegrationTestHelper.getAuthFromApplicationDefaultCredential(), 139 | requests.get(0), 140 | ShardBoundary.Requirement.STRICT, "variants(reference_bases)"); 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/ContigTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2014 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.google.cloud.genomics.utils; 17 | 18 | import static com.google.common.collect.Lists.newArrayList; 19 | 20 | import static org.hamcrest.Matchers.containsString; 21 | import static org.junit.Assert.assertEquals; 22 | 23 | import com.google.common.base.Joiner; 24 | import com.google.common.collect.ImmutableSet; 25 | import com.google.genomics.v1.StreamReadsRequest; 26 | import com.google.genomics.v1.StreamVariantsRequest; 27 | 28 | import org.junit.Rule; 29 | import org.junit.Test; 30 | import org.junit.rules.ExpectedException; 31 | import org.junit.runner.RunWith; 32 | import org.junit.runners.JUnit4; 33 | 34 | import java.util.List; 35 | 36 | @RunWith(JUnit4.class) 37 | public class ContigTest { 38 | 39 | @Rule 40 | public ExpectedException thrown = ExpectedException.none(); 41 | 42 | @Test 43 | public void testGetShards() throws Exception { 44 | Contig contig = new Contig("1", 0, 9); 45 | List shards = contig.getShards(5); 46 | 47 | assertEquals(2, shards.size()); 48 | Contig shard1 = shards.get(0); 49 | Contig shard2 = shards.get(1); 50 | 51 | // The code shuffles the shard, so lets make sure that we test the right shards 52 | if (shard1.start > shard2.start) { 53 | shard1 = shards.get(1); 54 | shard2 = shards.get(0); 55 | } 56 | 57 | assertEquals("1", shard1.referenceName); 58 | assertEquals(0, shard1.start); 59 | assertEquals(5, shard1.end); 60 | 61 | assertEquals("1", shard2.referenceName); 62 | assertEquals(5, shard2.start); 63 | assertEquals(9, shard2.end); 64 | } 65 | 66 | @Test 67 | public void testGetVariantsRequest() throws Exception { 68 | StreamVariantsRequest prototype = StreamVariantsRequest.newBuilder() 69 | .setProjectId("theProjectId") 70 | .setVariantSetId("theVariantSetId") 71 | .build(); 72 | StreamVariantsRequest request = new Contig("1", 0, 9) 73 | .getStreamVariantsRequest(prototype); 74 | assertEquals("theProjectId", request.getProjectId()); 75 | assertEquals("theVariantSetId", request.getVariantSetId()); 76 | 
assertEquals("1", request.getReferenceName()); 77 | assertEquals(0, request.getStart()); 78 | assertEquals(9, request.getEnd()); 79 | } 80 | 81 | @Test 82 | public void testGetVariantsRequestWithCallSetIds() throws Exception { 83 | ImmutableSet callSetIds = ImmutableSet.builder() 84 | .add("callSetId-0") 85 | .add("callSetId-1") 86 | .build(); 87 | StreamVariantsRequest prototype = StreamVariantsRequest.newBuilder() 88 | .setProjectId("theProjectId") 89 | .setVariantSetId("theVariantSetId") 90 | .addAllCallSetIds(callSetIds) 91 | .build(); 92 | StreamVariantsRequest request = new Contig("1", 0, 9) 93 | .getStreamVariantsRequest(prototype); 94 | assertEquals("theProjectId", request.getProjectId()); 95 | assertEquals("theVariantSetId", request.getVariantSetId()); 96 | assertEquals(2, request.getCallSetIdsList().size()); 97 | assertEquals("callSetId-0", request.getCallSetIds(0)); 98 | assertEquals("callSetId-1", request.getCallSetIds(1)); 99 | assertEquals("1", request.getReferenceName()); 100 | assertEquals(0, request.getStart()); 101 | assertEquals(9, request.getEnd()); 102 | } 103 | 104 | @Test 105 | public void testGetVariantsRequestWithEmptyCallSetIds() throws Exception { 106 | ImmutableSet callSetIds = ImmutableSet.builder() 107 | .build(); 108 | StreamVariantsRequest prototype = StreamVariantsRequest.newBuilder() 109 | .setProjectId("theProjectId") 110 | .setVariantSetId("theVariantSetId") 111 | .addAllCallSetIds(callSetIds) 112 | .build(); 113 | StreamVariantsRequest request = new Contig("1", 0, 9) 114 | .getStreamVariantsRequest(prototype); 115 | assertEquals("theProjectId", request.getProjectId()); 116 | assertEquals("theVariantSetId", request.getVariantSetId()); 117 | assertEquals(0, request.getCallSetIdsList().size()); 118 | assertEquals("1", request.getReferenceName()); 119 | assertEquals(0, request.getStart()); 120 | assertEquals(9, request.getEnd()); 121 | } 122 | 123 | @Test 124 | public void testGetReadsRequest() throws Exception { 125 | 
StreamReadsRequest prototype = StreamReadsRequest.newBuilder() 126 | .setProjectId("theProjectId") 127 | .setReadGroupSetId("theReadGroupSetId") 128 | .build(); 129 | StreamReadsRequest request = new Contig("1", 0, 9) 130 | .getStreamReadsRequest(prototype); 131 | assertEquals("theProjectId", request.getProjectId()); 132 | assertEquals("theReadGroupSetId", request.getReadGroupSetId()); 133 | assertEquals("1", request.getReferenceName()); 134 | assertEquals(0, request.getStart()); 135 | assertEquals(9, request.getEnd()); 136 | } 137 | 138 | @Test 139 | public void testParseContigs() { 140 | Contig brca1Contig = new Contig("17", 41196311, 41277499); 141 | String brca1ContigString = "17:41196311:41277499"; 142 | 143 | Contig klothoContig = new Contig("13", 33628137, 33628138); 144 | String klothoContigString = "13:33628137:33628138"; 145 | 146 | assertEquals(newArrayList(brca1Contig), 147 | newArrayList(Contig.parseContigsFromCommandLine(brca1ContigString))); 148 | 149 | assertEquals(newArrayList(brca1Contig, klothoContig), 150 | newArrayList(Contig.parseContigsFromCommandLine((Joiner.on(",").join( 151 | brca1ContigString, klothoContigString))))); 152 | } 153 | 154 | @Test 155 | public void testParseContigsValidation() { 156 | String contigEndBeforeStart = "17:41277499:41196311"; 157 | thrown.expect(IllegalArgumentException.class); 158 | thrown.expectMessage(containsString("Contig coordinates are incorrectly specified")); 159 | Iterable contigs = Contig.parseContigsFromCommandLine(contigEndBeforeStart); 160 | contigs.toString(); // The operation is lazy, force it here. 161 | } 162 | 163 | } 164 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/OfflineAuth.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 
*
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.genomics.utils;

import com.google.api.client.auth.oauth2.ClientParametersAuthentication;
import com.google.api.client.auth.oauth2.Credential;
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.auth.oauth2.GoogleCredentials;
import com.google.auth.oauth2.UserCredentials;
import com.google.common.base.Preconditions;

import java.io.IOException;
import java.io.Serializable;
import java.security.GeneralSecurityException;

/**
 * The purpose of the OfflineAuth class is to encapsulate an apiKey or stored user credential
 * for offline use in pipeline systems such as Dataflow or Spark.
 *
 * If the OfflineAuth object contains neither an apiKey nor a user credential, it will fall back to
 * the Application Default Credential.
 *
 * For more information about auth, please see:
 * <ul>
 * <li>https://developers.google.com/identity/protocols/application-default-credentials
 * <li>https://developers.google.com/api-client-library/java/google-oauth-java-client/oauth2
 * <li>https://github.com/google/google-api-java-client/tree/master/google-api-client/src/main/java/com/google/api/client/googleapis/auth/oauth2
 * <li>https://github.com/google/google-oauth-java-client
 * <li>https://github.com/google/google-auth-library-java
 * </ul>
 */
@SuppressWarnings("serial")
public class OfflineAuth implements Serializable {
  private String apiKey;
  private String clientId;
  private String clientSecret;
  private String refreshToken;

  /**
   * Creates an empty offline-friendly auth object.
   *
   * Use this method when your application will fall back to using the Application Default
   * Credential (e.g., via Google Cloud Dataflow) but the code interface takes an OfflineAuth object
   * so that it could alternatively use a user credential or an apiKey.
   */
  public OfflineAuth() {}

  /**
   * Creates offline-friendly auth object using a credential object.
   *
   * Use this method when your application has already performed the oauth
   * flow and needs to store and use the credential later in an offline
   * manner (e.g., via Google Cloud Dataflow).
   *
   * @param credential The credential to be used for requests.
   * @throws NullPointerException if {@code credential} is null.
   * @throws IllegalArgumentException if the credential does not carry client parameters
   *         (client id and secret), which are needed to rebuild it later.
   */
  public OfflineAuth(Credential credential) {
    Preconditions.checkNotNull(credential);
    // getClientAuthentication() may be absent or of another type; fail with a clear message
    // rather than a NullPointerException or ClassCastException.
    Preconditions.checkArgument(
        credential.getClientAuthentication() instanceof ClientParametersAuthentication,
        "The credential must use ClientParametersAuthentication (client id and secret) "
            + "to be stored for offline use.");
    ClientParametersAuthentication clientParams =
        (ClientParametersAuthentication) credential.getClientAuthentication();
    this.clientId = clientParams.getClientId();
    this.clientSecret = clientParams.getClientSecret();
    this.refreshToken = credential.getRefreshToken();
  }

  /**
   * Creates offline-friendly auth object using an apiKey.
   *
   * Use this method when you need to store the resulting OfflineAuth for later use.
   *
   * @param apiKey The API key of the Google Cloud Platform project to be used for requests.
   * @throws NullPointerException if {@code apiKey} is null.
   */
  public OfflineAuth(String apiKey) {
    Preconditions.checkNotNull(apiKey);
    this.apiKey = apiKey;
  }

  /**
   * @return Whether an api key is stored in this OfflineAuth.
   */
  public boolean hasApiKey() {
    return null != apiKey;
  }

  /**
   * @return the apiKey
   */
  public String getApiKey() {
    return apiKey;
  }

  /**
   * @return Whether a credential is stored in this OfflineAuth.
   */
  public boolean hasStoredCredential() {
    // Only the refresh token is checked; it is set together with the client id and secret.
    return null != refreshToken;
  }

  /**
   * @return The stored clientId or null if the Application Default Credential is to be used.
   */
  public String getClientId() {
    return clientId;
  }

  /**
   * @return The stored clientSecret or null if the Application Default Credential is to be used.
   */
  public String getClientSecret() {
    return clientSecret;
  }

  /**
   * @return The stored refreshToken or null if the Application Default Credential is to be used.
   */
  public String getRefreshToken() {
    return refreshToken;
  }

  /**
   * Return the stored user credential, if applicable, or fall back to the Application Default Credential.
   *
   * @return The com.google.api.client.auth.oauth2.Credential object.
   * @throws RuntimeException if the HTTPS transport needed to build the credential cannot be
   *         created.
   */
  public Credential getCredential() {
    if (hasStoredCredential()) {
      HttpTransport httpTransport;
      try {
        httpTransport = GoogleNetHttpTransport.newTrustedTransport();
      } catch (IOException | GeneralSecurityException e) {
        throw new RuntimeException("Could not create HTTPS transport for use in credential creation", e);
      }

      return new GoogleCredential.Builder()
          .setJsonFactory(JacksonFactory.getDefaultInstance())
          .setTransport(httpTransport)
          .setClientSecrets(getClientId(), getClientSecret())
          .build()
          .setRefreshToken(getRefreshToken());
    }
    return CredentialFactory.getApplicationDefaultCredential();
  }

  /**
   * Return the stored user credentials, if applicable, or fall back to the Application Default Credentials.
   *
   * Specifically, gRPC uses the new Google OAuth library. See https://github.com/google/google-auth-library-java
   *
   * @return The com.google.auth.Credentials object.
   */
  public GoogleCredentials getCredentials() {
    if (hasStoredCredential()) {
      return new UserCredentials(clientId, clientSecret, refreshToken);
    }
    return CredentialFactory.getApplicationDefaultCredentials();
  }

}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | utils-java [![Build Status](https://img.shields.io/travis/googlegenomics/utils-java.svg?style=flat)](https://travis-ci.org/googlegenomics/utils-java) [![Coverage Status](https://img.shields.io/coveralls/googlegenomics/utils-java.svg?style=flat)](https://coveralls.io/r/googlegenomics/utils-java) 2 | ========== 3 | 4 | This project's goal is to reduce duplicate code across different Google Genomics Java integrations.
5 | 6 | If you have duplicate code appearing in your projects, or see useful functions in the other [googlegenomics Java repositories](https://github.com/googlegenomics?query=-java) that you want to depend on, please [contribute](CONTRIBUTING.rst)! 7 | 8 | ## Depending on this project 9 | 10 | Note: you can find the latest available version of this project in [Maven's central repository](https://search.maven.org/#search%7Cga%7C1%7Ca%3A%22google-genomics-utils%22). 11 | 12 | ### Maven 13 | Add the following to your `pom.xml` file: 14 | ``` 15 | <project> 16 | <dependencies> 17 | <dependency> 18 | <groupId>com.google.cloud.genomics</groupId> 19 | <artifactId>google-genomics-utils</artifactId> 20 | <version>v1-0.8</version> 21 | </dependency> 22 | </dependencies> 23 | </project> 24 | ``` 25 | 26 | ### Gradle 27 | Add the following to your `build.gradle` file, updating the version to [the most recent release](https://search.maven.org/#search%7Cgav%7C1%7Cg%3A%22com.google.cloud.genomics%22%20AND%20a%3A%22google-genomics-utils%22): 28 | ``` 29 | repositories { 30 | mavenCentral() 31 | } 32 | 33 | dependencies { 34 | compile 'com.google.cloud.genomics:google-genomics-utils:v1-0.8' 35 | } 36 | ``` 37 | 38 | ## Building this project 39 | 40 | 1. git clone this repository. 41 | 42 | 1. If you have not already done so, follow the Google Genomics [getting started instructions](https://cloud.google.com/genomics/install-genomics-tools) to set up your environment 43 | including [installing gcloud](https://cloud.google.com/sdk/) and running `gcloud init`. 44 | 45 | 1. This project now includes code for calling the Genomics API using [gRPC](http://www.grpc.io). To use gRPC, you'll need a version of ALPN that matches your JRE version. 46 | 47 | 2. See the [ALPN documentation](http://www.eclipse.org/jetty/documentation/9.2.10.v20150310/alpn-chapter.html) for a table of which ALPN jar to use for your JRE version. 48 | 2. Then download the correct version from [here](http://mvnrepository.com/artifact/org.mortbay.jetty.alpn/alpn-boot). 49 | 50 | 1.
Use a recent version of [Apache Maven](http://maven.apache.org/download.cgi) (e.g., version 3.3.3) to build this code: 51 | ``` 52 | cd utils-java 53 | mvn package 54 | ``` 55 | 56 | If you wish to modify this code while also modifying a project that depends upon it (e.g., [dataflow-java](https://github.com/googlegenomics/dataflow-java)), do the following: 57 | 58 | 1. Build and install the jar to your local maven repository via `mvn install`. 59 | 2. Then update the `pom.xml` of the dependent project to temporarily depend upon the particular SNAPSHOT version of utils-java now installed in your local maven repository. For example, make a change similar to [this](https://github.com/googlegenomics/dataflow-java/commit/17122798d579e593470c864868a46092f8795dfa) but only in your local copy of `pom.xml`. 60 | 3. Eclipse users: 61 | * For each of utils-java, dataflow-java, etc... in Eclipse do: `File -> Import -> Maven -> Existing Maven Projects` 62 | * If at any point Eclipse becomes confused (lots of red X's), do: `Project -> Clean -> Clean All Projects` 63 | 4. IntelliJ users: 64 | * Import the project via File > Open and then double-click on the pom.xml file. 65 | 66 | ## gRPC 67 | 68 | See [Example.java](src/main/java/com/google/cloud/genomics/utils/grpc/Example.java) for some example code that uses gRPC. The protocol buffer schema for the API can be found in [src/main/proto/google/genomics/v1](src/main/proto/google/genomics/v1). 69 | 70 | * To run the example: 71 | ``` 72 | MAVEN_OPTS="-Xbootclasspath/p:/YOUR/PATH/TO/alpn-boot-YOUR-VERSION.jar" \ 73 | mvn exec:java -Dexec.mainClass=com.google.cloud.genomics.utils.grpc.Example 74 | ``` 75 | 76 | ## Code Layout 77 | 78 | * [com.google.cloud.genomics.utils](src/main/java/com/google/cloud/genomics/utils) 79 | * [GenomicsFactory.java](src/main/java/com/google/cloud/genomics/utils/GenomicsFactory.java) Makes it easier to construct an authenticated Genomics service.
80 | * [Paginator.java](src/main/java/com/google/cloud/genomics/utils/Paginator.java) Lazily paginates through readsets, reads, variants, callsets, etc... 81 | * [com.google.cloud.genomics.utils.grpc](src/main/java/com/google/cloud/genomics/utils/grpc) 82 | * [Channels.java](src/main/java/com/google/cloud/genomics/utils/grpc/Channels.java) Makes it easier to create gRPC channels to the Google Genomics API. 83 | * [Example.java](src/main/java/com/google/cloud/genomics/utils/grpc/Example.java) Demonstrates usage of gRPC via a minimal example. 84 | * [proto](src/main/proto) 85 | * [Protocol Buffer](http://www.grpc.io/docs/#working-with-protocol-buffers) 86 | files defining the gRPC interface to the Genomics API. 87 | 88 | ## Releasing new versions 89 | 90 | This section contains details on getting a new release into Maven central and can be safely ignored by most people. If you need a new release of this code, go ahead and just [file an issue](https://github.com/googlegenomics/utils-java/issues/new). 91 | 92 | ### Prereqs 93 | * [Create a Sonatype Jira Account](http://central.sonatype.org/pages/ossrh-guide.html#initial-setup) 94 | * [File a ticket](https://issues.sonatype.org/browse/OSSRH-11629) to get access to the Sonatype com.google.cloud.genomics group 95 | * [Setup gpg](http://central.sonatype.org/pages/working-with-pgp-signatures.html) (Don't forget to publish a public key) 96 | * [Setup GitHub SSH keys](https://help.github.com/articles/generating-ssh-keys) (make sure `ssh -T git@github.com` works) 97 | * Create a `~/.m2/settings.xml` file which has the following: 98 | ``` 99 | <settings> 100 | <servers> 101 | <server> 102 | <id>ossrh</id> 103 | <username>sonatype-username</username> 104 | <password>sonatype-password</password> 105 | </server> 106 | </servers> 107 | </settings> 108 | ``` 109 | 110 | ### Making a new release 111 | 1. Use Maven to tag the code, up the pom version and release into the Sonatype staging area. 112 | ``` 113 | mvn release:prepare && mvn release:perform 114 | ``` 115 | > ... 
116 | > **What is the release version for "Google Genomics Utils"?:** \ 117 | > **What is SCM release tag or label for "Google Genomics Utils"?:** \ 118 | > **What is the new development version for "Google Genomics Utils"?** \ 119 | 120 | 2. Find the repository at https://oss.sonatype.org/#stagingRepositories and close it. 121 | 3. If closing succeeds, then release it. See the [detailed instructions](http://central.sonatype.org/pages/releasing-the-deployment.html#close-and-drop-or-release-your-staging-repository) for more info. 122 | 4. As long as there aren't any errors - that's it! The new version will be synced to Maven central. 123 | 124 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/CredentialFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.google.cloud.genomics.utils; 17 | 18 | import com.google.api.client.auth.oauth2.Credential; 19 | import com.google.api.client.extensions.java6.auth.oauth2.AbstractPromptReceiver; 20 | import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp; 21 | import com.google.api.client.googleapis.auth.oauth2.GoogleAuthorizationCodeFlow; 22 | import com.google.api.client.googleapis.auth.oauth2.GoogleClientSecrets; 23 | import com.google.api.client.googleapis.auth.oauth2.GoogleCredential; 24 | import com.google.api.client.googleapis.auth.oauth2.GoogleOAuthConstants; 25 | import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport; 26 | import com.google.api.client.http.HttpTransport; 27 | import com.google.api.client.json.JsonFactory; 28 | import com.google.api.client.json.jackson2.JacksonFactory; 29 | import com.google.api.client.util.store.FileDataStoreFactory; 30 | import com.google.auth.oauth2.GoogleCredentials; 31 | import com.google.common.base.Preconditions; 32 | 33 | import java.io.File; 34 | import java.io.FileReader; 35 | import java.io.IOException; 36 | import java.security.GeneralSecurityException; 37 | import java.util.Arrays; 38 | import java.util.List; 39 | 40 | /** 41 | * Convenience routines for obtaining credentials. 42 | * 43 | * For more information about auth, please see: 44 | *
    45 | *
  • https://developers.google.com/identity/protocols/application-default-credentials 46 | *
  • https://developers.google.com/api-client-library/java/google-oauth-java-client/oauth2 47 | *
  • https://github.com/google/google-api-java-client/tree/master/google-api-client/src/main/java/com/google/api/client/googleapis/auth/oauth2 48 | *
  • https://github.com/google/google-oauth-java-client 49 | *
  • https://github.com/google/google-auth-library-java 50 | *
51 | */ 52 | public class CredentialFactory { 53 | 54 | private static final String MISSING_ADC_EXCEPTION_MESSAGE = 55 | "Unable to get application default credentials. Please see " 56 | + "https://developers.google.com/identity/protocols/application-default-credentials " 57 | + "for details on how to specify credentials. This is dependent " 58 | + "on the gcloud core component version 2015.02.05 or newer to be able " 59 | + "to get credentials from the currently authorized user via gcloud auth."; 60 | 61 | private static final List SCOPES = Arrays.asList( 62 | "https://www.googleapis.com/auth/cloud-platform"); 63 | 64 | private static final File CREDENTIAL_STORE = new File( 65 | System.getProperty("user.home"), ".store"); 66 | 67 | private static class PromptReceiver extends AbstractPromptReceiver { 68 | @Override 69 | public String getRedirectUri() { 70 | return GoogleOAuthConstants.OOB_REDIRECT_URI; 71 | } 72 | } 73 | 74 | /** 75 | * Obtain the Application Default com.google.api.client.auth.oauth2.Credential 76 | * 77 | * @return the Application Default Credential 78 | */ 79 | public static GoogleCredential getApplicationDefaultCredential() { 80 | try { 81 | GoogleCredential credential = GoogleCredential.getApplicationDefault(); 82 | if (credential.createScopedRequired()) { 83 | credential = 84 | credential.createScoped(Arrays.asList("https://www.googleapis.com/auth/genomics")); 85 | } 86 | return credential; 87 | } catch (IOException e) { 88 | throw new RuntimeException(MISSING_ADC_EXCEPTION_MESSAGE, e); 89 | } 90 | } 91 | 92 | /** 93 | * Obtain the Application Default com.google.auth.oauth2.GoogleCredentials 94 | * 95 | * This is from the newer OAuth library https://github.com/google/google-auth-library-java 96 | * which is used by gRPC. 
97 | * 98 | * @return the Application Default Credentials 99 | */ 100 | public static GoogleCredentials getApplicationDefaultCredentials() { 101 | try { 102 | return GoogleCredentials.getApplicationDefault(); 103 | } catch (IOException e) { 104 | throw new RuntimeException(MISSING_ADC_EXCEPTION_MESSAGE, e); 105 | } 106 | } 107 | 108 | /** 109 | * Creates an OAuth2 credential from client secrets, which may require an interactive authorization prompt. 110 | * 111 | * Use this method when the Application Default Credential is not sufficient. 112 | * 113 | * @param clientSecretsFile The {@code client_secrets.json} file path. 114 | * @param credentialId The credentialId for use in identifying the credential in the persistent credential store. 115 | * @return The user credential 116 | */ 117 | public static Credential getCredentialFromClientSecrets(String clientSecretsFile, 118 | String credentialId) { 119 | Preconditions.checkArgument(clientSecretsFile != null); 120 | Preconditions.checkArgument(credentialId != null); 121 | 122 | HttpTransport httpTransport; 123 | try { 124 | httpTransport = GoogleNetHttpTransport.newTrustedTransport(); 125 | } catch (IOException | GeneralSecurityException e) { 126 | throw new RuntimeException("Could not create HTTPS transport for use in credential creation", e); 127 | } 128 | 129 | JsonFactory jsonFactory = JacksonFactory.getDefaultInstance(); 130 | GoogleClientSecrets clientSecrets; 131 | 132 | try { 133 | clientSecrets = GoogleClientSecrets.load(jsonFactory, 134 | new FileReader(clientSecretsFile)); 135 | } catch (IOException e) { 136 | throw new RuntimeException( 137 | "Could not read the client secrets from file: " + clientSecretsFile, 138 | e); 139 | } 140 | 141 | FileDataStoreFactory dataStoreFactory; 142 | try { 143 | dataStoreFactory = new FileDataStoreFactory(CREDENTIAL_STORE); 144 | } catch (IOException e) { 145 | throw new RuntimeException("Could not create persisten credential store " + CREDENTIAL_STORE, e); 146 | } 147 | 
148 | GoogleAuthorizationCodeFlow flow; 149 | try { 150 | flow = new GoogleAuthorizationCodeFlow.Builder( 151 | httpTransport, jsonFactory, clientSecrets, SCOPES) 152 | .setDataStoreFactory(dataStoreFactory) 153 | .build(); 154 | } catch (IOException e) { 155 | throw new RuntimeException("Could not build credential authorization flow", e); 156 | } 157 | 158 | // The credentialId identifies the credential in the persistent credential store. 159 | Credential credential; 160 | try { 161 | credential = new AuthorizationCodeInstalledApp(flow, new PromptReceiver()) 162 | .authorize(credentialId); 163 | } catch (IOException e) { 164 | throw new RuntimeException("Could not perform credential authorization flow", e); 165 | } 166 | return credential; 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/GenomicsStreamIteratorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 
13 | */ 14 | 15 | package com.google.cloud.genomics.utils.grpc; 16 | 17 | import com.google.cloud.genomics.utils.ShardBoundary; 18 | import com.google.cloud.genomics.utils.ShardUtils; 19 | import com.google.common.collect.ImmutableList; 20 | import com.google.genomics.v1.StreamReadsRequest; 21 | import com.google.genomics.v1.StreamReadsResponse; 22 | import com.google.genomics.v1.StreamVariantsRequest; 23 | import com.google.genomics.v1.StreamVariantsResponse; 24 | import com.google.genomics.v1.StreamingReadServiceGrpc; 25 | import com.google.genomics.v1.StreamingVariantServiceGrpc; 26 | 27 | import org.junit.AfterClass; 28 | import org.junit.BeforeClass; 29 | import org.junit.Test; 30 | import org.junit.runner.RunWith; 31 | import org.junit.runners.JUnit4; 32 | 33 | import io.grpc.ManagedChannel; 34 | import io.grpc.Server; 35 | import io.grpc.inprocess.InProcessChannelBuilder; 36 | import io.grpc.inprocess.InProcessServerBuilder; 37 | import io.grpc.stub.StreamObserver; 38 | 39 | import java.io.IOException; 40 | import java.security.GeneralSecurityException; 41 | import java.util.Collections; 42 | 43 | @RunWith(JUnit4.class) 44 | public class GenomicsStreamIteratorTest { 45 | public static final String SERVER_NAME = "unitTest"; 46 | public static final StreamReadsRequest PROTOTYPE_READ_REQUEST = StreamReadsRequest.newBuilder() 47 | .setReadGroupSetId("theReadGroupSetId") 48 | .setProjectId("theProjectId") 49 | .build(); 50 | public static final StreamVariantsRequest PROTOTYPE_VARIANT_REQUEST = StreamVariantsRequest.newBuilder() 51 | .setVariantSetId("theVariantSetId") 52 | .setProjectId("theProjectId") 53 | .build(); 54 | 55 | protected static Server server; 56 | 57 | /** 58 | * Starts the in-process server. 
59 | */ 60 | @BeforeClass 61 | public static void startServer() { 62 | try { 63 | server = InProcessServerBuilder.forName(SERVER_NAME) 64 | .addService(new ReadsUnitServerImpl()) 65 | .addService(new VariantsUnitServerImpl()) 66 | .build().start(); 67 | } catch (IOException ex) { 68 | throw new RuntimeException(ex); 69 | } 70 | } 71 | 72 | @AfterClass 73 | public static void stopServer() { 74 | server.shutdownNow(); 75 | } 76 | 77 | protected static class ReadsUnitServerImpl extends StreamingReadServiceGrpc.StreamingReadServiceImplBase { 78 | @Override 79 | public void streamReads(StreamReadsRequest request, 80 | StreamObserver responseObserver) { 81 | StreamReadsResponse response = StreamReadsResponse.newBuilder() 82 | .addAlignments(TestHelper.makeRead(400, 510)) 83 | .addAlignments(TestHelper.makeRead(450, 505)) 84 | .addAlignments(TestHelper.makeRead(499, 600)) 85 | .build(); 86 | responseObserver.onNext(response); 87 | responseObserver.onCompleted(); 88 | } 89 | } 90 | 91 | protected static class VariantsUnitServerImpl extends StreamingVariantServiceGrpc.StreamingVariantServiceImplBase { 92 | @Override 93 | public void streamVariants(StreamVariantsRequest request, 94 | StreamObserver responseObserver) { 95 | StreamVariantsResponse response = StreamVariantsResponse.newBuilder() 96 | .addVariants(TestHelper.makeVariant(400, 510)) 97 | .addVariants(TestHelper.makeVariant(450, 505)) 98 | .addVariants(TestHelper.makeVariant(499, 600)) 99 | .build(); 100 | responseObserver.onNext(response); 101 | responseObserver.onCompleted(); 102 | } 103 | } 104 | 105 | public ManagedChannel createChannel() { 106 | return InProcessChannelBuilder.forName(SERVER_NAME).build(); 107 | } 108 | 109 | @Test 110 | public void testAllReadsOverlapsStart() throws IOException, GeneralSecurityException { 111 | ImmutableList requests = 112 | ShardUtils.getReadRequests(Collections.singletonList(PROTOTYPE_READ_REQUEST), 1000000L, "chr7:500:600"); 113 | 114 | ReadStreamIterator iter = 
ReadStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 115 | ShardBoundary.Requirement.STRICT, null); 116 | TestHelper.consumeStreamTest(iter, 0); 117 | 118 | iter = ReadStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 119 | ShardBoundary.Requirement.OVERLAPS, null); 120 | TestHelper.consumeStreamTest(iter, 3); 121 | } 122 | 123 | @Test 124 | public void testAllVariantsOverlapsStart() throws IOException, GeneralSecurityException { 125 | ImmutableList requests = 126 | ShardUtils.getVariantRequests(PROTOTYPE_VARIANT_REQUEST, 1000000L, "chr7:500:600"); 127 | 128 | VariantStreamIterator iter = VariantStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 129 | ShardBoundary.Requirement.STRICT, null); 130 | TestHelper.consumeStreamTest(iter, 0); 131 | 132 | iter = VariantStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 133 | ShardBoundary.Requirement.OVERLAPS, null); 134 | TestHelper.consumeStreamTest(iter, 3); 135 | } 136 | 137 | @Test 138 | public void testSomeReadsOverlapsStart() throws IOException, GeneralSecurityException { 139 | ImmutableList requests = 140 | ShardUtils.getReadRequests(Collections.singletonList(PROTOTYPE_READ_REQUEST), 1000000L, "chr7:499:600"); 141 | 142 | ReadStreamIterator iter = ReadStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 143 | ShardBoundary.Requirement.STRICT, null); 144 | TestHelper.consumeStreamTest(iter, 1); 145 | 146 | iter = ReadStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 147 | ShardBoundary.Requirement.OVERLAPS, null); 148 | TestHelper.consumeStreamTest(iter, 3); 149 | } 150 | 151 | @Test 152 | public void testSomeVariantsOverlapsStart() throws IOException, GeneralSecurityException { 153 | ImmutableList requests = 154 | ShardUtils.getVariantRequests(PROTOTYPE_VARIANT_REQUEST, 1000000L, "chr7:499:600"); 155 | 156 | VariantStreamIterator iter = VariantStreamIterator.enforceShardBoundary(createChannel(), 
requests.get(0), 157 | ShardBoundary.Requirement.STRICT, null); 158 | TestHelper.consumeStreamTest(iter, 1); 159 | 160 | iter = VariantStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 161 | ShardBoundary.Requirement.OVERLAPS, null); 162 | TestHelper.consumeStreamTest(iter, 3); 163 | } 164 | 165 | @Test 166 | public void testNoReadsOverlapsStart() throws IOException, GeneralSecurityException { 167 | ImmutableList requests = 168 | ShardUtils.getReadRequests(Collections.singletonList(PROTOTYPE_READ_REQUEST), 1000000L, "chr7:300:600"); 169 | 170 | ReadStreamIterator iter = ReadStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 171 | ShardBoundary.Requirement.STRICT, null); 172 | TestHelper.consumeStreamTest(iter, 3); 173 | 174 | iter = ReadStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 175 | ShardBoundary.Requirement.OVERLAPS, null); 176 | TestHelper.consumeStreamTest(iter, 3); 177 | } 178 | 179 | @Test 180 | public void testNoVariantsOverlapsStart() throws IOException, GeneralSecurityException { 181 | ImmutableList requests = 182 | ShardUtils.getVariantRequests(PROTOTYPE_VARIANT_REQUEST, 1000000L, "chr7:300:600"); 183 | 184 | VariantStreamIterator iter = VariantStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 185 | ShardBoundary.Requirement.STRICT, null); 186 | TestHelper.consumeStreamTest(iter, 3); 187 | 188 | iter = VariantStreamIterator.enforceShardBoundary(createChannel(), requests.get(0), 189 | ShardBoundary.Requirement.OVERLAPS, null); 190 | TestHelper.consumeStreamTest(iter, 3); 191 | } 192 | 193 | } 194 | -------------------------------------------------------------------------------- /src/test/java/com/google/cloud/genomics/utils/grpc/ReadUtilsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2014 Google Inc. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.google.cloud.genomics.utils.grpc; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import com.google.common.collect.Lists; 21 | import com.google.genomics.v1.CigarUnit; 22 | import com.google.genomics.v1.CigarUnit.Operation; 23 | import com.google.genomics.v1.LinearAlignment; 24 | import com.google.genomics.v1.Position; 25 | import com.google.genomics.v1.Read; 26 | import com.google.protobuf.ListValue; 27 | import com.google.protobuf.Value; 28 | 29 | import org.junit.Test; 30 | import org.junit.runner.RunWith; 31 | import org.junit.runners.JUnit4; 32 | 33 | import htsjdk.samtools.Cigar; 34 | import htsjdk.samtools.CigarElement; 35 | import htsjdk.samtools.SAMFileHeader; 36 | import htsjdk.samtools.SAMRecord; 37 | import htsjdk.samtools.SamReader; 38 | import htsjdk.samtools.SamReaderFactory; 39 | import htsjdk.samtools.TextCigarCodec; 40 | 41 | import java.io.File; 42 | import java.io.IOException; 43 | import java.util.List; 44 | 45 | @RunWith(JUnit4.class) 46 | public class ReadUtilsTest { 47 | 48 | @Test 49 | public void testGetCigarString() throws Exception { 50 | Read.Builder read = Read.newBuilder(); 51 | assertEquals(null, ReadUtils.getCigarString(read.build())); 52 | 53 | List cigar = Lists.newArrayList( 54 | CigarUnit.newBuilder().setOperation(Operation.ALIGNMENT_MATCH).setOperationLength(100L).build(), 55 | 
CigarUnit.newBuilder().setOperation(Operation.CLIP_SOFT).setOperationLength(3L).build()); 56 | read.setAlignment(LinearAlignment.newBuilder().addAllCigar(cigar)); 57 | assertEquals("100M3S", ReadUtils.getCigarString(read.build())); 58 | } 59 | 60 | @Test 61 | public void testGetFlags() throws Exception { 62 | Read.Builder read = Read.newBuilder(); 63 | 64 | // Read unmapped (4) 65 | assertEquals(4, ReadUtils.getFlags(read.build())); 66 | 67 | // All conditions false 68 | Position position = Position.newBuilder().setPosition(1L).build(); 69 | read.setAlignment(LinearAlignment.newBuilder().setPosition(position)).setNextMatePosition(position).build(); 70 | assertEquals(0, ReadUtils.getFlags(read.build())); 71 | 72 | // Read paired (1) + Proper Pair (2) + Read and Mate unmapped (12) + 73 | // First in pair (64) + Secondary (256) + Duplicate (1024) + Supplementary (2048) 74 | read = Read.newBuilder(); 75 | read.setNumberReads(2); 76 | read.setProperPlacement(true); 77 | read.setReadNumber(0); 78 | read.setSecondaryAlignment(true); 79 | read.setDuplicateFragment(true); 80 | read.setSupplementaryAlignment(true); 81 | 82 | assertEquals(3407, ReadUtils.getFlags(read.build())); 83 | } 84 | 85 | @Test 86 | public void testConversion() { 87 | SAMRecord record = new SAMRecord(null); 88 | record.setReferenceName("chr20"); 89 | record.setAlignmentStart(1); 90 | record.setCigarString(String.format("%dM", 10)); 91 | record.setMateReferenceName("chr20"); 92 | record.setMateAlignmentStart(100); 93 | record.setReadPairedFlag(true); 94 | record.setFirstOfPairFlag(true); 95 | record.setMateNegativeStrandFlag(true); 96 | 97 | Read read = ReadUtils.makeReadGrpc(record); 98 | assertEquals(0, read.getAlignment().getPosition().getPosition()); 99 | assertEquals(1, read.getAlignment().getCigarList().size()); 100 | assertEquals("chr20", read.getAlignment().getPosition().getReferenceName()); 101 | assertEquals(0, read.getReadNumber()); 102 | assertEquals(99, 
read.getNextMatePosition().getPosition()); 103 | assertEquals("chr20", read.getNextMatePosition().getReferenceName()); 104 | assertEquals(true, read.getNextMatePosition().getReverseStrand()); 105 | } 106 | @Test 107 | public void testByteArrayAttributes() { 108 | // Client code of SamRecord can pass anything to setAttribute including 109 | // byte[] (which doesn't have toString defined). This verifies 110 | // we handle that case correctly. 111 | SAMRecord record = new SAMRecord(null); 112 | record.setReferenceName("chr20"); 113 | record.setAlignmentStart(1); 114 | record.setCigarString(String.format("%dM", 10)); 115 | String s = "123456"; 116 | record.setAttribute("FZ", s.getBytes()); 117 | 118 | Read read = ReadUtils.makeReadGrpc(record); 119 | assertEquals(0, read.getAlignment().getPosition().getPosition()); 120 | assertEquals(1, read.getAlignment().getCigarList().size()); 121 | assertEquals("chr20", read.getAlignment().getPosition().getReferenceName()); 122 | assertEquals(s, read.getInfo().get("FZ").getValues(0).getStringValue()); 123 | } 124 | 125 | @Test 126 | public void SamToReadToSamTest() { 127 | String filePath = "src/test/resources/com/google/cloud/genomics/utils/conversion_test.sam"; 128 | File samInput = new File(filePath); 129 | SamReader reads = SamReaderFactory.makeDefault().open(samInput); 130 | SAMFileHeader header = reads.getFileHeader(); 131 | 132 | int numReads = 0; 133 | for (SAMRecord sam : reads){ 134 | Read read = ReadUtils.makeReadGrpc(sam); 135 | SAMRecord newSam = ReadUtils.makeSAMRecord(read, header ); 136 | final String originalSamString = sam.getSAMString(); 137 | final String postConversionString = newSam.getSAMString(); 138 | assertEquals(originalSamString, postConversionString); 139 | numReads++; 140 | } 141 | assertEquals(19, numReads);//sanity check to make sure we actually read the file 142 | } 143 | 144 | private void testGetReferenceSequenceHelper(final String seq, final String cigar, final String md, 145 | final String 
expectedReference) throws IOException { 146 | LinearAlignment.Builder alignment = LinearAlignment.newBuilder(); 147 | Cigar cigars = TextCigarCodec.decode(cigar); 148 | for (int i = 0; i < cigars.numCigarElements(); i++) { 149 | CigarElement c = cigars.getCigarElement(i); 150 | CigarUnit.Builder unit = CigarUnit.newBuilder().setOperationLength(c.getLength()); 151 | switch (c.getOperator()) { 152 | case M: 153 | unit.setOperation(CigarUnit.Operation.ALIGNMENT_MATCH); 154 | break; 155 | case I: 156 | unit.setOperation(CigarUnit.Operation.INSERT); 157 | break; 158 | case D: 159 | unit.setOperation(CigarUnit.Operation.DELETE); 160 | break; 161 | case N: 162 | unit.setOperation(CigarUnit.Operation.SKIP); 163 | break; 164 | case S: 165 | unit.setOperation(CigarUnit.Operation.CLIP_SOFT); 166 | break; 167 | case H: 168 | unit.setOperation(CigarUnit.Operation.CLIP_HARD); 169 | break; 170 | case P: 171 | unit.setOperation(CigarUnit.Operation.PAD); 172 | break; 173 | case EQ: 174 | unit.setOperation(CigarUnit.Operation.SEQUENCE_MATCH); 175 | break; 176 | case X: 177 | unit.setOperation(CigarUnit.Operation.SEQUENCE_MISMATCH); 178 | break; 179 | } 180 | alignment.addCigar(unit.build()); 181 | } 182 | final Read.Builder rec = Read.newBuilder() 183 | .setFragmentName("test") 184 | .setAlignedSequence(seq) 185 | .setAlignment(alignment.build()); 186 | rec.getMutableInfo().put("MD", 187 | ListValue.newBuilder().addValues(0, Value.newBuilder().setStringValue(md).build()).build()); 188 | final String refBases = ReadUtils.inferReferenceSequenceByParsingMdFlag(rec.build()); 189 | assertEquals(refBases, expectedReference); 190 | } 191 | 192 | private static final Object[][] TEST_DATA = new Object[][]{ 193 | {"ACGTACGTACGT", "2H12M", "12", "ACGTACGTACGT"}, 194 | {"ACGTACGTACGT", "4M4I4M2H", "8", "ACGT----ACGT"}, 195 | {"ACGTACGTACGT", "2S4M2I4M2S", "8", "00GTAC--ACGT00"}, 196 | {"ACGTACGTACGT", "6M2D6M2H", "4GA^TT0TG4", "ACGTGATGACGT"}, 197 | {"ACGTACGTACGT", "6M2N6M2H", "4GA0TG4", 
"ACGTGATGACGT"}, 198 | {"ACGTACGTACGT", "6M2N6M2H", "4GATG4", "ACGTGATGACGT"}}; 199 | 200 | @Test 201 | public void testGetReferenceSequence() throws IOException { 202 | for (Object[] o : TEST_DATA) { 203 | testGetReferenceSequenceHelper((String) o[0], 204 | (String) o[1], 205 | (String) o[2], 206 | (String) o[3]); 207 | } 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/grpc/GenomicsStreamIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 13 | */ 14 | package com.google.cloud.genomics.utils.grpc; 15 | 16 | import com.google.api.client.util.BackOff; 17 | import com.google.api.client.util.ExponentialBackOff; 18 | import com.google.common.base.Predicate; 19 | import com.google.common.collect.Iterables; 20 | import com.google.common.collect.Lists; 21 | 22 | import io.grpc.ManagedChannel; 23 | 24 | import java.io.IOException; 25 | import java.util.Iterator; 26 | import java.util.List; 27 | import java.util.logging.Level; 28 | import java.util.logging.Logger; 29 | 30 | /** 31 | * An iterator for streaming genomic data via gRPC with support for retries. 32 | * 33 | * Includes complex retry logic to upon failure resume the stream at the last known good start 34 | * position without returning duplicate data. 
35 | * 36 | * TODO: refactor this further to simplify the generic signature. 37 | * 38 | * @param Streaming request type. 39 | * @param Streaming response type. 40 | * @param Genomic data type returned by stream. 41 | * @param Blocking stub type. 42 | */ 43 | public abstract class GenomicsStreamIterator> 44 | implements Iterator { 45 | private static final Logger LOG = Logger.getLogger(GenomicsStreamIterator.class.getName()); 46 | 47 | protected final ManagedChannel genomicsChannel; 48 | protected final Predicate shardPredicate; 49 | protected final StubT stub; 50 | protected final RequestT originalRequest; 51 | 52 | protected ExponentialBackOff backoff; 53 | 54 | // Stateful members used to facilitate complex retry behavior for gRPC streams. 55 | private Iterator delegate; 56 | private ItemT lastSuccessfulDataItem; 57 | private String idSentinel; 58 | 59 | /** 60 | * Create a stream iterator that will filter shard data using the predicate, if supplied. 61 | * 62 | * @param channel The channel. 63 | * @param request The request for the shard of data. 64 | * @param shardPredicate A predicate used to client-side filter results returned (e.g., enforce a 65 | * shard boundary and/or limit to SNPs only) or null for no filtering. 66 | */ 67 | 68 | protected GenomicsStreamIterator(ManagedChannel channel, RequestT request, 69 | Predicate shardPredicate) { 70 | this.originalRequest = request; 71 | this.shardPredicate = shardPredicate; 72 | this.genomicsChannel = channel; 73 | stub = createStub(genomicsChannel); 74 | 75 | // Using default backoff settings. For details, see 76 | // https://developers.google.com/api-client-library/java/google-http-java-client/reference/1.19.0/com/google/api/client/util/ExponentialBackOff 77 | backoff = new ExponentialBackOff.Builder().build(); 78 | 79 | // RETRY STATE: Initialize settings. 
80 | delegate = createIterator(originalRequest); 81 | lastSuccessfulDataItem = null; 82 | idSentinel = null; 83 | } 84 | 85 | abstract StubT createStub(ManagedChannel channel); 86 | 87 | abstract Iterator createIteratorFromStub(RequestT request); 88 | 89 | abstract long getRequestStart(RequestT streamRequest); 90 | 91 | abstract long getDataItemStart(ItemT dataItem); 92 | 93 | abstract String getDataItemId(ItemT dataItem); 94 | 95 | abstract RequestT getRevisedRequest(long updatedStart); 96 | 97 | abstract List getDataList(ResponseT response); 98 | 99 | abstract ResponseT buildResponse(ResponseT response, Iterable dataList); 100 | 101 | private Iterator createIterator(RequestT request) { 102 | while (true) { 103 | try { 104 | return createIteratorFromStub(request); 105 | } catch (Exception e) { 106 | if (shouldRetryNow()) { 107 | LOG.log(Level.WARNING, "Retrying after failure to create iterator", e); 108 | } else { 109 | LOG.log(Level.WARNING, "All retries to create iterator consumed, re-throwing exception", 110 | e); 111 | throw e; 112 | } 113 | } 114 | } 115 | } 116 | 117 | private boolean shouldRetryNow() { 118 | long backOffMillis; 119 | try { 120 | backOffMillis = backoff.nextBackOffMillis(); 121 | } catch (IOException e1) { 122 | // Something strange happened, just give up. 
123 | backOffMillis = BackOff.STOP; 124 | } 125 | 126 | if (backOffMillis == BackOff.STOP) { 127 | backoff.reset(); 128 | return false; 129 | } 130 | 131 | try { 132 | Thread.sleep(backOffMillis); 133 | } catch (InterruptedException e) { 134 | LOG.log(Level.WARNING, "Backoff sleep interrupted", e); 135 | } 136 | return true; 137 | } 138 | 139 | /** 140 | * @see java.util.Iterator#hasNext() 141 | */ 142 | @Override 143 | public boolean hasNext() { 144 | boolean hasNext; 145 | while (true) { 146 | try { 147 | hasNext = delegate.hasNext(); 148 | break; 149 | } catch (Exception e) { 150 | if (shouldRetryNow()) { 151 | LOG.log(Level.WARNING, "Retrying after failing to get next item from stream: ", e); 152 | setStreamStateForRetry(); 153 | } else { 154 | LOG.log(Level.WARNING, "All retries to get next item from stream consumed, throwing: ", e); 155 | genomicsChannel.shutdownNow(); 156 | throw e; 157 | } 158 | } 159 | } 160 | if (!hasNext) { 161 | genomicsChannel.shutdownNow(); 162 | } 163 | return hasNext; 164 | } 165 | 166 | private void setStreamStateForRetry() { 167 | if (null == lastSuccessfulDataItem) { 168 | // We have never returned any data. No need to set up state needed to filter previously 169 | // returned results. 170 | delegate = createIterator(originalRequest); 171 | return; 172 | } 173 | 174 | if (getRequestStart(originalRequest) < getDataItemStart(lastSuccessfulDataItem)) { 175 | // Create a new iterator at the revised start position. 176 | delegate = createIterator(getRevisedRequest(getDataItemStart(lastSuccessfulDataItem))); 177 | } else { 178 | // The point at which the retry occurred was still within data overlapping the start of our 179 | // original request but not beyond it yet. 180 | delegate = createIterator(originalRequest); 181 | } 182 | 183 | // RETRY STATE: Enable the filtering of repeated data in next(). 
184 | idSentinel = getDataItemId(lastSuccessfulDataItem); 185 | } 186 | 187 | /** 188 | * @see java.util.Iterator#next() 189 | */ 190 | @Override 191 | public ResponseT next() { 192 | ResponseT response = delegate.next(); 193 | // TODO: Its more clean conceptually to do the same thing for all responses, but this could be a 194 | // place where we're wasting a lot of time rebuilding response objects when nothing has actually 195 | // changed. 196 | return buildResponse(response, enforceShardPredicate(removeRepeatedData(getDataList(response)))); 197 | } 198 | 199 | private List removeRepeatedData(List dataList) { 200 | List filteredDataList = null; 201 | if (null == idSentinel) { 202 | filteredDataList = dataList; 203 | } else { 204 | // Filter out previously returned data items. 205 | filteredDataList = Lists.newArrayList(); 206 | boolean sentinelFound = false; 207 | for (ItemT dataItem : dataList) { 208 | if (sentinelFound) { 209 | filteredDataList.add(dataItem); 210 | } else { 211 | if (idSentinel.equals(getDataItemId(dataItem))) { 212 | // RETRY STATE: We're at the end of the repeated data. Unset the sentinel and proceed. 213 | idSentinel = null; 214 | sentinelFound = true; 215 | } 216 | } 217 | } 218 | } 219 | // RETRY STATE: Keep our last successfully returned data item in memory, just in case we need to 220 | // retry. 
221 | if (filteredDataList.size() > 0) { 222 | lastSuccessfulDataItem = filteredDataList.get(filteredDataList.size() - 1); 223 | } 224 | return filteredDataList; 225 | } 226 | 227 | private Iterable enforceShardPredicate(Iterable dataList) { 228 | if (null == shardPredicate) { 229 | return dataList; 230 | } 231 | return Iterables.filter(dataList, shardPredicate); 232 | } 233 | 234 | /** 235 | * @see java.util.Iterator#remove() 236 | */ 237 | @Override 238 | public void remove() { 239 | delegate.remove(); 240 | } 241 | } 242 | -------------------------------------------------------------------------------- /src/main/java/com/google/cloud/genomics/utils/GenomicsUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 Google Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 | * in compliance with the License. You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software distributed under the License 10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 | * or implied. See the License for the specific language governing permissions and limitations under 12 | * the License. 
13 | */ 14 | package com.google.cloud.genomics.utils; 15 | 16 | import com.google.api.services.genomics.Genomics; 17 | import com.google.api.services.genomics.model.CallSet; 18 | import com.google.api.services.genomics.model.CoverageBucket; 19 | import com.google.api.services.genomics.model.ListCoverageBucketsResponse; 20 | import com.google.api.services.genomics.model.ReadGroupSet; 21 | import com.google.api.services.genomics.model.Reference; 22 | import com.google.api.services.genomics.model.ReferenceBound; 23 | import com.google.api.services.genomics.model.SearchCallSetsRequest; 24 | import com.google.api.services.genomics.model.SearchReadGroupSetsRequest; 25 | import com.google.api.services.genomics.model.SearchReferencesRequest; 26 | import com.google.api.services.genomics.model.SearchVariantSetsRequest; 27 | import com.google.api.services.genomics.model.VariantSet; 28 | import com.google.common.base.Strings; 29 | import com.google.common.collect.Lists; 30 | 31 | import java.io.IOException; 32 | import java.util.List; 33 | 34 | /** 35 | * Convenience routines for fetching ids in the hierarchy of data within the Genomics API and other data lookups. 36 | */ 37 | public class GenomicsUtils { 38 | 39 | /** 40 | * Gets ReadGroupSetIds from a given datasetId using the Genomics API. 41 | * 42 | * @param datasetId The id of the dataset to query. 43 | * @param auth The OfflineAuth for the API request. 44 | * @return The list of readGroupSetIds in the dataset. 45 | * @throws IOException If dataset does not contain any readGroupSets. 
46 | */ 47 | public static List getReadGroupSetIds(String datasetId, OfflineAuth auth) throws IOException { 48 | List output = Lists.newArrayList(); 49 | Genomics genomics = GenomicsFactory.builder().build().fromOfflineAuth(auth); 50 | Iterable rgs = Paginator.ReadGroupSets.create(genomics) 51 | .search(new SearchReadGroupSetsRequest().setDatasetIds(Lists.newArrayList(datasetId)), 52 | "readGroupSets(id),nextPageToken"); 53 | for (ReadGroupSet r : rgs) { 54 | output.add(r.getId()); 55 | } 56 | if (output.isEmpty()) { 57 | throw new IOException("Dataset " + datasetId + " does not contain any ReadGroupSets"); 58 | } 59 | return output; 60 | } 61 | 62 | /** 63 | * Gets the ReferenceSetId for a given readGroupSetId using the Genomics API. 64 | * 65 | * @param readGroupSetId The id of the readGroupSet to query. 66 | * @param auth The OfflineAuth for the API request. 67 | * @return The referenceSetId for the redGroupSet (which may be null). 68 | * @throws IOException 69 | */ 70 | public static String getReferenceSetId(String readGroupSetId, OfflineAuth auth) 71 | throws IOException { 72 | Genomics genomics = GenomicsFactory.builder().build().fromOfflineAuth(auth); 73 | ReadGroupSet readGroupSet = genomics.readgroupsets().get(readGroupSetId) 74 | .setFields("referenceSetId").execute(); 75 | return readGroupSet.getReferenceSetId(); 76 | } 77 | 78 | /** 79 | * Gets the CoverageBuckets for a given readGroupSetId using the Genomics API. 80 | * 81 | * @param readGroupSetId The id of the readGroupSet to query. 82 | * @param auth The OfflineAuth for the API request. 83 | * @return The list of reference bounds in the variantSet. 
84 | * @throws IOException 85 | */ 86 | public static List getCoverageBuckets(String readGroupSetId, OfflineAuth auth) 87 | throws IOException { 88 | Genomics genomics = GenomicsFactory.builder().build().fromOfflineAuth(auth); 89 | ListCoverageBucketsResponse response = 90 | genomics.readgroupsets().coveragebuckets().list(readGroupSetId).execute(); 91 | // Requests of this form return one result per reference name, so therefore many fewer than 92 | // the default page size, but verify that the assumption holds true. 93 | if (!Strings.isNullOrEmpty(response.getNextPageToken())) { 94 | throw new IllegalArgumentException("Read group set " + readGroupSetId 95 | + " has more Coverage Buckets than the default page size for the CoverageBuckets list operation."); 96 | } 97 | return response.getCoverageBuckets(); 98 | } 99 | 100 | 101 | /** 102 | * Gets the references for a given referenceSetId using the Genomics API. 103 | * 104 | * @param referenceSetId The id of the referenceSet to query. 105 | * @param auth The OfflineAuth for the API request. 106 | * @return The list of references in the referenceSet. 107 | * @throws IOException 108 | */ 109 | public static Iterable getReferences(String referenceSetId, OfflineAuth auth) 110 | throws IOException { 111 | Genomics genomics = GenomicsFactory.builder().build().fromOfflineAuth(auth); 112 | return Paginator.References.create( 113 | genomics).search(new SearchReferencesRequest().setReferenceSetId(referenceSetId)); 114 | } 115 | 116 | /** 117 | * Gets VariantSetIds from a given datasetId using the Genomics API. 118 | * 119 | * @param datasetId The id of the dataset to query. 120 | * @param auth The OfflineAuth for the API request. 121 | * @return The list of variantSetIds in the dataset. 122 | * @throws IOException If dataset does not contain any variantSets. 
123 | */ 124 | public static List getVariantSetIds(String datasetId, OfflineAuth auth) 125 | throws IOException { 126 | List output = Lists.newArrayList(); 127 | Genomics genomics = GenomicsFactory.builder().build().fromOfflineAuth(auth); 128 | Iterable vs = Paginator.Variantsets.create(genomics) 129 | .search(new SearchVariantSetsRequest().setDatasetIds(Lists.newArrayList(datasetId)), 130 | "variantSets(id),nextPageToken"); 131 | for (VariantSet v : vs) { 132 | output.add(v.getId()); 133 | } 134 | if (output.isEmpty()) { 135 | throw new IOException("Dataset " + datasetId + " does not contain any VariantSets"); 136 | } 137 | return output; 138 | } 139 | 140 | /** 141 | * Gets CallSets for a given variantSetId using the Genomics API. 142 | * 143 | * @param variantSetId The id of the variantSet to query. 144 | * @param auth The OfflineAuth for the API request. 145 | * @return The list of callSet names in the variantSet. 146 | * @throws IOException If variantSet does not contain any CallSets. 147 | */ 148 | public static Iterable getCallSets(String variantSetId, OfflineAuth auth) 149 | throws IOException { 150 | Genomics genomics = GenomicsFactory.builder().build().fromOfflineAuth(auth); 151 | return Paginator.Callsets.create(genomics) 152 | .search(new SearchCallSetsRequest().setVariantSetIds(Lists.newArrayList(variantSetId)), 153 | "callSets,nextPageToken"); 154 | } 155 | 156 | /** 157 | * Gets CallSets Names for a given variantSetId using the Genomics API. 158 | * 159 | * @param variantSetId The id of the variantSet to query. 160 | * @param auth The OfflineAuth for the API request. 161 | * @return The list of callSet names in the variantSet. 162 | * @throws IOException If variantSet does not contain any CallSets. 
163 | */ 164 | public static List getCallSetsNames(String variantSetId, OfflineAuth auth) 165 | throws IOException { 166 | List output = Lists.newArrayList(); 167 | Genomics genomics = GenomicsFactory.builder().build().fromOfflineAuth(auth); 168 | Iterable cs = Paginator.Callsets.create(genomics) 169 | .search(new SearchCallSetsRequest().setVariantSetIds(Lists.newArrayList(variantSetId)), 170 | "callSets(name),nextPageToken"); 171 | for (CallSet c : cs) { 172 | output.add(c.getName()); 173 | } 174 | if (output.isEmpty()) { 175 | throw new IOException("VariantSet " + variantSetId + " does not contain any CallSets"); 176 | } 177 | return output; 178 | } 179 | 180 | /** 181 | * Gets the ReferenceBounds for a given variantSetId using the Genomics API. 182 | * 183 | * @param variantSetId The id of the variantSet to query. 184 | * @param auth The OfflineAuth for the API request. 185 | * @return The list of reference bounds in the variantSet. 186 | * @throws IOException 187 | */ 188 | public static List getReferenceBounds(String variantSetId, OfflineAuth auth) 189 | throws IOException { 190 | Genomics genomics = GenomicsFactory.builder().build().fromOfflineAuth(auth); 191 | VariantSet variantSet = genomics.variantsets().get(variantSetId).execute(); 192 | return variantSet.getReferenceBounds(); 193 | } 194 | } 195 | --------------------------------------------------------------------------------