├── .gitallowed
├── .gitignore
├── cloud-examples
└── src
│ ├── test
│ ├── scala
│ │ ├── com
│ │ │ └── cloudera
│ │ │ │ └── spark
│ │ │ │ └── cloud
│ │ │ │ ├── s3
│ │ │ │ ├── S3ASeekReadNormalIOSuite.scala
│ │ │ │ ├── TestParquetBinding.scala
│ │ │ │ ├── S3ASeekReadRandomIOSuite.scala
│ │ │ │ ├── commit
│ │ │ │ │ ├── AbstractS3ACommitterSuite.scala
│ │ │ │ │ ├── S3ACommitterFactorySuite.scala
│ │ │ │ │ ├── Events.scala
│ │ │ │ │ └── S3ACommitDataframeSuite.scala
│ │ │ │ ├── S3ABasicIOSuite.scala
│ │ │ │ ├── S3ANumbersSuite.scala
│ │ │ │ ├── S3AStreamingSuite.scala
│ │ │ │ ├── S3ASeekReadSequentialIOSuite.scala
│ │ │ │ ├── S3ADataFrameSuite.scala
│ │ │ │ ├── S3ALineCountSuite.scala
│ │ │ │ ├── S3ANumbersSuiteV2APISuite.scala
│ │ │ │ ├── S3AFileGeneratorSuite.scala
│ │ │ │ ├── S3DependencyCheckSuite.scala
│ │ │ │ ├── S3ACSVReadSuite.scala
│ │ │ │ ├── S3AEncryptionSuite.scala
│ │ │ │ └── S3ALineCountWritebackSuite.scala
│ │ │ │ ├── gs
│ │ │ │ ├── GsDataFrameSuite.scala
│ │ │ │ ├── GsCSVReadSuite.scala
│ │ │ │ ├── GsBasicIOSuite.scala
│ │ │ │ ├── GsCommitDataframeSuite.scala
│ │ │ │ ├── AbstractGsCommitterSuite.scala
│ │ │ │ └── GSDependencyCheckSuite.scala
│ │ │ │ ├── abfs
│ │ │ │ ├── AbfsBasicIOSuite.scala
│ │ │ │ ├── AbfsDataFrameSuite.scala
│ │ │ │ ├── AbfsCSVReadSuite.scala
│ │ │ │ └── commit
│ │ │ │ │ ├── AbfsCommitDataframeSuite.scala
│ │ │ │ │ └── AbstractAbfsCommitterSuite.scala
│ │ │ │ ├── azure
│ │ │ │ ├── AzureBasicIOSuite.scala
│ │ │ │ ├── AzureStreamingSuite.scala
│ │ │ │ ├── AzureCSVReadSuite.scala
│ │ │ │ ├── AzureSeekReadSuite.scala
│ │ │ │ ├── AzureFileGeneratorSuite.scala
│ │ │ │ ├── AzureLineCountSuite.scala
│ │ │ │ └── AzureDataFrameSuite.scala
│ │ │ │ ├── csv
│ │ │ │ ├── LocalHugeCsvIOSuite.scala
│ │ │ │ └── AbfsHugeCsvIOSuite.scala
│ │ │ │ ├── common
│ │ │ │ ├── StreamingTests.scala
│ │ │ │ ├── CloudSuiteWithCSVDatasource.scala
│ │ │ │ ├── HadoopVersionSuite.scala
│ │ │ │ ├── ReadSample.scala
│ │ │ │ ├── DataFrameTests.scala
│ │ │ │ ├── FileGeneratorTests.scala
│ │ │ │ └── SeekReadTests.scala
│ │ │ │ ├── examples
│ │ │ │ └── S3DataFrameExampleSuite.scala
│ │ │ │ └── committers
│ │ │ │ └── AbstractCommitterSuite.scala
│ │ └── org
│ │ │ └── apache
│ │ │ └── spark
│ │ │ ├── sql
│ │ │ ├── hive
│ │ │ │ └── orc
│ │ │ │ │ ├── gs
│ │ │ │ │ ├── GsParquetPartitionSuite.scala
│ │ │ │ │ ├── GsOrcRelationSuite.scala
│ │ │ │ │ ├── GsParquetRelationSuite.scala
│ │ │ │ │ ├── GsOrcPartitionSuite.scala
│ │ │ │ │ └── GsParquetRelationScaleSuite.scala
│ │ │ │ │ ├── abfs
│ │ │ │ │ ├── AbfsParquetPartitionSuite.scala
│ │ │ │ │ ├── AbfsOrcRelationSuite.scala
│ │ │ │ │ ├── AbfsOrcPartitionSuite.scala
│ │ │ │ │ ├── AbfsParquetRelationSuite.scala
│ │ │ │ │ └── AbfsParquetRelationScaleSuite.scala
│ │ │ │ │ └── cloud
│ │ │ │ │ ├── S3AOrcRelationSuite.scala
│ │ │ │ │ ├── S3AOrcPartitionSuite.scala
│ │ │ │ │ ├── S3AParquetPartitionSuite.scala
│ │ │ │ │ ├── S3AParquetRelationSuite.scala
│ │ │ │ │ ├── S3AParquetRelationScaleSuite.scala
│ │ │ │ │ └── S3AOrcRelationScaleSuite.scala
│ │ │ └── sources
│ │ │ │ ├── MustDeclareDatasource.scala
│ │ │ │ ├── ParquetRelationTrait.scala
│ │ │ │ ├── AbtractOrcRelationSuite.scala
│ │ │ │ ├── CloudPartitionTest.scala
│ │ │ │ └── HiveTestTrait.scala
│ │ │ └── SparkScopeWorkarounds.scala
│ └── resources
│ │ ├── core-site.xml
│ │ └── log4j2.properties
│ └── main
│ ├── scala
│ ├── com
│ │ └── cloudera
│ │ │ └── spark
│ │ │ └── cloud
│ │ │ ├── s3
│ │ │ ├── NormalIOPolicy.scala
│ │ │ ├── SequentialIOPolicy.scala
│ │ │ ├── IOPolicy.scala
│ │ │ ├── RandomIOPolicy.scala
│ │ │ ├── S3AFileGenerator.scala
│ │ │ ├── S3ALineCount.scala
│ │ │ ├── S3AStreaming.scala
│ │ │ ├── S3ADataFrames.scala
│ │ │ ├── S3AExampleSetup.scala
│ │ │ └── S3ATestSetup.scala
│ │ │ ├── utils
│ │ │ ├── Demo.scala
│ │ │ ├── ForceRecentHadoopVersion.scala
│ │ │ ├── ExtraAssertions.scala
│ │ │ └── StatisticsTracker.scala
│ │ │ ├── adl
│ │ │ └── AdlTestSetup.scala
│ │ │ ├── azure
│ │ │ └── AzureTestSetup.scala
│ │ │ ├── gs
│ │ │ └── GsTestSetup.scala
│ │ │ ├── abfs
│ │ │ └── AbfsTestSetup.scala
│ │ │ ├── local
│ │ │ └── LocalTestSetup.scala
│ │ │ ├── common
│ │ │ ├── CsvDatasourceSupport.scala
│ │ │ ├── ContextFreeCloudSuite.scala
│ │ │ ├── StoreTestHelper.scala
│ │ │ ├── StoreTestOperations.scala
│ │ │ └── CloudSuite.scala
│ │ │ └── examples
│ │ │ └── AzureStreamingExample.scala
│ └── org
│ │ └── apache
│ │ └── hadoop
│ │ └── fs
│ │ └── FSHelper.scala
│ ├── site
│ └── using.md
│ └── resources
│ └── log4j.properties
├── spark-cloud-integration
└── src
│ └── main
│ ├── scala
│ ├── org
│ │ └── apache
│ │ │ └── spark
│ │ │ └── cloudera
│ │ │ ├── package.scala
│ │ │ └── statistics
│ │ │ ├── IOStatisticsAccumulator.scala
│ │ │ └── IOStatisticsCollectorExecutorPlugin.scala
│ └── com
│ │ └── cloudera
│ │ └── spark
│ │ └── cloud
│ │ ├── test
│ │ └── UnitTestSuite.scala
│ │ ├── s3
│ │ └── audit
│ │ │ ├── ServerLogEntry.scala
│ │ │ └── LogParser.scala
│ │ ├── utils
│ │ ├── IntegrationUtils.scala
│ │ ├── ConfigSerDeser.scala
│ │ └── HConf.scala
│ │ ├── CommitterInfo.scala
│ │ └── CommitterBinding.scala
│ └── site
│ └── markdown
│ └── integration.md
├── README.md
└── .travis.yml
/.gitallowed:
--------------------------------------------------------------------------------
1 | # serialization
2 | \-[0-9]+L
--------------------------------------------------------------------------------
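
A note on the pattern above: `\-[0-9]+L` allowlists negative long literals
(typically `serialVersionUID` values) so that a git-secrets-style scanner does
not flag them as leaked credentials. A minimal illustration, with a made-up
value:

    // Lines like this match \-[0-9]+L and are deliberately let through:
    private val serialVersionUID = -1234567890123456789L
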
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | pom.xml.tag
3 | pom.xml.releaseBackup
4 | pom.xml.versionsBackup
5 | pom.xml.next
6 | release.properties
7 | dependency-reduced-pom.xml
8 | buildNumber.properties
9 | .mvn/timing.properties
10 | cloud.xml
11 | cloud-examples/metastore_db
12 | cloud-examples/derby.log
13 | cloud-examples/spark-warehouse
14 | cloud-examples/src/scripts
15 | spark-snapshot
16 | *.iws
17 | *.ipr
18 |
19 | /cloud-examples/build.properties
20 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3ASeekReadNormalIOSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | class S3ASeekReadNormalIOSuite extends S3ASeekReadSequentialIOSuite {
21 |
22 | override def inputPolicy: String = NORMAL_IO
23 | }
24 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/s3/NormalIOPolicy.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | trait NormalIOPolicy extends IOPolicy {
21 |
22 | /**
23 | * Use the "normal" adaptive IO policy.
24 | *
25 | * @return the IO type
26 | */
27 | override def inputPolicy: String = NORMAL_IO
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/TestParquetBinding.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.test.UnitTestSuite
21 |
22 | /**
23 | * Look at what the Parquet committer binding is up to.
24 | */
25 | class TestParquetBinding extends UnitTestSuite {
26 |
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/s3/SequentialIOPolicy.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | trait SequentialIOPolicy extends IOPolicy {
21 |
22 | /**
23 | * Use original sequential IO
24 | *
25 | * @return the IO type
26 | */
27 | override def inputPolicy: String = SEQUENTIAL_IO
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3ASeekReadRandomIOSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | /**
21 | * Subclass of `S3ASeekReadSequentialIOSuite` with random IO turned on.
22 | */
23 | class S3ASeekReadRandomIOSuite extends S3ASeekReadSequentialIOSuite {
24 |
25 | override def inputPolicy: String = RANDOM_IO
26 | }
27 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/s3/IOPolicy.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | /**
21 | * Declares the S3A input policy to request.
22 | */
23 | trait IOPolicy extends S3AConstants {
24 |
25 | /**
26 | * What input policy to request
27 | *
28 | * @return the IO type
29 | */
30 | def inputPolicy: String
31 |
32 | }
33 |
--------------------------------------------------------------------------------
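
Implementations of this trait return one of the `S3AConstants` policy strings.
As a hedged sketch (the helper and its wiring are illustrative, not part of
this repository), the policy would typically be applied through the S3A
`fs.s3a.experimental.input.fadvise` option before the test filesystem is
created:

    import com.cloudera.spark.cloud.s3.IOPolicy

    import org.apache.hadoop.conf.Configuration

    object InputPolicyWiring {
      // Illustrative helper: push the declared policy into a Hadoop
      // configuration; S3A expects "normal", "sequential" or "random".
      def configure(conf: Configuration, policy: IOPolicy): Unit = {
        conf.set("fs.s3a.experimental.input.fadvise", policy.inputPolicy)
      }
    }
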
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/gs/GsParquetPartitionSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.gs
19 |
20 | import org.apache.spark.sql.sources.ParquetRelationTrait
21 |
22 | /**
23 | * Partitioned queries with Parquet data against GCS.
24 | */
25 | class GsParquetPartitionSuite extends GsOrcPartitionSuite with
26 | ParquetRelationTrait {
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/s3/RandomIOPolicy.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | /**
21 | * Switch to random S3A IO.
22 | */
23 | trait RandomIOPolicy extends IOPolicy {
24 |
25 | /**
26 | * Use random IO for high-performance ORC reads.
27 | *
28 | * @return the IO type
29 | */
30 | override def inputPolicy: String = RANDOM_IO
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/commit/AbstractS3ACommitterSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3.commit
19 |
20 | import com.cloudera.spark.cloud.committers.AbstractCommitterSuite
21 | import com.cloudera.spark.cloud.s3.S3ATestSetup
22 |
23 | abstract class AbstractS3ACommitterSuite
24 | extends AbstractCommitterSuite with S3ATestSetup {
25 |
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/scala/org/apache/spark/cloudera/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 |
19 | package org.apache.spark
20 |
21 | /**
22 | * Package for things which need access to Spark-private structures.
23 | *
24 | * These have to be viewed as unstable; if something breaks due to a Spark
25 | * change, that has to be accepted as inevitable.
26 | */
27 | package object cloudera {
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/abfs/AbfsParquetPartitionSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.abfs
19 |
20 | import org.apache.spark.sql.sources.ParquetRelationTrait
21 |
22 | /**
23 | * Partitioned queries with Parquet data against ABFS.
24 | */
25 | class AbfsParquetPartitionSuite extends AbfsOrcPartitionSuite with
26 | ParquetRelationTrait {
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/s3/S3AFileGenerator.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.operations.CloudFileGenerator
21 |
22 | /**
23 | * Generate a file containing some numbers in the remote object store.
24 | */
25 | object S3AFileGenerator extends CloudFileGenerator with S3AExampleSetup
26 | with SequentialIOPolicy {
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/utils/Demo.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.utils
19 |
20 | import com.github.lalyos.jfiglet.FigletFont
21 |
22 | object Demo {
23 |
24 | /**
25 | * Uses figlet to render to a string.
26 | * see: https://github.com/lalyos/jfiglet
27 | */
28 | def text(m: String): String = {
29 | "\n" + FigletFont.convertOneLine(m)
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
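
A usage sketch for `Demo.text` (the exact output shape depends on jfiglet's
default font):

    import com.cloudera.spark.cloud.utils.Demo

    // Prints "S3A" rendered as multi-line ASCII art, preceded by a newline.
    println(Demo.text("S3A"))
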
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/gs/GsDataFrameSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.gs
19 |
20 | import com.cloudera.spark.cloud.common.DataFrameTests
21 |
22 | /**
23 | * Test GS and DataFrames.
24 | */
25 | class GsDataFrameSuite extends DataFrameTests with GsTestSetup {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | if (enabled) {
31 | initFS()
32 | }
33 | }
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/abfs/AbfsBasicIOSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.abfs
19 |
20 | import com.cloudera.spark.cloud.common.BasicIOTests
21 |
22 | /**
23 | * ABFS basic IO operations.
24 | */
25 | class AbfsBasicIOSuite extends BasicIOTests with AbfsTestSetup {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | if (enabled) {
31 | initFS()
32 | }
33 | }
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/azure/AzureBasicIOSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.azure
19 |
20 | import com.cloudera.spark.cloud.common.BasicIOTests
21 |
22 | /**
23 | * Azure's basic IO operations.
24 | */
25 | class AzureBasicIOSuite extends BasicIOTests with AzureTestSetup {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | if (enabled) {
31 | initFS()
32 | }
33 | }
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/abfs/AbfsDataFrameSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.abfs
19 |
20 | import com.cloudera.spark.cloud.common.DataFrameTests
21 |
22 | /**
23 | * Test ABFS and DataFrames.
24 | */
25 | class AbfsDataFrameSuite extends DataFrameTests with AbfsTestSetup {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | if (enabled) {
31 | initFS()
32 | }
33 | }
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/gs/GsCSVReadSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.gs
19 |
20 | import com.cloudera.spark.cloud.common.CSVReadTests
21 |
22 | class GsCSVReadSuite extends CSVReadTests with GsTestSetup {
23 | init()
24 |
25 | /**
26 | * set up FS if enabled.
27 | */
28 | def init(): Unit = {
29 | if (enabled) {
30 | initFS()
31 | initDatasources()
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/azure/AzureStreamingSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.azure
19 |
20 | import com.cloudera.spark.cloud.common.StreamingTests
21 |
22 | /**
23 | * Test Streaming under Azure.
24 | */
25 | class AzureStreamingSuite extends StreamingTests with AzureTestSetup {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | if (enabled) {
31 | initFS()
32 | }
33 | }
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/abfs/AbfsCSVReadSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.abfs
19 |
20 | import com.cloudera.spark.cloud.common.CSVReadTests
21 |
22 | class AbfsCSVReadSuite extends CSVReadTests with AbfsTestSetup {
23 | init()
24 |
25 | /**
26 | * set up FS if enabled.
27 | */
28 | def init(): Unit = {
29 | if (enabled) {
30 | initFS()
31 | initDatasources()
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/azure/AzureCSVReadSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.azure
19 |
20 | import com.cloudera.spark.cloud.common.CSVReadTests
21 |
22 | class AzureCSVReadSuite extends CSVReadTests with AzureTestSetup {
23 | init()
24 |
25 | /**
26 | * set up FS if enabled.
27 | */
28 | def init(): Unit = {
29 | if (enabled) {
30 | initFS()
31 | initDatasources()
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/azure/AzureSeekReadSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.azure
19 |
20 | import com.cloudera.spark.cloud.common.SeekReadTests
21 |
22 | class AzureSeekReadSuite extends SeekReadTests with AzureTestSetup {
23 | init()
24 |
25 | /**
26 | * set up FS if enabled.
27 | */
28 | def init(): Unit = {
29 | if (enabled) {
30 | initFS()
31 | initDatasources()
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3ABasicIOSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.BasicIOTests
21 |
22 | /**
23 | * Basic S3A IO Tests.
24 | */
25 | class S3ABasicIOSuite extends BasicIOTests with S3ATestSetup {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | // propagate S3 credentials
31 | if (enabled) {
32 | initFS()
33 | }
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/gs/GsBasicIOSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.gs
19 |
20 | import com.cloudera.spark.cloud.common.BasicIOTests
21 |
22 | /**
23 | * GS's basic IO operations.
24 | */
25 | class GsBasicIOSuite extends BasicIOTests with GsTestSetup {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | if (enabled) {
31 | initFS()
32 | } else {
33 | log.info("suite is not enabled")
34 | }
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/scala/com/cloudera/spark/cloud/test/UnitTestSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.test
19 |
20 | import org.scalatest.funsuite.AnyFunSuite
21 | import org.scalatest.matchers.must.Matchers
22 |
23 |
24 | import org.apache.spark.internal.Logging
25 |
26 | /**
27 | * Base class for test suites.
28 | * Added because scalatest imports are too brittle to use directly in every suite.
29 | */
30 | class UnitTestSuite extends AnyFunSuite with Logging with Matchers {
31 |
32 | }
33 |
--------------------------------------------------------------------------------
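
A minimal sketch of a suite built on this base class (the suite and its
assertion are illustrative only):

    package com.cloudera.spark.cloud.test

    // Hypothetical example: subclasses get AnyFunSuite's test() and the
    // must-style Matchers without importing scalatest themselves.
    class ExampleUnitSuite extends UnitTestSuite {
      test("string concatenation") {
        ("foo" + "bar") must be("foobar")
      }
    }
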
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3ANumbersSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.NumbersRddTests
21 |
22 | class S3ANumbersSuite extends NumbersRddTests with S3ATestSetup {
23 | init()
24 |
25 | def init(): Unit = {
26 | // propagate S3 credentials
27 | if (enabled) {
28 | initFS()
29 | }
30 | }
31 |
32 | override protected def pathname = {
33 | "s3a_numbers_suite"
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/gs/GsOrcRelationSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.gs
19 |
20 | import com.cloudera.spark.cloud.gs.GsTestSetup
21 |
22 | import org.apache.spark.sql.sources.AbtractOrcRelationSuite
23 |
24 | class GsOrcRelationSuite extends AbtractOrcRelationSuite with GsTestSetup {
25 |
26 | init()
27 |
28 | def init(): Unit = {
29 | // propagate credentials
30 | if (enabled) {
31 | initFS()
32 | }
33 | }
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/csv/LocalHugeCsvIOSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.csv
19 |
20 | import com.cloudera.spark.cloud.local.LocalTestSetup
21 |
22 | /**
23 | * Local CSV tests to act as a baseline for performance/correctness.
24 | * Always runs.
25 | */
26 | class LocalHugeCsvIOSuite extends AbstractHugeCsvIOSuite with LocalTestSetup {
27 |
28 | init()
29 |
30 | /**
31 | * set up FS.
32 | */
33 | def init(): Unit = {
34 | initFS()
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/abfs/AbfsOrcRelationSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.abfs
19 |
20 | import com.cloudera.spark.cloud.abfs.AbfsTestSetup
21 |
22 | import org.apache.spark.sql.sources.AbtractOrcRelationSuite
23 |
24 | class AbfsOrcRelationSuite extends AbtractOrcRelationSuite with AbfsTestSetup {
25 |
26 | init()
27 |
28 | def init(): Unit = {
29 | // propagate credentials
30 | if (enabled) {
31 | initFS()
32 | }
33 | }
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/SparkScopeWorkarounds.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark
19 |
20 | import org.apache.spark.sql.hive.HiveUtils
21 |
22 | /**
23 | * Here to get at useful stuff that Spark keeps private but which turns
24 | * out to be invaluable during testing.
25 | *
26 | * Needless to say: things may break here without warning or redress.
27 | */
28 | object SparkScopeWorkarounds {
29 | def tempHiveConfig(): Map[String, String] = {
30 | HiveUtils.newTemporaryConfiguration(true)
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
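
A hedged sketch of how `tempHiveConfig()` might be used to give a test its own
throwaway metastore (the builder wiring is illustrative, not how this
repository's suites are configured):

    import org.apache.spark.SparkScopeWorkarounds
    import org.apache.spark.sql.SparkSession

    val builder = SparkSession.builder()
      .master("local[2]")
      .appName("hive-test")
      .enableHiveSupport()
    // Apply the temporary metastore settings before the Hive client exists.
    SparkScopeWorkarounds.tempHiveConfig().foreach { case (k, v) =>
      builder.config(k, v)
    }
    val spark = builder.getOrCreate()
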
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/gs/GsCommitDataframeSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.gs
19 |
20 | import com.cloudera.spark.cloud.committers.AbstractCommitDataframeSuite
21 |
22 | class GsCommitDataframeSuite
23 | extends AbstractCommitDataframeSuite with GsTestSetup {
24 |
25 | init()
26 |
27 | def init(): Unit = {
28 | if (enabled) {
29 | initFS()
30 | }
31 | }
32 |
33 | override def committers: Seq[String] = Seq("manifest")
34 |
35 | override def schema: String = "gs"
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/cloud/S3AOrcRelationSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.cloud
19 |
20 |
21 | import com.cloudera.spark.cloud.s3.S3ATestSetup
22 |
23 | import org.apache.spark.sql.sources.AbtractOrcRelationSuite
24 |
25 | class S3AOrcRelationSuite extends AbtractOrcRelationSuite with S3ATestSetup {
26 |
27 |
28 |
29 | init()
30 |
31 | def init(): Unit = {
32 | // propagate S3 credentials
33 | if (enabled) {
34 | initFS()
35 | }
36 | }
37 |
38 |
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/azure/AzureFileGeneratorSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.azure
19 |
20 | import com.cloudera.spark.cloud.common.FileGeneratorTests
21 |
22 | /**
23 | * Test the `FileGenerator` entry point under Azure.
24 | */
25 | class AzureFileGeneratorSuite extends FileGeneratorTests with AzureTestSetup {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | if (enabled) {
31 | initFS()
32 | }
33 | }
34 |
35 | after {
36 | cleanFilesystemInTeardown()
37 | }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/cloud/S3AOrcPartitionSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.cloud
19 |
20 | import com.cloudera.spark.cloud.s3.S3ATestSetup
21 |
22 | import org.apache.spark.sql.sources.CloudPartitionTest
23 |
24 | class S3AOrcPartitionSuite extends CloudPartitionTest with S3ATestSetup {
25 |
26 | init()
27 |
28 | def init(): Unit = {
29 | // propagate S3 credentials
30 | if (enabled) {
31 | initFS()
32 | }
33 | }
34 |
35 | override val dataSourceName: String = "orc"
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/cloud/S3AParquetPartitionSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.cloud
19 |
20 | import com.cloudera.spark.cloud.s3.S3ATestSetup
21 |
22 | import org.apache.spark.sql.sources.{CloudPartitionTest, ParquetRelationTrait}
23 |
24 | class S3AParquetPartitionSuite extends CloudPartitionTest with S3ATestSetup
25 | with ParquetRelationTrait {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | // propagate S3 credentials
31 | if (enabled) {
32 | initFS()
33 | }
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/gs/GsParquetRelationSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.gs
19 |
20 | import com.cloudera.spark.cloud.gs.GsTestSetup
21 |
22 | import org.apache.spark.sql.sources.{CloudRelationBasicSuite, ParquetRelationTrait}
23 |
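/**
 * Basic relation tests with Parquet data against GCS.
 */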
24 | class GsParquetRelationSuite extends CloudRelationBasicSuite
25 | with GsTestSetup
26 | with ParquetRelationTrait {
27 |
28 | init()
29 |
30 | def init(): Unit = {
31 | // propagate credentials
32 | if (enabled) {
33 | initFS()
34 | }
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/cloud/S3AParquetRelationSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.cloud
19 |
20 | import com.cloudera.spark.cloud.s3.S3ATestSetup
21 |
22 | import org.apache.spark.sql.sources.{CloudRelationBasicSuite, ParquetRelationTrait}
23 |
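/**
 * Basic relation tests with Parquet data against S3A.
 */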
24 | class S3AParquetRelationSuite extends CloudRelationBasicSuite
25 | with S3ATestSetup
26 | with ParquetRelationTrait {
27 |
28 | init()
29 |
30 | def init(): Unit = {
31 | // propagate S3 credentials
32 | if (enabled) {
33 | initFS()
34 | }
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3AStreamingSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.StreamingTests
21 | import com.cloudera.spark.cloud.operations.CloudStreaming
22 |
23 | /**
24 | * Test Streaming against S3A.
25 | */
26 | class S3AStreamingSuite extends StreamingTests with S3ATestSetup {
27 |
28 | init()
29 |
30 | def init(): Unit = {
31 | // propagate S3 credentials
32 | if (enabled) {
33 | initFS()
34 | }
35 | }
36 |
37 | override protected val instance: CloudStreaming = S3AStreaming
38 | }
39 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/gs/GsOrcPartitionSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.gs
19 |
20 | import com.cloudera.spark.cloud.gs.GsTestSetup
21 |
22 | import org.apache.spark.sql.sources.CloudPartitionTest
23 |
24 | /**
25 | * Partitioned queries with ORC data against GS.
26 | */
27 | class GsOrcPartitionSuite extends CloudPartitionTest with GsTestSetup {
28 |
29 | init()
30 |
31 | def init(): Unit = {
32 | if (enabled) {
33 | initFS()
34 | }
35 | }
36 |
37 | override def dataSourceName(): String = {
38 | "orc"
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3ASeekReadSequentialIOSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.SeekReadTests
21 |
22 | /**
23 | * Tests reading the S3A CSV file using sequential IO.
24 | */
25 | class S3ASeekReadSequentialIOSuite extends SeekReadTests with S3ATestSetup
26 | with SequentialIOPolicy {
27 |
28 | init()
29 |
30 | /**
31 | * set up FS if enabled.
32 | */
33 | def init(): Unit = {
34 | if (enabled) {
35 | initFS()
36 | initDatasources()
37 | }
38 | }
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/sources/MustDeclareDatasource.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.sources
19 |
20 | import org.apache.spark.sql.types.DataType
21 |
22 | /**
23 | * Subclasses must declare their datasource.
24 | */
25 | trait MustDeclareDatasource {
26 | /**
27 | * Name of the data source: this must be declared.
28 | */
29 | def dataSourceName(): String
30 |
31 |
32 | /**
33 | * Datatype mapping.
34 | *
35 | * @param dataType the data type to check
36 | * @return true if the type is supported
37 | */
38 | def supportsDataType(
39 | dataType: DataType): Boolean
40 | }
41 |
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/site/markdown/integration.md:
--------------------------------------------------------------------------------
1 |
14 |
15 | # Integrating the Apache Hadoop S3A Committers with Apache Spark
16 |
17 | This document looks at how to integrate the Hadoop S3A committers
18 | with Apache Spark; it is intended to apply to any custom `PathOutputCommitter`
19 | implementation.
20 |
21 |
22 | ## Background: Hadoop
23 |
24 | Hadoop has two MapReduce APIs, MRv1 and MRv2 (not to be confused with the v1/v2 commit
25 | algorithms). MRv1 classes are found under the package `org.apache.hadoop.mapred`;
26 | the MRv2 classes under `org.apache.hadoop.mapreduce`. This is important, as
27 | they often share classnames.
28 |
29 |
30 |
31 | The "original" V1 API shipped in Hadoop 1. The newer v2 API came in Hadoop 2.
32 | In Spark's `RDD.saveAsTextFile()` uses the MRv2 APIs to write data.
33 |
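For illustration, here is a minimal sketch (not code from this repository) of saving an RDD explicitly through the MRv2 write path via `saveAsNewAPIHadoopFile()`; the import alias shows why the shared classnames matter:

```scala
import org.apache.hadoop.io.{NullWritable, Text}
// Both APIs declare a TextOutputFormat; this import selects the MRv2 variant.
import org.apache.hadoop.mapreduce.lib.output.{TextOutputFormat => MRv2TextOutputFormat}

import org.apache.spark.SparkContext

// Save an RDD of strings through the MRv2 API.
def saveViaMRv2(sc: SparkContext, dest: String): Unit = {
  sc.parallelize(1 to 10)
    .map(i => (NullWritable.get(), new Text(i.toString)))
    .saveAsNewAPIHadoopFile[MRv2TextOutputFormat[NullWritable, Text]](dest)
}
```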
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3ADataFrameSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.DataFrameTests
21 | import com.cloudera.spark.cloud.operations.CloudDataFrames
22 |
23 | /**
24 | * Test the [[S3ADataFrames]] logic.
25 | */
26 | class S3ADataFrameSuite extends DataFrameTests with S3ATestSetup {
27 |
28 | init()
29 |
30 | def init(): Unit = {
31 | // propagate S3 credentials
32 | if (enabled) {
33 | initFS()
34 | }
35 | }
36 |
37 | override protected val instance: CloudDataFrames = S3ADataFrames
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/abfs/AbfsOrcPartitionSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.abfs
19 |
20 | import com.cloudera.spark.cloud.abfs.AbfsTestSetup
21 |
22 | import org.apache.spark.sql.sources.CloudPartitionTest
23 |
24 | /**
25 | * Partitioned queries with ORC data against ABFS.
26 | */
27 | class AbfsOrcPartitionSuite extends CloudPartitionTest with AbfsTestSetup {
28 |
29 | init()
30 |
31 | def init(): Unit = {
32 | if (enabled) {
33 | initFS()
34 | }
35 | }
36 |
37 | override def dataSourceName(): String = {
38 | "orc"
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/gs/GsParquetRelationScaleSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.gs
19 |
20 | import com.cloudera.spark.cloud.gs.GsTestSetup
21 |
22 | import org.apache.spark.sql.sources.{CloudRelationScaleTest, ParquetRelationTrait}
23 |
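/**
 * Scale relation tests with Parquet data against GCS; runs only when scale tests are enabled.
 */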
24 | class GsParquetRelationScaleSuite extends CloudRelationScaleTest
25 | with GsTestSetup
26 | with ParquetRelationTrait {
27 |
28 | init()
29 |
30 | def init(): Unit = {
31 | if (enabled) {
32 | initFS()
33 | }
34 | }
35 |
36 | override def enabled: Boolean = super.enabled && isScaleTestEnabled
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/resources/core-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 |
3 |
20 |
24 |
25 | <configuration>
26 | <property>
27 | <name>fs.s3a.committer.name</name>
28 | <value>directory</value>
29 | <description>
30 | Committer to create for output to S3A, one of:
31 | "file", "directory", "partitioned", "magic".
32 | </description>
33 | </property>
34 | </configuration>
35 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/abfs/AbfsParquetRelationSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.abfs
19 |
20 | import com.cloudera.spark.cloud.abfs.AbfsTestSetup
21 |
22 | import org.apache.spark.sql.sources.{CloudRelationBasicSuite, ParquetRelationTrait}
23 |
24 | class AbfsParquetRelationSuite extends CloudRelationBasicSuite
25 | with AbfsTestSetup
26 | with ParquetRelationTrait {
27 |
28 | init()
29 |
30 | def init(): Unit = {
31 | // propagate credentials
32 | if (enabled) {
33 | initFS()
34 | }
35 | }
36 | override def dynamicPartitioning: Boolean = true
37 | }
38 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/cloud/S3AParquetRelationScaleSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.cloud
19 |
20 | import com.cloudera.spark.cloud.s3.S3ATestSetup
21 |
22 | import org.apache.spark.sql.sources.{CloudRelationScaleTest, ParquetRelationTrait}
23 |
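/**
 * Scale relation tests with Parquet data against S3A; runs only when scale tests are enabled.
 */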
24 | class S3AParquetRelationScaleSuite extends CloudRelationScaleTest
25 | with S3ATestSetup
26 | with ParquetRelationTrait {
27 |
28 | init()
29 |
30 | def init(): Unit = {
31 | if (enabled) {
32 | initFS()
33 | }
34 | }
35 |
36 | override def enabled: Boolean = super.enabled && isScaleTestEnabled
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/sources/ParquetRelationTrait.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.sources
19 |
20 | import org.apache.spark.sql.types.{CalendarIntervalType, DataType, NullType}
21 |
22 |
23 |
24 | trait ParquetRelationTrait extends MustDeclareDatasource {
25 | // Parquet does not play well with NullType.
26 | override def supportsDataType(
27 | dataType: DataType): Boolean = dataType match {
28 | case _: NullType => false
29 | case _: CalendarIntervalType => false
30 | case _ => true
31 | }
32 |
33 | override def dataSourceName(): String = {
34 | "parquet"
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/abfs/commit/AbfsCommitDataframeSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.abfs.commit
19 |
20 | import com.cloudera.spark.cloud.abfs.AbfsTestSetup
21 | import com.cloudera.spark.cloud.committers.AbstractCommitDataframeSuite
22 |
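/**
 * Dataframe commit tests against ABFS, using the manifest committer.
 */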
23 | private class AbfsCommitDataframeSuite extends AbstractCommitDataframeSuite
24 | with AbfsTestSetup {
25 |
26 | init()
27 |
28 | def init(): Unit = {
29 | if (enabled) {
30 | initFS()
31 | }
32 | }
33 |
34 |
35 | override def committers: Seq[String] = Seq("manifest")
36 |
37 |
38 | override def schema: String = "abfs"
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/abfs/AbfsParquetRelationScaleSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.abfs
19 |
20 | import com.cloudera.spark.cloud.abfs.AbfsTestSetup
21 |
22 | import org.apache.spark.sql.sources.{CloudRelationScaleTest, ParquetRelationTrait}
23 |
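/**
 * Scale relation tests with Parquet data against ABFS; runs only when scale tests are enabled.
 */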
24 | class AbfsParquetRelationScaleSuite extends CloudRelationScaleTest
25 | with AbfsTestSetup
26 | with ParquetRelationTrait {
27 |
28 | init()
29 |
30 | def init(): Unit = {
31 | if (enabled) {
32 | initFS()
33 | }
34 | }
35 |
36 | override def enabled: Boolean = super.enabled && isScaleTestEnabled
37 |
38 |
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/org/apache/hadoop/fs/FSHelper.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.hadoop.fs
19 |
20 | import java.io.IOException
21 | import java.net.URI
22 |
23 | import org.apache.hadoop.conf.Configuration
24 |
25 | /**
26 | * Help with testing by accessing package-private methods in FileSystem which
27 | * are designed for aiding testability. They are normally accessed via
28 | * `FileSystemTestHelper`, but as that is in the hadoop-common test JAR, a simple
29 | * object here avoids maven import conflict problems.
30 | */
31 | object FSHelper {
32 |
33 | @throws[IOException]
34 | def addFileSystemForTesting(uri: URI, conf: Configuration, fs: FileSystem): Unit = {
35 | FileSystem.addFileSystemForTesting(uri, conf, fs)
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/hive/orc/cloud/S3AOrcRelationScaleSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.orc.cloud
19 |
20 | import com.cloudera.spark.cloud.s3.S3ATestSetup
21 |
22 | import org.apache.spark.sql.hive.orc.OrcFileFormat
23 | import org.apache.spark.sql.sources.CloudRelationScaleTest
24 |
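/**
 * Scale relation tests with ORC data against S3A; runs only when scale tests are enabled.
 */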
25 | class S3AOrcRelationScaleSuite extends CloudRelationScaleTest with S3ATestSetup {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | // propagate S3 credentials
31 | if (enabled) {
32 | initFS()
33 | }
34 | }
35 |
36 | override def enabled: Boolean = super.enabled && isScaleTestEnabled
37 |
38 | override val dataSourceName: String = classOf[OrcFileFormat].getCanonicalName
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/adl/AdlTestSetup.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.adl
19 |
20 | import java.net.URI
21 |
22 | import com.cloudera.spark.cloud.common.CopyCsvFileTrait
23 | import org.apache.hadoop.fs.FileSystem
24 |
25 | /**
26 | * Trait for ADL tests.
27 | *
28 | * This trait supports the CSV data source by copying over the data from S3A if
29 | * it isn't already at an ADL URL.
30 | */
31 | trait AdlTestSetup extends CopyCsvFileTrait {
32 |
33 | override def enabled: Boolean = {
34 | getConf.getBoolean(ADL_TESTS_ENABLED, false)
35 | }
36 |
37 | def initFS(): FileSystem = {
38 | val uri = new URI(requiredOption(ADL_TEST_URI))
39 | logDebug(s"Executing Azure tests against $uri")
40 | createFilesystem(uri)
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/azure/AzureTestSetup.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.azure
19 |
20 | import java.net.URI
21 |
22 | import com.cloudera.spark.cloud.common.CopyCsvFileTrait
23 | import org.apache.hadoop.fs.FileSystem
24 |
25 | /**
26 | * Trait for Azure WASB tests.
27 | *
28 | * This trait supports the CSV data source by copying over the data from S3A if
29 | * it isn't already at a WASB URL.
30 | */
31 | trait AzureTestSetup extends CopyCsvFileTrait {
32 |
33 | override def enabled: Boolean = {
34 | getConf.getBoolean(AZURE_TESTS_ENABLED, false)
35 | }
36 |
37 | def initFS(): FileSystem = {
38 | val uri = new URI(requiredOption(AZURE_TEST_URI))
39 | logDebug(s"Executing Azure tests against $uri")
40 | createFilesystem(uri)
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/common/StreamingTests.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.common
19 |
20 | import com.cloudera.spark.cloud.operations.CloudStreaming
21 |
22 | /**
23 | * Test Streaming.
24 | */
25 | abstract class StreamingTests extends CloudSuite {
26 |
27 | after {
28 | cleanFilesystemInTeardown()
29 | }
30 |
31 | /**
32 | * Override point: the streaming operation instance to run.
33 | */
34 | protected val instance: CloudStreaming = new CloudStreaming()
35 |
36 | ctest("streaming",
37 | "Execute the Streaming example") {
38 | val conf = newSparkConf()
39 | conf.setAppName("Streaming")
40 | val destDir = testPath(filesystem, "streaming")
41 | val rowCount = 1000
42 |
43 | assert(0 === instance.action(conf, Seq(destDir, rowCount)))
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/gs/GsTestSetup.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.gs
19 |
20 | import java.net.URI
21 |
22 | import com.cloudera.spark.cloud.common.CopyCsvFileTrait
23 | import org.apache.hadoop.fs.FileSystem
24 | /**
25 | * Trait for GCS.
26 | *
27 | * This trait supports the CSV data source by copying over the data from S3A if
28 | * it isn't already at a GCS URL.
29 | */
30 | trait GsTestSetup extends CopyCsvFileTrait {
31 |
32 | override def enabled: Boolean = {
33 | getConf.getBoolean(GS_TESTS_ENABLED, false)
34 | }
35 |
36 | def initFS(): FileSystem = {
37 | val uri = new URI(requiredOption(GS_TEST_URI))
38 | logDebug(s"Executing GCS tests against $uri")
39 | createFilesystem(uri)
40 | }
41 |
42 | override def dynamicPartitioning: Boolean = true
43 | }
44 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/abfs/AbfsTestSetup.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.abfs
19 |
20 | import java.net.URI
21 |
22 | import com.cloudera.spark.cloud.common.CopyCsvFileTrait
23 | import org.apache.hadoop.fs.FileSystem
24 |
25 | /**
26 | * Trait for Azure ABFS tests.
27 | *
28 | * This trait supports the CSV data source by copying over the data from S3A if
29 | * it isn't already at an ABFS URL.
30 | */
31 | trait AbfsTestSetup extends CopyCsvFileTrait {
32 |
33 | override def enabled: Boolean = {
34 | getConf.getBoolean(ABFS_TESTS_ENABLED, false)
35 | }
36 |
37 | override def dynamicPartitioning: Boolean = true
38 |
39 | def initFS(): FileSystem = {
40 | val uri = new URI(requiredOption(ABFS_TEST_URI))
41 | logDebug(s"Executing Abfs tests against $uri")
42 | createFilesystem(uri)
43 | }
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3ALineCountSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.CloudSuiteWithCSVDatasource
21 |
22 | /**
23 | * Test the `S3ALineCount` entry point.
24 | */
25 | class S3ALineCountSuite extends CloudSuiteWithCSVDatasource with S3ATestSetup {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | if (enabled) {
31 | setupFilesystemConfiguration(getConf)
32 | }
33 | }
34 |
35 | override def enabled: Boolean = super.enabled && hasCSVTestFile
36 |
37 | ctest("S3ALineCountReadData",
38 | "Execute the S3ALineCount example with the default values (i.e. no arguments)") {
39 | val sparkConf = newSparkConf(getTestCSVPath())
40 | sparkConf.setAppName("S3ALineCountDefaults")
41 | assert(0 === S3ALineCount.action(sparkConf, Seq()))
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3ANumbersSuiteV2APISuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.NumbersRddTests
21 | import org.apache.hadoop.fs.Path
22 |
23 | import org.apache.spark.rdd.RDD
24 |
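/**
 * Numbers RDD tests against S3A, saving through the MRv2 API.
 */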
25 | class S3ANumbersSuiteV2APISuite extends NumbersRddTests with S3ATestSetup {
26 | init()
27 |
28 | def init(): Unit = {
29 | // propagate S3 credentials
30 | if (enabled) {
31 | initFS()
32 | }
33 | }
34 |
35 | override protected def pathname = {
36 | "numbers_rdd_tests_v2api"
37 | }
38 |
39 | /**
40 | * Save the RDD.
41 | *
42 | * @param numbers RDD to save
43 | * @param dest destination path
44 | */
45 | override protected def saveRDD(
46 | numbers: RDD[Int],
47 | dest: Path): Unit = {
48 | saveRDDviaMRv2(numbers, dest)
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/s3/S3ALineCount.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.CloudTestKeys
21 | import com.cloudera.spark.cloud.operations.LineCount
22 |
23 | import org.apache.spark.SparkConf
24 |
25 | /**
26 | * A line count example which defaults to a public Amazon S3
27 | * CSV .gz file when nothing is supplied on the command line.
28 | */
29 | object S3ALineCount extends LineCount with S3AExampleSetup with SequentialIOPolicy {
30 |
31 | override def defaultSource: Option[String] = {
32 | Some(CloudTestKeys.S3A_CSV_PATH_DEFAULT)
33 | }
34 |
35 | override def maybeEnableAnonymousAccess(
36 | sparkConf: SparkConf,
37 | dest: Option[String]): Unit = {
38 | if (dest.isEmpty) {
39 | hconf(sparkConf, AWS_CREDENTIALS_PROVIDER, ANONYMOUS_CREDENTIALS)
40 | }
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/utils/ForceRecentHadoopVersion.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.utils
19 |
20 | import org.apache.hadoop.fs.azure.AzureException
21 | import org.apache.hadoop.fs.s3a.RenameFailedException
22 |
23 | /**
24 | * This class is used to ensure that a recent Hadoop version is on the classpath.
25 | *
26 | * If it does not compile, the version of Spark it is built against has out-of-date
27 | * dependencies.
28 | *
29 | * If its classes cannot be loaded, the Hadoop version on the runtime classpath is out of date.
30 | *
31 | * Currently: requires Hadoop 2.8+
32 | */
33 | class ForceRecentHadoopVersion {
34 |
35 | /** compile/link failure against Hadoop 2.6 */
36 | val requireAzure = new AzureException("needs Hadoop 2.7+")
37 |
38 | /** compile failure against Hadoop 2.7 */
39 | val requireRecentAWS = new RenameFailedException("/", "Needs something", "")
40 | }
41 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Cloud Integration for Apache Spark
2 |
3 | The [cloud-integration](https://github.com/hortonworks-spark/cloud-integration)
4 | repository provides modules to improve Apache Spark's integration with cloud infrastructures.
5 |
6 |
7 |
8 | ## Module `spark-cloud-integration`
9 |
10 | Classes and tools to make Spark work better in-cloud.
11 |
12 | * Committer integration with the s3a committers.
13 | * Proof of concept cloud-first distcp replacement.
14 | * Serialization for Hadoop `Configuration`: class `ConfigSerDeser`. Use this
15 | to get a configuration into an RDD method; see the sketch below.
16 | * Trait `HConf` to manipulate the hadoop options in a spark config.
17 | * Anything else which turns out to be useful.
18 | * Variant of `FileInputDStream` for cloud storage: `org.apache.spark.streaming.cloudera.CloudInputDStream`
19 |
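A minimal sketch of the `ConfigSerDeser` intent, assuming it wraps a Hadoop `Configuration` and exposes a `get()` accessor; the constructor and method names here are illustrative, so check the class itself for the exact API:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

import org.apache.spark.SparkContext

// Assumed API: ConfigSerDeser(conf) wraps a Configuration; get() rebuilds it.
// (Import of ConfigSerDeser omitted; see the spark-cloud-integration module.)
def fileLengths(sc: SparkContext, paths: Seq[String]): Seq[Long] = {
  // Configuration is not java-serializable, so wrap it before a closure captures it.
  val wrapped = new ConfigSerDeser(sc.hadoopConfiguration)
  sc.parallelize(paths).map { p =>
    val conf: Configuration = wrapped.get() // rebuilt inside the task
    val path = new Path(p)
    path.getFileSystem(conf).getFileStatus(path).getLen
  }.collect().toSeq
}
```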
20 | See [Spark Cloud Integration](spark-cloud-integration/src/main/site/markdown/index.md)
21 |
22 |
23 |
24 | ## Module `cloud-examples`
25 |
26 | This module contains the packaging/integration tests for Spark and cloud storage against AWS, Azure and Google GCS.
27 |
28 | These are basic tests of core I/O and streaming functionality, and verify that
29 | the committers work.
30 |
31 | As well as running as unit tests, they have CLI entry points which can be used for scalable functional testing.
32 |
33 |
34 | ## Module `minimal-integration-test`
35 |
36 | This is a minimal JAR for integration tests.
37 |
38 | Usage:
39 | ```bash
40 | spark-submit --class com.cloudera.spark.cloud.integration.Generator \
41 | --master yarn \
42 | --num-executors 2 \
43 | --driver-memory 512m \
44 | --executor-memory 512m \
45 | --executor-cores 1 \
46 | minimal-integration-test-1.0-SNAPSHOT.jar \
47 | adl://example.azuredatalakestore.net/output/dest/1 \
48 | 2 2 15
49 | ```
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/scala/com/cloudera/spark/cloud/s3/audit/ServerLogEntry.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3.audit
19 |
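/**
 * A single S3 server access log entry, one field per column of the log format.
 */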
20 | case class ServerLogEntry(
21 | bucketowner: String,
22 | bucket_name: String,
23 | requestdatetime: String,
24 | remoteip: String,
25 | requester: String,
26 | requestid: String,
27 | operation: String,
28 | key: String,
29 | request_uri: String,
30 | httpstatus: String,
31 | errorcode: String,
32 | bytessent: Long,
33 | objectsize: Long,
34 | totaltime: String,
35 | turnaroundtime: String,
36 | referrer: String,
37 | useragent: String,
38 | versionid: String,
39 | hostid: String,
40 | sigv: String,
41 | ciphersuite: String,
42 | authtype: String,
43 | endpoint: String,
44 | tlsversion: String) {
45 |
46 | override def toString: String =
47 | s"$operation /$bucket_name/$key $httpstatus $errorcode $bytessent $requestdatetime"
48 | }
49 |
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/scala/com/cloudera/spark/cloud/utils/IntegrationUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.utils
19 |
20 | import java.net.URL
21 |
22 | import org.apache.hadoop.util.ExitUtil
23 |
24 | /**
25 | * A class to instantiate for access to all the general utilities.
26 | */
27 | class IntegrationUtils extends TimeOperations with HConf {
28 | private val E_NO_CLASS = 11
29 |
30 | def findClass(src: String, classname: String): (String, String, URL, Class[_]) = {
31 | try {
32 | val loader = this.getClass.getClassLoader
33 | val res = classname.replaceAll("\\.", "/") + ".class"
34 | val url = loader.getResource(res)
35 | val clazz = loader.loadClass(classname)
36 | (src, classname, url, clazz)
37 | } catch {
38 | case e: Exception =>
39 | throw new ExitUtil.ExitException(E_NO_CLASS,
40 | s"Failed to findClass Class $classname from $src").initCause(e)
41 | }
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # Spark provides this Travis CI configuration file to help contributors
17 | # check Scala/Java style conformance and JDK7/8 compilation easily
18 | # while preparing their pull requests.
19 | # - Scalastyle is executed during `maven install` implicitly.
20 | # - Java Checkstyle is executed by `lint-java`.
21 | # See the related discussion here.
22 | # https://github.com/apache/spark/pull/12980
23 |
24 | # 1. Choose OS (Ubuntu 14.04.3 LTS Server Edition 64bit, ~2 CORE, 7.5GB RAM)
25 | #%sudo: required
26 | #%dist: trusty
27 |
28 | # 2. Choose language and target JDKs for parallel builds.
29 | language: java
30 | jdk:
31 | - oraclejdk8
32 |
33 | # 3. Setup cache directory for SBT and Maven.
34 | cache:
35 | directories:
36 | - $HOME/.sbt
37 | - $HOME/.m2
38 |
39 | # 4. Turn off notifications.
40 | notifications:
41 | email: false
42 |
43 | # 5. Run maven install before running lint-java.
44 | install:
45 | - export MAVEN_SKIP_RC=1
46 | - mvn -T 1C install
47 |
48 |
49 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/local/LocalTestSetup.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.local
19 |
20 | import java.io.File
21 |
22 | import com.cloudera.spark.cloud.common.CloudSuiteTrait
23 | import org.apache.hadoop.fs.{FileSystem, Path}
24 |
25 | /**
26 | * Trait for the local fs; the goal is to support benchmarking, validating and
27 | * writing new tests.
28 | *
29 | */
30 | trait LocalTestSetup extends CloudSuiteTrait {
31 |
32 | override def enabled: Boolean = {
33 | true
34 | }
35 |
36 | def initFS(): FileSystem = {
37 | val fs = getLocalFS
38 | setFilesystem(fs)
39 | fs
40 | }
41 |
42 | override def dynamicPartitioning: Boolean = true
43 |
44 | /**
45 | * the test path here is always to something under the temp dir.
46 | */
47 | override protected def testDir: Path = {
48 | val f = File.createTempFile(this.getClass.getSimpleName, "")
49 | f.delete()
50 | f.mkdir()
51 | new Path(f.toURI)
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/examples/S3DataFrameExampleSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.examples
19 |
20 | import com.cloudera.spark.cloud.common.CloudSuite
21 | import com.cloudera.spark.cloud.s3.S3ATestSetup
22 |
23 | /**
24 | * Test the [[S3DataFrameExample]] logic.
25 | */
26 | class S3DataFrameExampleSuite extends CloudSuite with S3ATestSetup {
27 |
28 | init()
29 |
30 | def init(): Unit = {
31 | // propagate S3 credentials
32 | if (enabled) {
33 | initFS()
34 | }
35 | }
36 |
37 | /**
38 | * Override point: the data frame operation to execute
39 | */
40 | ctest("DataFrames",
41 | "Dataframe IO") {
42 | val conf = newSparkConf()
43 | conf.setAppName("DataFrames")
44 | val destDir = testPath(filesystem, "dataframes")
45 | val instance = new S3DataFrameExample()
46 | val args = Seq(destDir)
47 | assert(0 === instance.action(conf, args),
48 | s" action($args) failed against $instance")
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/s3/S3AStreaming.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.operations.CloudStreaming
21 |
22 | import org.apache.spark.SparkConf
23 | import org.apache.spark.streaming._
24 |
25 | /**
26 | * An example/test for streaming with a source of S3.
27 | */
28 | object S3AStreaming extends CloudStreaming with S3AExampleSetup
29 | with SequentialIOPolicy {
30 |
31 | /**
32 | * This is never executed; it's just here as the source of the example in the
33 | * documentation.
34 | */
35 | def streamingExample(): Unit = {
36 | val sparkConf = new SparkConf()
37 | val ssc = new StreamingContext(sparkConf, Milliseconds(1000))
38 | try {
39 | val lines = ssc.textFileStream("s3a://testbucket/incoming")
40 | val matches = lines.filter(_.endsWith("3"))
41 | matches.print()
42 | ssc.start()
43 | ssc.awaitTermination()
44 | } finally {
45 | ssc.stop(true)
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/csv/AbfsHugeCsvIOSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.csv
19 |
20 | import com.cloudera.spark.cloud.abfs.AbfsTestSetup
21 | import com.cloudera.spark.cloud.ObjectStoreConfigurations.ABFS_READAHEAD_HADOOP_OPTIONS
22 | import org.apache.hadoop.conf.Configuration
23 |
24 | /**
25 | * The real test of HADOOP-18521.
26 | */
27 | class AbfsHugeCsvIOSuite extends AbstractHugeCsvIOSuite with AbfsTestSetup {
28 |
29 | init()
30 |
31 | /**
32 | * set up FS if enabled.
33 | */
34 | def init(): Unit = {
35 | if (enabled) {
36 | initFS()
37 | }
38 | }
39 |
40 | /**
41 | * Patch in ABFS readahead options, to ensure they are
42 | * always set.
43 | * @return the configuration to create the fs with
44 | */
45 | override def createConfiguration(): Configuration = {
46 | val conf = super.createConfiguration()
47 | for (kv <- ABFS_READAHEAD_HADOOP_OPTIONS) {
48 | conf.set(kv._1, kv._2)
49 | }
50 | conf
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/azure/AzureLineCountSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.azure
19 |
20 | import com.cloudera.spark.cloud.common.CloudSuiteWithCSVDatasource
21 | import com.cloudera.spark.cloud.operations.LineCount
22 |
23 | /**
24 | * Test the `LineCount` entry point against Azure storage.
25 | */
26 | class AzureLineCountSuite extends CloudSuiteWithCSVDatasource with AzureTestSetup {
27 |
28 | init()
29 |
30 | /**
31 | * set up FS if enabled.
32 | */
33 | def init(): Unit = {
34 | if (enabled) {
35 | initFS()
36 | initDatasources()
37 | }
38 | }
39 |
40 | override def enabled: Boolean = super.enabled && hasCSVTestFile
41 |
42 | after {
43 | cleanFilesystemInTeardown()
44 | }
45 |
46 | ctest("AzureLineCountSuite",
47 | "Execute the LineCount example") {
48 | val src = getTestCSVPath()
49 | val sparkConf = newSparkConf(src)
50 | sparkConf.setAppName("AzureLineCountSuite")
51 | assert(0 === new LineCount().action(sparkConf,
52 | Seq(src.toUri.toString)))
53 | }
54 |
55 | }
56 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/common/CloudSuiteWithCSVDatasource.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.common
19 |
20 | import org.apache.hadoop.conf.Configuration
21 | import org.apache.hadoop.fs.{FileSystem, Path}
22 |
23 | /**
24 | * Any cloud suite which requires the datasource to be a (possibly copied over)
25 | * CSV file.
26 | */
27 | class CloudSuiteWithCSVDatasource extends CloudSuite with CsvDatasourceSupport {
28 |
29 | /**
30 | * Call this to set up the datasource for tests.
31 | */
32 | def initDatasources(): Unit = {
33 | if (hasCSVTestFile()) {
34 | prepareTestCSVFile()
35 | testCSVFilePath.get
36 | }
37 | }
38 |
39 | /**
40 | * Get the CSV source path and filesystem to read from it.
41 | * The filesystem uses the endpoint defined for the CSV file.
42 | *
43 | * @return Path and FS of a CSV source file.
44 | */
45 | def getCSVSourceAndFileSystem(): (Path, FileSystem) = {
46 | val source = getTestCSVPath()
47 | (source, FileSystem.newInstance(source.toUri, new Configuration(getConf)))
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/site/using.md:
--------------------------------------------------------------------------------
1 |
14 |
15 | # Using the extra features in these examples
16 |
17 | ### Example: Spark Streaming and Cloud Storage
18 |
19 | Spark Streaming can monitor files added to object stores, by
20 | creating a `FileInputDStream` DStream monitoring a path under a bucket.
21 |
22 | ```scala
23 | import org.apache.spark.SparkConf
24 | import org.apache.spark.sql.SparkSession
25 | import org.apache.spark.streaming._
26 |
27 | val sparkConf = new SparkConf()
28 | val ssc = new StreamingContext(sparkConf, Milliseconds(5000))
29 | try {
30 | val lines = ssc.textFileStream("s3a://bucket/incoming")
31 | val matches = lines.filter(_.endsWith("3"))
32 | matches.print()
33 | ssc.start()
34 | ssc.awaitTermination()
35 | } finally {
36 | ssc.stop(true)
37 | }
38 | ```
39 |
40 |
41 | 1. The time to scan for new files is proportional to the number of files
42 | under the path, not the number of *new* files, so the scan can become a slow operation.
43 | The size of the window needs to be set to handle this.
44 |
45 | 1. Files only appear in an object store once they are completely written; there
46 | is no need for a workflow of write-then-rename to ensure that files aren't picked up
47 | while they are still being written. Applications can write straight to the monitored directory, as sketched below.
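A minimal sketch of such a producer (the bucket and path are placeholders):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("producer").getOrCreate()
// write directly into the monitored directory; each object only becomes
// visible to the stream once its write has completed
spark.range(1000)
  .selectExpr("cast(id as string) AS line")
  .write.mode("append").text("s3a://bucket/incoming")
```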
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/s3/S3ADataFrames.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.CloudTestKeys
21 | import com.cloudera.spark.cloud.operations.CloudDataFrames
22 | import org.apache.hadoop.conf.Configuration
23 | import org.apache.hadoop.fs.{FileSystem, Path}
24 |
25 | import org.apache.spark.sql.SparkSession
26 |
27 | /**
28 | * Test dataframe operations using S3 as the destination and source of operations.
29 | * This validates the various conversion jobs all work against the object store.
30 | *
31 | * It doesn't verify timings, though some information is printed.
32 | */
33 | object S3ADataFrames extends CloudDataFrames with S3AExampleSetup {
34 |
35 | override def extraValidation(
36 | session: SparkSession,
37 | conf: Configuration,
38 | fs: FileSystem,
39 | results: Seq[(String, Path, Long, Long)]): Unit = {
40 |
41 | val operations = new CommitterOperations(fs)
42 | if (conf.getBoolean(CloudTestKeys.S3A_COMMITTER_TEST_ENABLED, false)) {
43 | results.foreach((tuple: (String, Path, Long, Long)) => {
44 | operations.verifyCommitter(tuple._2, None, None, "")
45 | })
46 | }
47 |
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3AFileGeneratorSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.FileGeneratorTests
21 | import org.apache.hadoop.fs.Path
22 |
23 | import org.apache.spark.SparkConf
24 |
25 | /**
26 | * Test the `S3AFileGenerator` entry point.
27 | */
28 | class S3AFileGeneratorSuite extends FileGeneratorTests with S3ATestSetup {
29 |
30 | init()
31 |
32 | def init(): Unit = {
33 | // propagate S3 credentials
34 | if (enabled) {
35 | initFS()
36 | }
37 | }
38 |
39 | after {
40 | cleanFilesystemInTeardown()
41 | }
42 |
43 | ctest("FileGeneratorUsage",
44 | "Execute the S3FileGenerator example with a bad argument; expect a failure") {
45 | val conf = newSparkConf()
46 | conf.setAppName("FileGenerator")
47 | assert(-2 === S3AFileGenerator.action(conf, Seq()))
48 | }
49 |
50 | override def generate(
51 | conf: SparkConf,
52 | destDir: Path,
53 | monthCount: Int,
54 | fileCount: Int,
55 | rowCount: Int): Int = {
56 | val result = S3AFileGenerator.action(conf, Seq(destDir,
57 | monthCount,
58 | fileCount,
59 | rowCount))
60 | result
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/common/HadoopVersionSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.common
19 |
20 | import java.util
21 | import java.util.Collections
22 |
23 | import scala.collection.JavaConverters._
24 |
25 | import com.cloudera.spark.cloud.common.CloudSuite._
26 | import com.cloudera.spark.cloud.test.UnitTestSuite
27 |
28 | class HadoopVersionSuite extends UnitTestSuite {
29 |
30 | test("Sysprops") {
31 | val props = System.getProperties
32 | val list = new util.ArrayList[String](props.stringPropertyNames())
33 | Collections.sort(list)
34 | val plist = list.asScala
35 | .filter(k => (!k.startsWith("java.") && !k.startsWith("sun.")))
36 | .map(key => s"$key = ${props.getProperty(key)}")
37 | .mkString("\n")
38 | logInfo(s"Properties:\n$plist")
39 | }
40 |
41 | test("PropagatedValues") {
42 | val mapped = StoreTestHelper.loadConfiguration().asScala
43 | .filter { entry =>
44 | val k = entry.getKey
45 | k.startsWith("fs.s3a") && !k.contains("key")
46 | }
47 | .map(entry => s"${entry.getKey} = ${entry.getValue}").toList.sorted
48 | val list = mapped.mkString("\n")
49 | logInfo(s"S3A config options:\n${list}")
50 | }
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3DependencyCheckSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.test.UnitTestSuite
21 |
22 | /**
23 | * Force findClass in hadoop s3n/s3a classes and some dependencies.
24 | * Dependency problems should be picked up at compile time; runtime may
25 | * identify problems with any other transitive library
26 | */
27 | class S3DependencyCheckSuite extends UnitTestSuite {
28 |
29 | test("Create S3A FS Instance") {
30 | instantiate("org.apache.hadoop.fs.s3a.S3AFileSystem")
31 | }
32 |
33 | test("hive") {
34 | instantiate("org.apache.hadoop.hive.conf.HiveConf")
35 | }
36 |
37 | /**
38 | * Instantiate the class.
39 | * This is wrapped because ScalaTest gets confused by Errors raised
40 | * during instantiation inside a test case.
41 | * @param classname class to instantiate.
42 | */
43 | def instantiate(classname: String): Unit = {
44 | try {
45 | val clazz = this.getClass.getClassLoader.loadClass(classname)
46 | clazz.newInstance()
47 | } catch {
48 | case e: Exception => throw e
49 | case e: Throwable => throw new Exception(s"Could not instantiate $classname", e)
50 | }
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/scala/com/cloudera/spark/cloud/CommitterInfo.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud
19 |
20 | import com.cloudera.spark.cloud.CommitterBinding.factoryForSchema
21 | import com.cloudera.spark.cloud.utils.HConf
22 | import org.apache.hadoop.conf.Configuration
23 |
24 | import org.apache.spark.SparkConf
25 |
26 | /**
27 | * Representation of a committer binding.
28 | * @param name committer name for the s3a committer option
29 | * @param factory factory classname
30 | */
31 | case class CommitterInfo(name: String, factory: String)
32 | extends HConf {
33 |
34 | def bind(sparkConf: SparkConf): Unit = {
35 | bindToSchema(sparkConf, "s3a")
36 | }
37 |
38 | def bind(conf: Configuration): Unit = {
39 | bindToSchema(conf, "s3a")
40 | }
41 |
42 | def bindToSchema(sparkConf: SparkConf, fsSchema: String): Unit = {
43 | hconf(sparkConf, factoryForSchema(fsSchema), factory)
44 | hconf(sparkConf, CommitterBinding.S3A_COMMITTER_NAME,
45 | name)
46 | }
47 |
48 | def bindToSchema(conf: Configuration, fsSchema: String): Unit = {
49 | conf.set(factoryForSchema(fsSchema), factory)
50 | conf.set(CommitterBinding.S3A_COMMITTER_NAME, name)
51 | }
52 |
53 | override def toString: String = s"Committer binding $factory($name)"
54 | }
55 |
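A short usage sketch (the committer name and factory classname here are illustrative, not a recommendation):

```scala
import com.cloudera.spark.cloud.CommitterInfo

import org.apache.spark.SparkConf

val sparkConf = new SparkConf()
// bind an example committer: this sets the s3a committer factory key
// and the committer name option on the spark configuration
val committer = CommitterInfo("magic",
  "org.apache.hadoop.fs.s3a.commit.S3ACommitterFactory")
committer.bind(sparkConf)
```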
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/sources/AbtractOrcRelationSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.sources
19 |
20 | import org.apache.spark.sql.Row
21 | import org.apache.spark.sql.hive.orc.OrcFileFormat
22 | import org.apache.spark.sql.internal.SQLConf
23 |
24 | /**
25 | * Cloud relation suite with some ORC-specific tests.
26 | */
27 | abstract class AbtractOrcRelationSuite extends CloudRelationBasicSuite {
28 |
29 | import testImplicits._
30 |
31 | override val dataSourceName: String = classOf[OrcFileFormat].getCanonicalName
32 |
33 | ctest("SPARK-12218",
34 | "'Not' is included in ORC filter pushdown", false) {
35 |
36 | withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> "true") {
37 | withTempPathDir("SPARK-12218") { dir =>
38 | val path = s"${dir.toString}/table1"
39 | (1 to 5).map(i => (i, (i % 2).toString)).toDF("a", "b").write.orc(path)
40 |
41 | checkAnswer(
42 | spark.read.orc(path).where("not (a = 2) or not(b in ('1'))"),
43 | (1 to 5).map(i => Row(i, (i % 2).toString)))
44 |
45 | checkAnswer(
46 | spark.read.orc(path).where("not (a = 2 and b in ('1'))"),
47 | (1 to 5).map(i => Row(i, (i % 2).toString)))
48 | }
49 | }
50 | }
51 |
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/utils/ExtraAssertions.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.utils
19 |
20 | import org.apache.hadoop.conf.Configuration
21 | import org.scalatest.Assertions
22 |
23 | trait ExtraAssertions extends Assertions {
24 |
25 | /**
26 | * Expect a specific value; raise an assertion if it is not there
27 | *
28 | * @param v value
29 | * @param msg message
30 | * @tparam T type
31 | * @return the actual value
32 | */
33 | def expectSome[T](v: Option[T], msg: => String): T = {
34 | v.getOrElse(throw new AssertionError(msg))
35 | }
36 |
37 | /**
38 | * Expect a value to be non-null; return it. It will
39 | * implicitly be non-null in further use.
40 | *
41 | * @param v value to check
42 | * @param msg message for any assertion
43 | * @tparam T type of value
44 | * @return
45 | */
46 | def expectNotNull[T](v: T, msg: => String): T = {
47 | if (v != null) v else throw new AssertionError(msg)
48 | }
49 |
50 | /**
51 | * Expect a configuration option to be set
52 | *
53 | * @param c config
54 | * @param key key to look for
55 | * @return the set value
56 | */
57 | def expectOptionSet(c: Configuration, key: String): String = {
58 | expectNotNull(c.get(key), s"Unset property ${key}")
59 | }
60 |
61 | }
62 |
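A minimal sketch of the assertions in use (a hypothetical helper class; the configuration keys checked are examples only):

```scala
import com.cloudera.spark.cloud.utils.ExtraAssertions
import org.apache.hadoop.conf.Configuration

class ConfigChecks extends ExtraAssertions {

  def check(conf: Configuration): String = {
    // raises an AssertionError with the message if the Option is empty
    expectSome(Option(conf.get("fs.s3a.endpoint")), "no endpoint configured")
    // raises if the key is unset; otherwise returns the set value
    expectOptionSet(conf, "fs.s3a.buffer.dir")
  }
}
```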
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/common/CsvDatasourceSupport.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.common
19 |
20 | import org.apache.hadoop.fs.Path
21 |
22 | trait CsvDatasourceSupport {
23 |
24 | /**
25 | * Predicate to define whether or not there's a CSV file to work with.
26 | *
27 | * @return true if the CSV test file is defined.
28 | */
29 | def hasCSVTestFile(): Boolean = false
30 |
31 | /**
32 | * Path to the CSV file's original source
33 | * @return a path
34 | */
35 | def sourceCSVFilePath: Option[Path] = None
36 |
37 | /**
38 | * Path to the CSV file used in the tests themselves; may differ from
39 | * the original source
40 | *
41 | * @return path to test data: valid after `prepareTestCSVFile`.
42 | */
43 | def testCSVFilePath: Option[Path] = sourceCSVFilePath
44 |
45 | /**
46 | * Get the test CSV file or raise an exception.
47 | * @return the CSV path for tests
48 | */
49 | def getTestCSVPath(): Path = testCSVFilePath.get
50 |
51 | /**
52 | * Any operation to prepare the CSV file. After completion,
53 | * `testCSVFilePath` must return a valid path to the test CSV file.
54 | */
55 | def prepareTestCSVFile(): Unit = {
56 | require(hasCSVTestFile(), "No CSV file")
57 | require(sourceCSVFilePath.isDefined, "No source CSV file")
58 | }
59 |
60 | }
61 |
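A minimal sketch of a concrete binding (the mixin name and path are hypothetical; real suites resolve the location from their test configuration):

```scala
import com.cloudera.spark.cloud.common.CsvDatasourceSupport
import org.apache.hadoop.fs.Path

// hypothetical mixin pointing the tests at a fixed CSV file
trait FixedCsvDatasource extends CsvDatasourceSupport {

  override def hasCSVTestFile(): Boolean = true

  override def sourceCSVFilePath: Option[Path] =
    Some(new Path("s3a://bucket/datasets/source.csv"))
}
```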
--------------------------------------------------------------------------------
/cloud-examples/src/test/resources/log4j2.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements. See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0
6 | # (the "License"); you may not use this file except in compliance with
7 | # the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | # lifted from spark core/src/test/resources.
19 | # from the log4j docs:
20 | # > An understanding of how loggers work in Log4j is critical before
21 | # > trying to configure them.
22 | # > Please reference the Log4j architecture if more information is required.
23 | # > Trying to configure Log4j without understanding those concepts will lead to frustration.
24 |
25 | # Set everything to be logged to the file target/unit-tests.log
26 | rootLogger.level = info
27 | rootLogger.appenderRef.file.ref = ${sys:test.appender:-File}
28 |
29 | appender.file.type = File
30 | appender.file.name = File
31 | appender.file.fileName = target/unit-tests.log
32 | appender.file.layout.type = PatternLayout
33 | appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex
34 |
35 | # Tests that launch java subprocesses can set the "test.appender" system property to
36 | # "console" to avoid having the child process's logs overwrite the unit test's
37 | # log file.
38 | appender.console.type = Console
39 | appender.console.name = console
40 | appender.console.target = SYSTEM_ERR
41 | appender.console.layout.type = PatternLayout
42 | appender.console.layout.pattern = %t: %m%n%ex
43 |
44 | # Ignore messages below warning level from Jetty, because it's a bit verbose
45 | logger.jetty.name = org.sparkproject.jetty
46 | logger.jetty.level = warn
47 |
48 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/gs/AbstractGsCommitterSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.gs
19 |
20 | import com.cloudera.spark.cloud.ObjectStoreConfigurations
21 | import com.cloudera.spark.cloud.common.CloudSuite
22 |
23 |
24 |
25 | import org.apache.spark.{SparkConf, SparkScopeWorkarounds}
26 |
27 | abstract class AbstractGsCommitterSuite extends CloudSuite with GsTestSetup {
28 | /**
29 | * Patch up hive for re-use.
30 | *
31 | * @param sparkConf configuration to patch
32 | */
33 | def addTransientDerbySettings(sparkConf: SparkConf): Unit = {
34 | hconf(sparkConf, SparkScopeWorkarounds.tempHiveConfig())
35 | }
36 |
37 | /**
38 | * Override point for suites: a method which is called
39 | * in all the `newSparkConf()` methods.
40 | * This can be used to alter values for the configuration.
41 | * It is called before the configuration read in from the command line
42 | * is applied, so that tests can override the values applied in-code.
43 | *
44 | * @param sparkConf spark configuration to alter
45 | */
46 | override protected def addSuiteConfigurationOptions(sparkConf: SparkConf): Unit = {
47 | super.addSuiteConfigurationOptions(sparkConf)
48 | logDebug("Patching spark conf with committer bindings")
49 | sparkConf.setAll(ObjectStoreConfigurations.COMMITTER_OPTIONS)
50 | addTransientDerbySettings(sparkConf)
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3ACSVReadSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.CSVReadTests
21 |
22 | /**
23 | * A suite of tests reading in the S3A CSV file.
24 | */
25 | class S3ACSVReadSuite extends CSVReadTests with S3ATestSetup with SequentialIOPolicy {
26 |
27 | init()
28 |
29 | def init(): Unit = {
30 | setupFilesystemConfiguration(getConf)
31 | if (enabled) {
32 | initDatasources()
33 | }
34 | }
35 |
36 |
37 | /* class RemoteOutputIterator[T](private val source: RemoteIterator[T]) extends Iterator[T] {
38 | def hasNext: Boolean = source.hasNext
39 |
40 | def next: T = source.next()
41 | }*/
42 |
43 | /*
44 | * This doesn't do much, except that it is designed to be pasted straight into
45 | * Zeppelin and work
46 | */
47 | /* ctest("DirOps", "simple directory ops in spark context process") {
48 | val source = CSV_TESTFILE.get
49 | sc = new SparkContext("local", "CSVgz", newSparkConf(source))
50 |
51 | import org.apache.hadoop.fs._
52 | val landsat = "s3a://landsat-pds/scene_list.gz"
53 | val landsatPath = new Path(landsat)
54 | val fs = landsatPath.getFileSystem(sc.hadoopConfiguration)
55 | val files = fs.listFiles(landsatPath.getParent, false)
56 | val listing = new RemoteOutputIterator(files)
57 | listing.foreach(print(_))
58 |
59 | }*/
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/committers/AbstractCommitterSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.committers
19 |
20 | import com.cloudera.spark.cloud.ObjectStoreConfigurations
21 | import com.cloudera.spark.cloud.common.CloudSuite
22 | import com.cloudera.spark.cloud.s3.S3ATestSetup
23 |
24 | import org.apache.spark.{SparkConf, SparkScopeWorkarounds}
25 |
26 | abstract class AbstractCommitterSuite extends CloudSuite {
27 | /**
28 | * Patch up hive for re-use.
29 | *
30 | * @param sparkConf configuration to patch
31 | */
32 | def addTransientDerbySettings(sparkConf: SparkConf): Unit = {
33 | hconf(sparkConf, SparkScopeWorkarounds.tempHiveConfig())
34 | }
35 |
36 | /**
37 | * Override point for suites: a method which is called
38 | * in all the `newSparkConf()` methods.
39 | * This can be used to alter values for the configuration.
40 | * It is called before the configuration read in from the command line
41 | * is applied, so that tests can override the values applied in-code.
42 | *
43 | * @param sparkConf spark configuration to alter
44 | */
45 | override protected def addSuiteConfigurationOptions(sparkConf: SparkConf): Unit = {
46 | super.addSuiteConfigurationOptions(sparkConf)
47 | logDebug("Patching spark conf with s3a committer bindings")
48 | sparkConf.setAll(ObjectStoreConfigurations.COMMITTER_OPTIONS)
49 | addTransientDerbySettings(sparkConf)
50 | }
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/azure/AzureDataFrameSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.azure
19 |
20 | import com.cloudera.spark.cloud.common.DataFrameTests
21 |
22 | import org.apache.spark.SparkConf
23 | import org.apache.spark.sql.SparkSession
24 | import org.apache.spark.sql.types.StringType
25 |
26 | /**
27 | * Test Azure and DataFrames
28 | */
29 | class AzureDataFrameSuite extends DataFrameTests with AzureTestSetup {
30 |
31 | init()
32 |
33 | def init(): Unit = {
34 | if (enabled) {
35 | initFS()
36 | }
37 | }
38 |
39 | /**
40 | * This is the source for the example; it is here to ensure it compiles.
41 | */
42 | def example(sparkConf: SparkConf): Unit = {
43 | val spark = SparkSession
44 | .builder
45 | .appName("DataFrames")
46 | .config(sparkConf)
47 | .getOrCreate()
48 | import spark.implicits._
49 | val numRows = 1000
50 | val sourceData = spark.range(0, numRows).select($"id".as("l"), $"id".cast(StringType).as("s"))
51 | val dest = "wasb://yourcontainer@youraccount.blob.core.windows.net/dataframes"
52 | val orcFile = dest + "/data.orc"
53 | sourceData.write.format("orc").save(orcFile)
54 | // read it back
55 | val orcData = spark.read.format("orc").load(orcFile)
56 | // save it to parquet
57 | val parquetFile = dest + "/data.parquet"
58 | orcData.write.format("parquet").save(parquetFile)
59 | spark.stop()
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/abfs/commit/AbstractAbfsCommitterSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.abfs.commit
19 |
20 | import com.cloudera.spark.cloud.ObjectStoreConfigurations
21 | import com.cloudera.spark.cloud.abfs.AbfsTestSetup
22 | import com.cloudera.spark.cloud.common.CloudSuite
23 |
24 | import org.apache.spark.{SparkConf, SparkScopeWorkarounds}
25 |
26 | abstract class AbstractAbfsCommitterSuite extends CloudSuite with AbfsTestSetup {
27 | /**
28 | * Patch up hive for re-use.
29 | *
30 | * @param sparkConf configuration to patch
31 | */
32 | def addTransientDerbySettings(sparkConf: SparkConf): Unit = {
33 | hconf(sparkConf, SparkScopeWorkarounds.tempHiveConfig())
34 | }
35 |
36 | /**
37 | * Override point for suites: a method which is called
38 | * in all the `newSparkConf()` methods.
39 | * This can be used to alter values for the configuration.
40 | * It is called before the configuration read in from the command line
41 | * is applied, so that tests can override the values applied in-code.
42 | *
43 | * @param sparkConf spark configuration to alter
44 | */
45 | override protected def addSuiteConfigurationOptions(sparkConf: SparkConf): Unit = {
46 | super.addSuiteConfigurationOptions(sparkConf)
47 | logDebug("Patching spark conf with committer bindings")
48 | sparkConf.setAll(ObjectStoreConfigurations.COMMITTER_OPTIONS)
49 | addTransientDerbySettings(sparkConf)
50 | }
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/common/ReadSample.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.common
19 |
20 | //import org.apache.spark.mllib.linalg.Vectors
21 |
22 | /**
23 | * A sample of a read operation.
24 | * @param started start time in nanoseconds
25 | * @param duration duration in nanoseconds
26 | * @param blockSize size of block worked with
27 | * @param bytesRequested how many bytes were requested
28 | * @param bytesRead how many bytes were actually returned
29 | * @param pos position in the object where the read was requested.
30 | */
31 | class ReadSample(
32 | val started: Long,
33 | val duration: Long,
34 | val blockSize: Int,
35 | val bytesRequested: Int,
36 | val bytesRead: Int,
37 | val pos: Long) extends Serializable {
38 |
39 | def perByte: Long = { if (duration > 0) bytesRead / duration else -1L }
40 |
41 | def delta: Int = { bytesRequested - bytesRead }
42 |
43 | override def toString: String = s"ReadSample(started=$started, duration=$duration," +
44 | s" blockSize=$blockSize, bytesRequested=$bytesRequested, bytesRead=$bytesRead)" +
45 | s" pos=$pos"
46 |
47 | /* def toVector = {
48 | val a = new Array[Double](8)
49 | a(0) = started.toDouble
50 | a(1) = duration.toDouble
51 | a(2) = blockSize.toDouble
52 | a(3) = bytesRequested.toDouble
53 | a(4) = bytesRead.toDouble
54 | a(5) = pos.toDouble
55 | a(6) = perByte.toDouble
56 | a(7) = delta.toDouble
57 | Vectors.dense(a)
58 | }*/
59 |
60 | }
61 |
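An illustrative instance showing what the derived metrics evaluate to (the values are invented for the example):

```scala
import com.cloudera.spark.cloud.common.ReadSample

// a 1 MB request which returned 512 KB in 2 ms (times in nanoseconds)
val sample = new ReadSample(
  started = 0L,
  duration = 2L * 1000 * 1000,
  blockSize = 1024 * 1024,
  bytesRequested = 1024 * 1024,
  bytesRead = 512 * 1024,
  pos = 0L)

val shortfall = sample.delta // 524288: bytes requested but not returned
val rate = sample.perByte    // integer bytes/ns; rounds down to 0 here
```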
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/common/DataFrameTests.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.common
19 |
20 | import com.cloudera.spark.cloud.operations.CloudDataFrames
21 |
22 | /**
23 | * Test dataframe and object store integration
24 | */
25 | abstract class DataFrameTests extends CloudSuite {
26 |
27 | after {
28 | cleanFilesystemInTeardown()
29 | }
30 |
31 | /**
32 | * Override point: the data frame operation to execute
33 | */
34 | protected val instance: CloudDataFrames = new CloudDataFrames()
35 |
36 | ctest("DataFrames",
37 | "Execute the Data Frames example") {
38 | val conf = newSparkConf()
39 | conf.setAppName("DataFrames")
40 | val destDir = testPath(filesystem, "dataframes")
41 | val rowCount = 1000
42 |
43 | val args = Seq(destDir, rowCount)
44 | assert(0 === instance.action(conf, args),
45 | s" action($args) failed against $instance")
46 |
47 | // do a recursive listFiles
48 | val listing = logDuration("listFiles(recursive)") {
49 | listFiles(filesystem, destDir, true)
50 | }
51 |
52 | var recursivelyListedFilesDataset = 0L
53 | var recursivelyListedFiles = 0
54 | logDuration("scan result list") {
55 | listing.foreach{status =>
56 | recursivelyListedFiles += 1
57 | recursivelyListedFilesDataset += status.getLen
58 | logInfo(s"${status.getPath}[${status.getLen}]")
59 | }
60 | }
61 |
62 | logInfo(s"FileSystem $filesystem")
63 | }
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/common/ContextFreeCloudSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.common
19 |
20 | import com.cloudera.spark.cloud.s3.S3AConstants
21 | import org.scalatest.concurrent.Eventually
22 | import org.scalatest.BeforeAndAfter
23 |
24 | import org.apache.spark.SparkFunSuite
25 | import org.apache.spark.sql.SparkSession
26 |
27 | /**
28 | * A cloud suite which doesn't create a spark context.
29 | */
30 | abstract class ContextFreeCloudSuite extends SparkFunSuite
31 | with BeforeAndAfter
32 | with Eventually with S3AConstants with CloudSuiteTrait {
33 |
34 | }
35 |
36 | /**
37 | * Cloud test suite with a spark session to clean up afterwards
38 | */
39 | abstract class SparkSessionCloudSuite extends ContextFreeCloudSuite {
40 |
41 | var _sparkSession: SparkSession = null
42 |
43 | def sparkSession = _sparkSession
44 |
45 | def setSparkSession(s: SparkSession): Unit = {
46 | _sparkSession = s
47 | }
48 |
49 | /**
50 | * Close any spark session.
51 | */
52 | def closeSparkSession(): Unit = {
53 | if (_sparkSession != null) {
54 | _sparkSession.close()
55 | _sparkSession = null
56 | // To avoid RPC rebinding to the same port, since it doesn't unbind immediately on shutdown
57 | // (based on LocalSparkContext; no idea if still holds)
58 | System.clearProperty("spark.driver.port")
59 | }
60 | }
61 |
62 |
63 | override def afterEach(): Unit = {
64 | try {
65 | closeSparkSession()
66 | } finally {
67 | super.afterEach()
68 | }
69 | }
70 |
71 | }
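A sketch of a suite built on this base (hypothetical; it assumes the abstract configuration hooks of `CloudSuiteTrait` are satisfied elsewhere). The session registered with `setSparkSession` is closed automatically in `afterEach()`:

```scala
import com.cloudera.spark.cloud.common.SparkSessionCloudSuite

import org.apache.spark.sql.SparkSession

class ExampleSessionSuite extends SparkSessionCloudSuite {

  test("session lifecycle") {
    setSparkSession(SparkSession.builder()
      .master("local")
      .appName("example")
      .getOrCreate())
    // the session is torn down by afterEach(), not by the test body
    assert(sparkSession.range(10).count() === 10)
  }
}
```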
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/gs/GSDependencyCheckSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.gs
19 |
20 | import com.cloudera.spark.cloud.test.UnitTestSuite
21 | import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem
22 | import com.google.cloud.hadoop.fs.gcs.HadoopConfigurationProperty
23 | import org.apache.hadoop.fs.FileSystem
24 |
25 | /**
26 | * Force findClass in hadoop gcs classes and some dependencies.
27 | * Dependency problems should be picked up at compile time; runtime may
28 | * identify problems with any other transitive library
29 | */
30 | class GSDependencyCheckSuite extends UnitTestSuite {
31 |
32 | test("Create GCS FS Instance") {
33 | instantiate("com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem")
34 | }
35 |
36 | test("compile time check of filesystem") {
37 | val fs = new GoogleHadoopFileSystem()
38 | assert(fs.isInstanceOf[FileSystem])
39 | }
40 |
41 | test("config") {
42 | new HadoopConfigurationProperty("key")
43 | }
44 |
45 | /**
46 | * Instantiate the class.
47 | * This is wrapped because ScalaTest gets confused by Errors raised
48 | * during instantiation inside a test case.
49 | * @param classname class to instantiate.
50 | */
51 | def instantiate(classname: String): Unit = {
52 | try {
53 | val clazz = this.getClass.getClassLoader.loadClass(classname)
54 | clazz.newInstance()
55 | } catch {
56 | case e: Exception => throw e
57 | case e: Throwable => throw new Exception(s"Could not instantiate $classname", e)
58 | }
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/scala/com/cloudera/spark/cloud/utils/ConfigSerDeser.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.utils
19 |
20 | import java.io.{ObjectInputStream, ObjectOutputStream}
21 |
22 | import org.apache.hadoop.conf.Configuration
23 |
24 | /**
25 | * Class to make Hadoop configurations serializable; uses the
26 | * `Writable` operations to do this.
27 | * Note: this only serializes the explicitly set values, not any set
28 | * in site/default or other XML resources.
29 | * @param conf configuration to serialize
30 | */
31 | @SerialVersionUID(0xABBA0000L)
32 | class ConfigSerDeser(var conf: Configuration) extends Serializable {
33 |
34 |
35 | /**
36 | * Empty constructor: binds to a `new Configuration()`.
37 | */
38 | def this() {
39 | this(new Configuration())
40 | }
41 |
42 | /**
43 | * Get the current configuration.
44 | * @return the configuration.
45 | */
46 | def get(): Configuration = conf
47 |
48 | /**
49 | * Serializable writer.
50 | * @param out output stream
51 | */
52 | private def writeObject (out: ObjectOutputStream): Unit = {
53 | conf.write(out)
54 | }
55 |
56 | /**
57 | * Serializable reader.
58 | * @param in input
59 | */
60 | private def readObject (in: ObjectInputStream): Unit = {
61 | conf = new Configuration()
62 | conf.readFields(in)
63 | }
64 |
65 | /**
66 | * Handle a read without data; this should never be called, but it
67 | * is here as a safety mechanism.
68 | */
69 | private def readObjectNoData(): Unit = {
70 | conf = new Configuration()
71 | }
72 | }
73 |
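A typical use (a sketch; the helper is hypothetical): wrap the driver-side configuration before it is captured by a closure, then unwrap it on the executors:

```scala
import com.cloudera.spark.cloud.utils.ConfigSerDeser
import org.apache.hadoop.fs.Path

import org.apache.spark.SparkContext

def fileLengths(sc: SparkContext, paths: Seq[String]): Seq[Long] = {
  // Configuration itself is not serializable; wrap it for the closure
  val wrapped = new ConfigSerDeser(sc.hadoopConfiguration)
  sc.parallelize(paths).map { p =>
    val path = new Path(p)
    // executor side: recover a Configuration from the wrapper
    path.getFileSystem(wrapped.get()).getFileStatus(path).getLen
  }.collect().toSeq
}
```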
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/s3/S3AExampleSetup.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.ObjectStoreConfigurations
21 | import com.cloudera.spark.cloud.common.StoreTestOperations
22 | import org.apache.hadoop.conf.Configuration
23 |
24 | import org.apache.spark.SparkConf
25 |
26 | /**
27 | * Base Class for examples working with S3.
28 | */
29 | trait S3AExampleSetup extends StoreTestOperations with S3AConstants {
30 |
31 | /**
32 | * Set the standard S3A Hadoop options to be used in test/examples.
33 | * If Random IO is expected, then the experimental fadvise option is
34 | * set to random.
35 | *
36 | * @param sparkConf spark configuration to patch
37 | * @param randomIO is the IO expected to be random access?
38 | */
39 | override protected def applyObjectStoreConfigurationOptions(
40 | sparkConf: SparkConf, randomIO: Boolean): Unit = {
41 | super.applyObjectStoreConfigurationOptions(sparkConf, randomIO)
42 | // smaller block size to divide up work
43 | hconf(sparkConf, BLOCK_SIZE, 1 * 1024 * 1024)
44 | hconf(sparkConf, MULTIPART_SIZE, MIN_PERMITTED_MULTIPART_SIZE)
45 | hconf(sparkConf, READAHEAD_RANGE, "128K")
46 | hconf(sparkConf, MIN_MULTIPART_THRESHOLD, MIN_PERMITTED_MULTIPART_SIZE)
47 | hconf(sparkConf, INPUT_FADVISE, if (randomIO) RANDOM_IO else NORMAL_IO)
48 | // disable file output in the path output committer as a safety check
49 | hconf(sparkConf, REJECT_FILE_OUTPUT, true)
50 | verifyConfigurationOptions(sparkConf,
51 | ObjectStoreConfigurations.COMMITTER_OPTIONS)
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/sources/CloudPartitionTest.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.sources
19 |
20 | import org.apache.hadoop.fs.Path
21 |
22 | import org.apache.spark.sql._
23 | import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
24 |
25 | /**
26 | * Test of a single operation; isolated for debugging.
27 | */
28 | abstract class CloudPartitionTest extends AbstractCloudRelationTest {
29 |
30 | import testImplicits._
31 |
32 | protected val rows = 3
33 | protected val part1size = 2
34 |
35 | ctest(
36 | "save-findClass-partitioned-part-columns-in-data",
37 | "Save sets of files in explicitly set up partition tree; read") {
38 | withTempPathDir("part-columns", None) { path =>
39 | for (p1 <- 1 to part1size; p2 <- Seq("foo", "bar")) {
40 | val partitionDir = new Path(path, s"p1=$p1/p2=$p2")
41 | val df = sparkContext
42 | .parallelize(for (i <- 1 to rows) yield (i, s"val_$i", p1))
43 | .toDF("a", "b", "p1")
44 |
45 | df.write
46 | .format(dataSourceName)
47 | .mode(SaveMode.ErrorIfExists)
48 | .save(partitionDir.toString)
49 |         // each of these directories has its own success file; there is
50 | // none at the root
51 | resolveSuccessFile(partitionDir, true)
52 | }
53 |
54 | val dataSchemaWithPartition =
55 | StructType(
56 | dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))
57 |
58 | checkQueries(
59 | spark.read.options(Map(
60 | "path" -> path.toString,
61 | "dataSchema" -> dataSchemaWithPartition.json)).format(dataSourceName)
62 | .load())
63 | }
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/scala/org/apache/spark/cloudera/statistics/IOStatisticsAccumulator.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.cloudera.statistics
19 |
20 | import org.apache.hadoop.fs.statistics.{IOStatistics, IOStatisticsSnapshot, IOStatisticsSource}
21 |
22 | import org.apache.spark.util.AccumulatorV2
23 |
24 | /**
25 | * An accumulator which collects and aggregates IOStatistics.
26 | */
27 | class IOStatisticsAccumulator extends AccumulatorV2[IOStatistics, IOStatisticsSnapshot]
28 | with IOStatisticsSource {
29 |
30 | // the snapshot to accumulate.
31 | private var iostatistics: IOStatisticsSnapshot = new IOStatisticsSnapshot()
32 |
33 | /**
34 | * Empty if all the various maps are empty.
35 | * Not thread safe.
36 | * @return true if the accumulator is empty.
37 | */
38 | override def isZero: Boolean = iostatistics.counters().isEmpty &&
39 | iostatistics.gauges().isEmpty &&
40 | iostatistics.maximums().isEmpty &&
41 | iostatistics.minimums().isEmpty &&
42 | iostatistics.meanStatistics().isEmpty
43 |
44 | override def copy(): AccumulatorV2[IOStatistics, IOStatisticsSnapshot] = {
45 | val newAcc = new IOStatisticsAccumulator()
46 | newAcc.add(this.iostatistics)
47 | newAcc
48 | }
49 |
50 | override def reset(): Unit = {
51 | iostatistics.clear()
52 | }
53 |
54 | override def add(v: IOStatistics): Unit = iostatistics.aggregate(v)
55 |
56 | override def merge(other: AccumulatorV2[IOStatistics, IOStatisticsSnapshot]): Unit =
57 | add(other.value)
58 |
59 | override def value: IOStatisticsSnapshot = iostatistics
60 |
61 | override def getIOStatistics: IOStatistics = iostatistics
62 |
63 |   def register(name: String): Unit = {
64 |     // register with the active SparkContext under the given name
65 |     org.apache.spark.SparkContext.getOrCreate().register(this, name)
66 |   }
67 |
68 | }
69 |
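A hedged usage sketch: register the accumulator on the driver, then feed it on executors from anything that exposes IOStatistics (IOStatisticsSupport.retrieveIOStatistics returns null for objects that are not statistics sources):

    import org.apache.hadoop.fs.statistics.IOStatisticsSupport
    import org.apache.spark.SparkContext

    val sc = SparkContext.getOrCreate()
    val acc = new IOStatisticsAccumulator
    sc.register(acc, IOStatisticsCollectorExecutorPlugin.ACCUMULATOR_NAME)

    // on an executor, for a stream which may be an IOStatisticsSource:
    // val stats = IOStatisticsSupport.retrieveIOStatistics(stream)
    // if (stats != null) acc.add(stats)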
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/utils/StatisticsTracker.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.utils
19 |
20 | import scala.collection.JavaConverters._
21 |
22 | import org.apache.hadoop.fs.{FileSystem, StorageStatistics}
23 |
24 | import org.apache.spark.internal.Logging
25 |
26 | class StatisticsTracker(fs: FileSystem) extends Logging {
27 |
28 | private val start: StorageStatistics = fs.getStorageStatistics
29 |
30 | import StatisticsTracker._
31 |
32 | val original: Map[String, Long] = statsToMap(start)
33 |
34 | var updated: Map[String, Long] = Map()
35 |
36 | def update(): StatisticsTracker = {
37 | updated = statsToMap(fs.getStorageStatistics)
38 | this
39 | }
40 |
41 | /**
42 |    * Build a diff between the original statistics and the latest update.
43 | * @return map of changed values only
44 | */
45 | def diff(): Map[String, Long] = {
46 | updated.map { case (name: String, value: Long) =>
47 | name -> (value - original.getOrElse(name, 0L))
48 |     }.filter { case (_, delta) => delta != 0 }
49 | }
50 |
51 | /**
52 | * Dump all changed values.
53 |    * @param prefix prefix of each line
54 |    * @param join separator between name and value
55 |    * @param suffix suffix of each line
56 |    * @param merge separator between lines
57 |    * @return the formatted string
58 | */
59 | def dump(prefix: String, join: String, suffix: String, merge: String): String = {
60 | diff.map { case (name, value) =>
61 | (prefix + name + join + value + suffix)
62 | }.mkString(merge)
63 |
64 | }
65 |
66 | def dump(): String = {
67 | fs.getUri + "\n" + dump(" [", " = ", "]", "\n")
68 | }
69 |
70 |
71 | }
72 |
73 | object StatisticsTracker {
74 |
75 | def statsToMap(stats: StorageStatistics): Map[String, Long] = {
76 |
77 | stats.getLongStatistics.asScala.map { s =>
78 | s.getName -> s.getValue
79 | }.toMap
80 |
81 | }
82 |
83 | }
84 |
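A usage sketch against the local filesystem (any Hadoop FileSystem with storage statistics works the same way): a baseline is captured at construction, then update() captures the current counters and dump() reports only the deltas:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.{FileSystem, Path}

    val fs = FileSystem.getLocal(new Configuration())
    val tracker = new StatisticsTracker(fs)
    fs.listStatus(new Path(System.getProperty("java.io.tmpdir")))
    // prints only counters which changed since construction
    println(tracker.update().dump())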
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/commit/S3ACommitterFactorySuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3.commit
19 |
20 | import com.cloudera.spark.cloud.ObjectStoreConfigurations
21 | import org.apache.hadoop.fs.s3a.commit.S3ACommitterFactory
22 | import org.apache.hadoop.fs.s3a.commit.magic.MagicS3GuardCommitterFactory
23 | import org.apache.hadoop.fs.s3a.commit.staging.{DirectoryStagingCommitterFactory, PartitionedStagingCommitterFactory}
24 |
25 | import org.apache.spark.SparkConf
26 |
27 | /**
28 | * Explicitly create the S3A committers; forces compile-time
29 | * validation that the factory classes are on the classpath,
30 | * along with any direct dependencies.
31 | */
32 | class S3ACommitterFactorySuite extends AbstractS3ACommitterSuite {
33 |
34 | init()
35 |
36 | def init(): Unit = {
37 | // propagate S3 credentials
38 | if (enabled) {
39 | initFS()
40 | }
41 | }
42 |
43 | /**
44 | * Override point for suites: a method which is called
45 | * in all the `newSparkConf()` methods.
46 | * This can be used to alter values for the configuration.
47 | * It is called before the configuration read in from the command line
48 | * is applied, so that tests can override the values applied in-code.
49 | *
50 | * @param sparkConf spark configuration to alter
51 | */
52 | override protected def addSuiteConfigurationOptions(sparkConf: SparkConf): Unit = {
53 | super.addSuiteConfigurationOptions(sparkConf)
54 | sparkConf.setAll(ObjectStoreConfigurations.COMMITTER_OPTIONS)
55 | }
56 |
57 | ctest("DirectoryStagingCommitterFactory on CP") {
58 | new DirectoryStagingCommitterFactory()
59 | }
60 |
61 | ctest("PartitionedStagingCommitterFactory on CP") {
62 | new PartitionedStagingCommitterFactory()
63 | }
64 |
65 | ctest("MagicS3GuardCommitterFactory on CP") {
66 | new MagicS3GuardCommitterFactory()
67 | }
68 |
69 | ctest("S3ACommitterFactory on CP") {
70 | new S3ACommitterFactory()
71 | }
72 |
73 |
74 | }
75 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 |
2 | # Licensed under the Apache License, Version 2.0 (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # http://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software
9 | # distributed under the License is distributed on an "AS IS" BASIS,
10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | # See the License for the specific language governing permissions and
12 | # limitations under the License.
13 | # log4j configuration used during build and unit tests
14 |
15 | log4j.rootLogger=INFO,stdout
16 | log4j.threshold=ALL
17 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
18 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
19 | log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n
20 |
21 | # ALWAYS leave this at debug, it's used to explore what's up with logging
22 | log4j.logger.com.cloudera.spark.test.loglevels=DEBUG
23 |
24 | # Spark commit protocol
25 | #log4j.logger.org.apache.spark.internal.io=DEBUG
26 | #log4j.logger.com.hortonworks.spark=DEBUG
27 |
28 | #log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
29 | log4j.logger.org.apache.hadoop.fs.s3a.S3ABlockOutputStream=INFO
30 | log4j.logger.org.apache.hadoop.fs.s3a.S3AStorageStatistics=INFO
31 | log4j.logger.org.apache.hadoop.fs.s3a.S3AUtils=INFO
32 | log4j.logger.org.apache.hadoop.mapreduce.lib.output.PathOutputCommitterFactory=DEBUG
33 | log4j.logger.org.apache.hadoop.fs.s3a.commit=DEBUG
34 | #log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
35 |
36 | log4j.logger.org.apache.spark.ContextCleaner=WARN
37 | log4j.logger.org.apache.spark.storage.memory.MemoryStore=WARN
38 | log4j.logger.org.apache.spark.sql.execution.FileSourceScanExec=WARN
39 | log4j.logger.org.apache.spark.storage=WARN
40 | log4j.logger.org.apache.spark.sql.catalyst=WARN
41 | log4j.logger.org.apache.spark.SecurityManager=WARN
42 | log4j.logger.org.apache.spark.sql.internal=WARN
43 | log4j.logger.org.apache.spark.scheduler=WARN
44 | log4j.logger.org.apache.spark.SparkEnv=WARN
45 | log4j.logger.org.apache.spark.executor.Executor=WARN
46 | log4j.logger.org.apache.spark.sql.execution.streaming.state=WARN
47 | log4j.logger.org.apache.hadoop.hive.ql.io.orc.RecordReaderFactory=WARN
48 |
49 |
50 | #log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitterFactory=DEBUG
51 | #log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter=DEBUG
52 |
53 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
54 | log4j.logger.org.mortbay.jetty=ERROR
55 | # disable deprecation noise
56 | log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=ERROR
57 |
58 | # turn off other noisy logs
59 | log4j.logger.org.eclipse.jetty=ERROR
60 | log4j.logger.org.spark_project.jetty=ERROR
61 | log4j.logger.org.apache.hadoop.mapreduce.lib.output.committer.manifest=DEBUG
62 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/commit/Events.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3.commit
19 |
20 | import scala.collection.immutable
21 |
22 | /**
23 | * Case class for the dataframes
24 | */
25 | case class Event(
26 | year: Int, month: Int, day: Int, ymd: Int, monthname: String,
27 | datestr: String, value: String)
28 |
29 | object Events {
30 |
31 | /**
32 | * Build up an event sequence across years, every month in every
33 | * year has "rows" events generated.
34 | * @param year1 start year
35 | * @param year2 end year
36 | * @param startMonth start month
37 | * @param endMonth end month
38 | * @param rows rows per month
39 | * @return the event sequence.
40 | */
41 | def events(
42 | year1: Int,
43 | year2: Int,
44 | startMonth: Int,
45 | endMonth: Int,
46 | rows: Int): immutable.IndexedSeq[Event] = {
47 | for (year <- year1 to year2;
48 | month <- startMonth to endMonth;
49 | day <- 1 to Months(month - 1)._2;
50 | r <- 1 to rows)
51 | yield event(year,
52 | month,
53 | day,
54 | "%d/%04f".format(r, Math.random() * 10000))
55 | }
56 |
57 | def monthCount(
58 | year1: Int,
59 | year2: Int,
60 | startMonth: Int,
61 | endMonth: Int): Int = {
62 | var count = 0
63 | for (year <- year1 to year2;
64 | month <- startMonth to endMonth)
65 | count += 1
66 | count
67 | }
68 |
69 | /**
70 | * Create an event.
71 | *
72 | * @return the event.
73 | */
74 | def event(year: Int, month: Int, day: Int, value: String): Event = {
75 | new Event(year, month, day,
76 | day + month * 100 + year * 10000,
77 | Months(month - 1)._1,
78 |       "%04d-%02d-%02d".format(year, month, day),
79 | value
80 | )
81 | }
82 |
83 | val Months = Array(
84 | ("Jan", 31),
85 | ("Feb", 28),
86 | ("Mar", 31),
87 | ("Apr", 30),
88 | ("May", 31),
89 | ("Jun", 30),
90 | ("Jul", 31),
91 | ("Aug", 31),
92 | ("Sep", 30),
93 | ("Oct", 31),
94 | ("Nov", 30),
95 | ("Dec", 31))
96 |
97 | }
98 |
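A small sketch of the generator arithmetic: one year, all twelve months and three rows per day yields 365 × 3 events, since the Months table fixes February at 28 days:

    val evs = Events.events(2021, 2021, 1, 12, 3)
    assert(evs.length == 365 * 3)
    // ymd is a sortable integer: day + month * 100 + year * 10000
    assert(evs.head.ymd == 20210101)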
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/scala/com/cloudera/spark/cloud/utils/HConf.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.utils
19 |
20 | import org.apache.spark.SparkConf
21 |
22 | /**
23 | * A minimal trait purely to set Hadoop configuration values in a Spark
24 | * Configuration.
25 | */
26 | trait HConf {
27 | /**
28 | * Set a Hadoop option in a spark configuration.
29 | *
30 | * @param sparkConf configuration to update
31 | * @param key key
32 | * @param value new value
33 | */
34 | def hconf(sparkConf: SparkConf, key: String, value: String): SparkConf = {
35 | sparkConf.set(hkey(key), value)
36 | sparkConf
37 | }
38 |
39 | /**
40 | * Set a Hadoop option in a spark configuration.
41 | *
42 | * @param sparkConf configuration to update
43 | * @param key key
44 | * @param value new value
45 | */
46 |
47 | def hconf(sparkConf: SparkConf, key: String, value: Boolean): SparkConf = {
48 | sparkConf.set(hkey(key), value.toString)
49 | sparkConf
50 | }
51 |
52 | /**
53 | * Take a Hadoop key, add the prefix to allow it to be added to
54 | * a Spark Config and then picked up properly later.
55 | *
56 | * @param key key
57 | * @return the new key
58 | */
59 | def hkey(key: String): String = {
60 | "spark.hadoop." + key
61 | }
62 |
63 | /**
64 | * Set a long hadoop option in a spark configuration.
65 | *
66 | * @param sparkConf configuration to update
67 | * @param key key
68 | * @param value new value
69 | */
70 | def hconf(sparkConf: SparkConf, key: String, value: Long): SparkConf = {
71 | sparkConf.set(hkey(key), value.toString)
72 | sparkConf
73 | }
74 |
75 | /**
76 | * Set all supplied options to the spark configuration as hadoop options.
77 | *
78 | * @param sparkConf Spark configuration to update
79 | * @param settings map of settings.
80 | */
81 | def hconf(sparkConf: SparkConf,
82 | settings: Traversable[(String, Object)]): SparkConf = {
83 | settings.foreach(e => hconf(sparkConf, e._1, e._2.toString))
84 | sparkConf
85 | }
86 |
87 | }
88 |
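A minimal sketch of the prefixing at work; Spark strips the spark.hadoop. prefix when building the Hadoop Configuration handed to filesystems:

    import org.apache.spark.SparkConf

    object HConfDemo extends HConf {
      def main(args: Array[String]): Unit = {
        val sparkConf = new SparkConf()
        hconf(sparkConf, "fs.s3a.block.size", 1024L * 1024)
        // the value lands under the spark.hadoop. prefix
        assert(sparkConf.get("spark.hadoop.fs.s3a.block.size") == "1048576")
      }
    }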
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/org/apache/spark/sql/sources/HiveTestTrait.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.sources
19 |
20 | import java.io.File
21 |
22 | import com.cloudera.spark.cloud.ObjectStoreConfigurations
23 | import org.scalatest.BeforeAndAfterAll
24 |
25 | import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
26 | import org.apache.spark.sql.SparkSession
27 | import org.apache.spark.sql.hive.test.TestHiveContext
28 | import org.apache.spark.util.Utils
29 |
30 | /**
31 |  * A trait for tests which bind to a Hive context.
32 |  * After all tests, the Hive context is reset; then it and the Spark session
33 |  * are closed.
34 | */
35 | trait HiveTestTrait extends SparkFunSuite with BeforeAndAfterAll {
36 | // override protected val enableAutoThreadAudit = false
37 | protected var hiveContext: HiveInstanceForTests = _
38 | protected var spark: SparkSession = _
39 |
40 |
41 | protected override def beforeAll(): Unit = {
42 | super.beforeAll()
43 | // set up spark and hive context
44 | hiveContext = new HiveInstanceForTests()
45 | spark = hiveContext.sparkSession
46 | }
47 |
48 | protected override def afterAll(): Unit = {
49 | try {
50 | SparkSession.clearActiveSession()
51 |
52 | if (hiveContext != null) {
53 | hiveContext.reset()
54 | hiveContext = null
55 | }
56 | if (spark != null) {
57 | spark.close()
58 | spark = null
59 | }
60 | } finally {
61 | super.afterAll()
62 | }
63 | }
64 |
65 | }
66 |
67 | class HiveInstanceForTests
68 | extends TestHiveContext(
69 | new SparkContext(
70 | System.getProperty("spark.sql.test.master", "local[1]"),
71 | "TestSQLContext",
72 | new SparkConf()
73 | .setAll(ObjectStoreConfigurations.RW_TEST_OPTIONS)
74 | .set("spark.sql.warehouse.dir",
75 | TestSetup.makeWarehouseDir().toURI.getPath)
76 | )
77 | ) {
78 |
79 | }
80 |
81 |
82 |
83 |
84 | object TestSetup {
85 |
86 | def makeWarehouseDir(): File = {
87 | val warehouseDir = Utils.createTempDir(namePrefix = "warehouse")
88 | warehouseDir.delete()
89 | warehouseDir
90 | }
91 | }
92 |
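A hedged sketch of a concrete suite built on the trait (the suite name and assertion are illustrative); spark is live once beforeAll() has run:

    class WarehouseSmokeSuite extends HiveTestTrait {
      test("spark session is live") {
        assert(!spark.sparkContext.isStopped)
      }
    }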
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/scala/org/apache/spark/cloudera/statistics/IOStatisticsCollectorExecutorPlugin.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.cloudera.statistics
19 |
20 | import java.util
21 |
22 | import org.apache.hadoop.fs.statistics.IOStatisticsContext
23 |
24 | import org.apache.spark.{SparkContext, TaskContext, TaskFailedReason}
25 | import org.apache.spark.api.plugin.{ExecutorPlugin, PluginContext}
26 | import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
27 | import org.apache.spark.util.TaskCompletionListener
28 |
29 | class IOStatisticsCollectorExecutorPlugin extends ExecutorPlugin {
30 |
31 | var context: PluginContext = _
32 |
33 | override def init(
34 | ctx: PluginContext,
35 | extraConf: util.Map[String, String]): Unit = {
36 |
37 | context = ctx
38 | // somehow get the active spark context to register
39 | // the accumulator
40 | SparkContext.getOrCreate()
41 |
42 | }
43 | override def shutdown(): Unit = super.shutdown()
44 |
45 |   override def onTaskStart(): Unit = {
46 |     val iostatsCtx: IOStatisticsContext = IOStatisticsContext.getCurrentIOStatisticsContext
47 |     iostatsCtx.reset()
48 |     // accumulator to collect the IO statistics of this task
49 |     val acc = new IOStatisticsAccumulator
50 | 
51 |     val taskContext = TaskContext.get()
52 | 
53 |     // collect the thread's statistics into the accumulator when the task completes
54 |     taskContext.registerAccumulator(acc)
55 |     taskContext.addTaskCompletionListener(new TaskCompleted(acc, iostatsCtx))
56 | 
57 |   }
58 |
59 | override def onTaskSucceeded(): Unit = super.onTaskSucceeded()
60 |
61 | override def onTaskFailed(
62 | failureReason: TaskFailedReason): Unit = super
63 | .onTaskFailed(failureReason)
64 |
65 | private class TaskCompleted(
66 | val acc: IOStatisticsAccumulator,
67 | val iostatsCtx: IOStatisticsContext) extends TaskCompletionListener {
68 |
69 | override def onTaskCompletion(context: TaskContext): Unit = {
70 | acc.add(iostatsCtx.getIOStatistics)
71 | }
72 |
73 | }
74 |
75 | private class SparkListenerImpl extends SparkListener {
76 | override def onJobStart(
77 | jobStart: SparkListenerJobStart): Unit = super
78 | .onJobStart(jobStart)
79 | }
80 | }
81 |
82 |
83 | object IOStatisticsCollectorExecutorPlugin {
84 | val ACCUMULATOR_NAME = "io_statistics"
85 | }
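Executor plugins are activated through the spark.plugins option, which names org.apache.spark.api.plugin.SparkPlugin implementations; a hedged sketch of a wrapper (a hypothetical class, not part of this repository) exposing the executor plugin above:

    import org.apache.spark.api.plugin.{DriverPlugin, ExecutorPlugin, SparkPlugin}

    class IOStatisticsSparkPlugin extends SparkPlugin {
      override def driverPlugin(): DriverPlugin = null
      override def executorPlugin(): ExecutorPlugin =
        new IOStatisticsCollectorExecutorPlugin()
    }
    // enable with:
    // --conf spark.plugins=org.apache.spark.cloudera.statistics.IOStatisticsSparkPlugin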
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/common/StoreTestHelper.scala:
--------------------------------------------------------------------------------
1 | package com.cloudera.spark.cloud.common
2 |
3 | /*
4 | * Licensed to the Apache Software Foundation (ASF) under one or more
5 | * contributor license agreements. See the NOTICE file distributed with
6 | * this work for additional information regarding copyright ownership.
7 | * The ASF licenses this file to You under the Apache License, Version 2.0
8 | * (the "License"); you may not use this file except in compliance with
9 | * the License. You may obtain a copy of the License at
10 | *
11 | * http://www.apache.org/licenses/LICENSE-2.0
12 | *
13 | * Unless required by applicable law or agreed to in writing, software
14 | * distributed under the License is distributed on an "AS IS" BASIS,
15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | * See the License for the specific language governing permissions and
17 | * limitations under the License.
18 | */
19 |
20 | import java.io.{File, FileNotFoundException}
21 |
22 | import com.cloudera.spark.cloud.s3.S3AConstants
23 | import com.cloudera.spark.cloud.CommitterBinding
24 | import org.apache.hadoop.conf.Configuration
25 |
26 | import org.apache.spark.internal.Logging
27 |
28 | /**
29 | * Instantiation of StoreTestHelper.
30 | */
31 | object StoreTestHelper extends StoreTestOperations
32 | with Logging
33 | with S3AConstants
34 | with CloudSuiteTrait {
35 |
36 | private var configLogged = false
37 |
38 | /**
39 | * Load the configuration file from the system property `SYSPROP_CLOUD_TEST_CONFIGURATION_FILE`.
40 | * Throws FileNotFoundException if a configuration is named but not present.
41 | *
42 | * @return the configuration
43 | */
44 | def loadConfiguration(): Configuration = {
45 | val config = new Configuration(true)
46 | getKnownSysprop(SYSPROP_CLOUD_TEST_CONFIGURATION_FILE).foreach { filename =>
47 | logDebug(s"Configuration property = `$filename`")
48 | val f = new File(filename)
49 | if (f.exists()) {
50 |         // unsynced, but it's only a log statement
51 |         if (!configLogged) {
52 | configLogged = true
53 | logInfo(s"Loading configuration from $f")
54 | }
55 | config.addResource(f.toURI.toURL)
56 | } else {
57 | throw new FileNotFoundException(s"No file '$filename'" +
58 | s" declared in property $SYSPROP_CLOUD_TEST_CONFIGURATION_FILE")
59 | }
60 | }
61 | overlayConfiguration(
62 | config,
63 | Seq(
64 | HIVE_TESTS_DISABLED,
65 | REQUIRED_HADOOP_VERSION,
66 | SCALE_TEST_ENABLED,
67 | SCALE_TEST_SIZE_FACTOR,
68 | S3A_COMMITTER_TEST_ENABLED,
69 | S3A_ENCRYPTION_KEY_1,
70 | S3A_ENCRYPTION_KEY_2
71 | )
72 | )
73 |
74 | // setup the committer from any property passed in
75 | getKnownSysprop(S3A_COMMITTER_NAME).foreach(committer => {
76 | val binding = CommitterBinding.COMMITTERS_BY_NAME(committer.toLowerCase())
77 | binding.bind(config)
78 | logInfo(s"Using committer binding $binding")
79 | })
80 | config
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/common/FileGeneratorTests.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.common
19 |
20 | import com.cloudera.spark.cloud.operations.CloudFileGenerator
21 | import org.apache.hadoop.fs.Path
22 |
23 | import org.apache.spark.SparkConf
24 |
25 | /**
26 | * Test the `FileGenerator` entry point. Use a small file number to keep the unit tests fast; some
27 | * cloud infras are very slow here. System tests can use the CLI instead.
28 | */
29 | abstract class FileGeneratorTests extends CloudSuite {
30 |
31 | ctest("FileGenerator", "Execute the FileGenerator example") {
32 | val conf = newSparkConf()
33 | conf.setAppName("FileGenerator")
34 | val destDir = testPath(filesystem, "filegenerator")
35 | val months = 2
36 | val fileCount = 1
37 | val rowCount = 500
38 |
39 | assert(0 === generate(conf, destDir, months, fileCount, rowCount))
40 |
41 | val status = filesystem.getFileStatus(destDir)
42 | assert(status.isDirectory, s"Not a directory: $status")
43 |
44 | val totalExpectedFiles = months * fileCount
45 |
46 | // do a recursive listFiles
47 | val listing = logDuration("listFiles(recursive)") {
48 | listFiles(filesystem, destDir, true)
49 | }
50 | var recursivelyListedFilesDataset = 0L
51 | var recursivelyListedFiles = 0
52 | logDuration("scan result list") {
53 | listing.foreach { status =>
54 | recursivelyListedFiles += 1
55 | recursivelyListedFilesDataset += status.getLen
56 | logInfo(s"${status.getPath}[${status.getLen}]")
57 | }
58 | }
59 |
60 | logInfo(s"FileSystem $filesystem")
61 | assert(totalExpectedFiles === recursivelyListedFiles)
62 | }
63 |
64 | /**
65 | * Generate a set of files
66 | * @param conf configuration
67 | * @param destDir destination directory
68 | * @param monthCount number of months to generate
69 | * @param fileCount files per month
70 | * @param rowCount rows per file
71 | * @return the exit code of the operation
72 | */
73 | def generate(
74 | conf: SparkConf,
75 | destDir: Path,
76 | monthCount: Int,
77 | fileCount: Int,
78 | rowCount: Int): Int = {
79 | val result = new CloudFileGenerator().action(
80 | conf,
81 | Seq(destDir,
82 | monthCount,
83 | fileCount,
84 | rowCount))
85 | result
86 | }
87 | }
88 |
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/scala/com/cloudera/spark/cloud/s3/audit/LogParser.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3.audit
19 |
20 | import java.util.regex.Matcher
21 |
22 | import org.apache.hadoop.fs.s3a.audit.S3LogParser
23 | import org.apache.hadoop.fs.s3a.audit.S3LogParser._
24 |
25 |
26 | /**
27 | * Log parsing using s3a audit classes.
28 | */
29 | object LogParser {
30 |
31 | private val pattern = S3LogParser.LOG_ENTRY_PATTERN
32 |
33 | private def entry(matcher: Matcher, group: String): String = {
34 | val g = matcher.group(group)
35 | assert(g != null, s"Group $group is null")
36 | assert(!g.isEmpty, s"Group $group is empty")
37 | g
38 | }
39 |
40 | private def longEntry(m: Matcher, group: String): Long = {
41 | entry(m, group).toLong
42 | }
43 |
44 | /**
45 | * Parse a line.
46 | * @param line line
47 | * @return the entry or None if the regexp didn't match
48 | * @throws AssertionError if a group is null/empty
49 | */
50 | def parse(line: String): Option[ServerLogEntry] = {
51 | val m = pattern.matcher(line)
52 |
53 |     if (!m.matches()) {
54 |       None
55 |     } else {
56 | Some(ServerLogEntry(
57 | bucketowner = entry(m, OWNER_GROUP),
58 | bucket_name = entry(m, BUCKET_GROUP),
59 | requestdatetime = entry(m, TIMESTAMP_GROUP),
60 | remoteip = entry(m, REMOTEIP_GROUP),
61 | requester = entry(m, REQUESTER_GROUP),
62 | requestid = entry(m, REQUESTID_GROUP),
63 | operation = entry(m, VERB_GROUP),
64 | key = entry(m, KEY_GROUP),
65 | request_uri = entry(m, REQUESTURI_GROUP),
66 | httpstatus = entry(m, HTTP_GROUP),
67 | errorcode = entry(m, AWSERRORCODE_GROUP),
68 | bytessent = longEntry(m, BYTESSENT_GROUP),
69 | objectsize = longEntry(m, OBJECTSIZE_GROUP),
70 | totaltime = entry(m, TOTALTIME_GROUP),
71 | turnaroundtime = entry(m, TURNAROUNDTIME_GROUP),
72 | referrer = entry(m, REFERRER_GROUP),
73 | useragent = entry(m, USERAGENT_GROUP),
74 | versionid = entry(m, VERSION_GROUP),
75 | hostid = entry(m, HOSTID_GROUP),
76 | sigv = entry(m, SIGV_GROUP),
77 | ciphersuite = entry(m, CYPHER_GROUP),
78 | authtype = entry(m, AUTH_GROUP),
79 | endpoint = entry(m, ENDPOINT_GROUP),
80 | tlsversion = entry(m, TLS_GROUP)))
81 | }
82 |
83 | }
84 |
85 | }
86 |
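A hedged usage sketch; the input is one entry of an S3 server access log, and None is returned for lines which do not match the pattern:

    def handle(line: String): Unit = {
      LogParser.parse(line) match {
        case Some(e) => println(s"${e.operation} ${e.key} -> HTTP ${e.httpstatus}")
        case None => // line did not match the S3 log pattern
      }
    }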
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/common/StoreTestOperations.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.common
19 |
20 | import scala.concurrent.duration._
21 | import scala.language.postfixOps
22 |
23 | import com.cloudera.spark.cloud.{GeneralCommitterConstants, ObjectStoreOperations}
24 | import org.apache.hadoop.conf.Configuration
25 | import org.apache.hadoop.fs.{FileStatus, FileSystem, LocatedFileStatus, Path}
26 | import org.scalatest.concurrent.Eventually
27 | import org.scalatest.time.Span
28 |
29 | import org.apache.spark.sql._
30 |
31 | /**
32 | * Extends ObjectStoreOperations with some extra ones for testing.
33 | */
34 | trait StoreTestOperations extends ObjectStoreOperations with Eventually {
35 |
36 | protected val retryTimeout: Span = 30 seconds
37 |
38 | protected val retryInterval: Span = 1000 milliseconds
39 |
40 | /**
41 | * Try to get the file status, _eventually_.
42 | *
43 | * @param fs filesystem
44 | * @param p path
45 | * @return the result
46 | */
47 | def eventuallyGetFileStatus(fs: FileSystem, p: Path): FileStatus = {
48 | fs.getFileStatus(p)
49 | }
50 |
51 | /**
52 |    * Load a DF and verify it has the expected number of rows.
53 | *
54 | * @param spark session
55 | * @param fs filesystem
56 | * @param source path
57 | * @param srcFormat format of source
58 |    * @param rowCount expected row count
59 |    * @return how long the load took
60 | */
61 | def validateRowCount(
62 | spark: SparkSession,
63 | fs: FileSystem,
64 | source: Path,
65 | srcFormat: String,
66 | rowCount: Long): Long = {
67 | val success = new Path(source, GeneralCommitterConstants.SUCCESS_FILE_NAME)
68 | val status = fs.getFileStatus(success)
69 | assert(status.isDirectory || status.getBlockSize > 0,
70 | s"Block size 0 in $status")
71 | val files = listFiles(fs, source, true).filter { st =>
72 | val name = st.getPath.getName
73 | st.isFile && !name.startsWith(".") && !name.startsWith("_")
74 | }
75 | assert(files.nonEmpty, s"No files in the directory $source")
76 | val (loadedCount, loadTime) = durationOf(loadDF(spark, source, srcFormat)
77 | .count())
78 |     logInfo(s"Loaded $source in $loadTime ns")
79 | require(rowCount == loadedCount,
80 | s"Expected $rowCount rows, but got $loadedCount from $source formatted as $srcFormat")
81 | loadTime
82 | }
83 |
84 | }
85 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/s3/S3ATestSetup.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import java.net.URI
21 |
22 | import com.cloudera.spark.cloud.common.{CloudSuiteTrait, CsvDatasourceSupport}
23 | import org.apache.hadoop.conf.Configuration
24 | import org.apache.hadoop.fs.{FileSystem, Path}
25 |
26 | /**
27 | * Trait for S3A tests.
28 | */
29 | trait S3ATestSetup extends CloudSuiteTrait with RandomIOPolicy with
30 | CsvDatasourceSupport {
31 |
32 | override def enabled: Boolean = {
33 | getConf.getBoolean(S3A_TESTS_ENABLED, false) && super.enabled
34 |
35 | }
36 |
37 | /**
38 |    * This is *not* true for the store, but is declared here to make sure the
39 |    * tests fail the way they are meant to.
40 |    * @return true if the store committer is expected to support dynamic partition overwrite
41 | */
42 | override def dynamicPartitioning: Boolean = false
43 |
44 | def initFS(): FileSystem = {
45 | setupFilesystemConfiguration(getConf)
46 | createTestS3AFS
47 | }
48 |
49 | /**
50 |    * Do the work of setting up the S3A test FS.
51 | * @return the filesystem
52 | */
53 | protected def createTestS3AFS: FileSystem = {
54 | val s3aURI = new URI(requiredOption(S3A_TEST_URI))
55 | logInfo(s"Executing S3 tests against $s3aURI with read policy $inputPolicy")
56 | createFilesystem(s3aURI)
57 | }
58 |
59 | /**
60 | * Override point: set up the configuration for the filesystem.
61 | * The base implementation sets up buffer directory, block size and IO Policy.
62 | * @param config configuration to set up
63 | */
64 | def setupFilesystemConfiguration(config: Configuration): Unit = {
65 | config.set(BUFFER_DIR, localTmpDir.getAbsolutePath)
66 | // a block size of 1MB
67 | config.set(BLOCK_SIZE, (1024 * 1024).toString)
68 | // the input policy
69 | config.set(INPUT_FADVISE, inputPolicy)
70 | }
71 |
72 | lazy val CSV_TESTFILE: Option[Path] = {
73 | val pathname = getConf.get(S3A_CSVFILE_PATH, S3A_CSV_PATH_DEFAULT)
74 | if (!pathname.isEmpty) Some(new Path(pathname)) else None
75 | }
76 |
77 | /**
78 | * Predicate to define whether or not there's a CSV file to work with.
79 | * @return true if the CSV test file is defined.
80 | */
81 | override def hasCSVTestFile(): Boolean = CSV_TESTFILE.isDefined
82 |
83 | /**
84 | * Path to the CSV file's original source
85 | *
86 | * @return a path
87 | */
88 | override def sourceCSVFilePath: Option[Path] = CSV_TESTFILE
89 | }
90 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/commit/S3ACommitDataframeSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3.commit
19 |
20 | import com.cloudera.spark.cloud.CommitterBinding._
21 | import com.cloudera.spark.cloud.committers.AbstractCommitDataframeSuite
22 | import com.cloudera.spark.cloud.s3.S3ATestSetup
23 | import com.cloudera.spark.cloud.CommitterInfo
24 | import org.apache.hadoop.fs.{FileSystem, Path}
25 |
26 | import org.apache.spark.sql.{Dataset, SparkSession}
27 | import org.apache.spark.SparkConf
28 |
29 | /**
30 | * Tests different data formats through the committers.
31 | */
32 | class S3ACommitDataframeSuite
33 | extends AbstractCommitDataframeSuite with S3ATestSetup {
34 |
35 | init()
36 |
37 | def init(): Unit = {
38 | // propagate S3 credentials
39 | if (enabled) {
40 | initFS()
41 | }
42 | }
43 |
44 | override def schema: String = "s3a"
45 |
46 |
47 |   // there's an empty string at the end to make it easy to comment out
48 |   // individual committers without worrying about trailing commas
49 | override def committers: Seq[String] = Seq(
50 | DIRECTORY,
51 | PARTITIONED,
52 | MAGIC,
53 | ""
54 | )
55 |
56 |
57 | override protected def setDynamicPartitioningOptions(
58 | sparkConf: SparkConf,
59 | committerInfo: CommitterInfo): Unit = {
60 | if (committerInfo.name == PARTITIONED) {
61 | hconf(sparkConf, S3A_CONFLICT_MODE, CONFLICT_MODE_REPLACE)
62 | } else {
63 | super
64 | .setDynamicPartitioningOptions(sparkConf, committerInfo)
65 | }
66 | }
67 |
68 |
69 | override protected def expectDynamicPartitioningToSucceed(
70 | committerInfo: CommitterInfo): Boolean = {
71 | committerInfo.name == PARTITIONED
72 | }
73 |
74 | override def anyOtherTests(spark: SparkSession,
75 | filesystem: FileSystem,
76 | subdir: Path, format: String,
77 | sourceData: Dataset[Event],
78 | eventData2: Dataset[Event],
79 | committerInfo: CommitterInfo): Unit = {
80 | if (committerInfo.name == PARTITIONED) {
81 | logInfo("Executing partitioned committer tests")
82 | // although the dynamic command doesn't work,
83 | // a normal query will trigger overwrite
84 |       logDuration(s"overwrite dataset2 to $subdir in format $format") {
85 | eventData2
86 | .write
87 | .mode("overwrite")
88 | .partitionBy("year", "month")
89 | .format(format)
90 | .save(subdir.toString)
91 | }
92 | }
93 |
94 |
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
/spark-cloud-integration/src/main/scala/com/cloudera/spark/cloud/CommitterBinding.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud
19 |
20 | import com.cloudera.spark.cloud.GeneralCommitterConstants.{ABFS_MANIFEST_COMMITTER_FACTORY, DEFAULT_COMMITTER_FACTORY, MANIFEST_COMMITTER_FACTORY, MANIFEST_COMMITTER_NAME}
21 | import org.apache.hadoop.fs.s3a.commit.CommitConstants
22 |
23 | /**
24 | * Constants related to the S3A committers.
25 | * Originally a copy & paste of the java values, it's now just a reference,
26 | * though retained to reserve the option of moving back to copied values.
27 | */
28 | object CommitterBinding {
29 |
30 | def factoryForSchema(s: String): String =
31 | String.format(
32 | GeneralCommitterConstants.OUTPUTCOMMITTER_FACTORY_SCHEME_PATTERN,
33 | s)
34 |
35 |
36 | val S3A_SCHEME_COMMITTER_FACTORY: String = factoryForSchema("s3a")
37 | val STAGING_PACKAGE = "org.apache.hadoop.fs.s3a.commit.staging."
38 | val S3A_COMMITTER_FACTORY: String = CommitConstants.S3A_COMMITTER_FACTORY
39 |
40 | val S3A_COMMITTER_NAME: String = CommitConstants.FS_S3A_COMMITTER_NAME
41 |
42 | val MAGIC = "magic"
43 | val STAGING = "staging"
44 | val DIRECTORY = "directory"
45 | val PARTITIONED = "partitioned"
46 | val MANIFEST = "manifest"
47 | val MANIFEST_ABFS = "manifest_abfs"
48 | val FILE = "file"
49 |
50 | val S3A_CONFLICT_MODE: String =
51 | CommitConstants.FS_S3A_COMMITTER_STAGING_CONFLICT_MODE
52 |
53 | /** Conflict mode */
54 | val CONFLICT_MODE_FAIL: String = "fail"
55 |
56 | val CONFLICT_MODE_APPEND: String = "append"
57 |
58 | val CONFLICT_MODE_REPLACE: String = "replace"
59 |
60 | /**
61 | * Committer name to: name in _SUCCESS, factory classname, requires consistent FS.
62 | *
63 |    * If the first field is "", it means the committer doesn't put its name into
64 |    * the success file (or that the file isn't actually created).
65 | */
66 | val COMMITTERS_BY_NAME: Map[String, CommitterInfo] = Map(
67 | MAGIC -> CommitterInfo(MAGIC, S3A_COMMITTER_FACTORY),
68 | STAGING -> CommitterInfo(STAGING, S3A_COMMITTER_FACTORY),
69 | DIRECTORY -> CommitterInfo(DIRECTORY, S3A_COMMITTER_FACTORY),
70 | PARTITIONED -> CommitterInfo(PARTITIONED, S3A_COMMITTER_FACTORY),
71 | MANIFEST -> CommitterInfo(MANIFEST_COMMITTER_NAME,
72 | MANIFEST_COMMITTER_FACTORY),
73 | MANIFEST_ABFS -> CommitterInfo(MANIFEST_COMMITTER_NAME,
74 | ABFS_MANIFEST_COMMITTER_FACTORY),
75 | FILE -> CommitterInfo("", DEFAULT_COMMITTER_FACTORY)
76 | )
77 |
78 | }
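A sketch of resolving a binding by name and applying it to a Hadoop configuration, mirroring the lookup performed in CloudSuite.loadConfiguration():

    import org.apache.hadoop.conf.Configuration

    val conf = new Configuration()
    val binding = CommitterBinding.COMMITTERS_BY_NAME(CommitterBinding.MAGIC)
    binding.bind(conf)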
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/common/CloudSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.common
19 |
20 | import java.io.{File, FileNotFoundException}
21 |
22 | import com.cloudera.spark.cloud.s3.S3AConstants
23 | import com.cloudera.spark.cloud.CommitterBinding
24 | import org.apache.hadoop.conf.Configuration
25 | import org.scalatest.concurrent.Eventually
26 | import org.scalatest.BeforeAndAfter
27 |
28 | import org.apache.spark.{LocalSparkContext, SparkFunSuite}
29 | import org.apache.spark.internal.Logging
30 |
31 | /**
32 | * A cloud suite.
33 | * Adds automatic loading of a Hadoop configuration file with login credentials and
34 | * options to enable/disable tests, and a mechanism to conditionally declare tests
35 | * based on these details
36 | */
37 | abstract class CloudSuite extends ContextFreeCloudSuite
38 | with LocalSparkContext {
39 | }
40 |
41 | object CloudSuite extends Logging with S3AConstants
42 | with CloudSuiteTrait {
43 |
44 | private var configLogged = false
45 |
46 | /**
47 | * Load the configuration file from the system property `SYSPROP_CLOUD_TEST_CONFIGURATION_FILE`.
48 | * Throws FileNotFoundException if a configuration is named but not present.
49 | * @return the configuration
50 | */
51 | def loadConfiguration(): Configuration = {
52 | val config = new Configuration(true)
53 | getKnownSysprop(SYSPROP_CLOUD_TEST_CONFIGURATION_FILE).foreach { filename =>
54 | logDebug(s"Configuration property = `$filename`")
55 | val f = new File(filename)
56 | if (f.exists()) {
57 |         // unsynced, but it's only a log statement
58 |         if (!configLogged) {
59 | configLogged = true
60 | logInfo(s"Loading configuration from $f")
61 | }
62 | config.addResource(f.toURI.toURL)
63 | } else {
64 | throw new FileNotFoundException(s"No file '$filename'" +
65 | s" declared in property $SYSPROP_CLOUD_TEST_CONFIGURATION_FILE")
66 | }
67 | }
68 | overlayConfiguration(
69 | config,
70 | Seq(
71 | HIVE_TESTS_DISABLED,
72 | REQUIRED_HADOOP_VERSION,
73 | SCALE_TEST_ENABLED,
74 | SCALE_TEST_SIZE_FACTOR,
75 | S3A_CLIENT_FACTORY_IMPL,
76 | S3A_COMMITTER_TEST_ENABLED,
77 | S3A_ENCRYPTION_KEY_1,
78 | S3A_ENCRYPTION_KEY_2
79 | )
80 | )
81 |
82 | // setup the committer from any property passed in
83 | getKnownSysprop(S3A_COMMITTER_NAME).foreach(committer => {
84 | val binding = CommitterBinding.COMMITTERS_BY_NAME(committer.toLowerCase())
85 | binding.bind(config)
86 | logInfo(s"Using committer binding $binding")
87 | })
88 | config
89 | }
90 |
91 | }
92 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3AEncryptionSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import com.cloudera.spark.cloud.common.CloudSuite
21 | import org.apache.hadoop.conf.Configuration
22 | import org.apache.hadoop.fs._
23 |
24 | /**
25 | * A suite of tests working with encryption.
26 | * Needs multiple encryption keys to work with.
27 | */
28 | class S3AEncryptionSuite extends CloudSuite with S3ATestSetup {
29 |
30 | override def enabled: Boolean = {
31 | val conf = getConf
32 | super.enabled && hasConf(conf, S3A_ENCRYPTION_KEY_1) &&
33 | hasConf(conf, S3A_ENCRYPTION_KEY_2)
34 | }
35 |
36 | init()
37 |
38 | def init(): Unit = {
39 | if (enabled) {
40 | initFS()
41 | }
42 | }
43 |
44 | override def setupFilesystemConfiguration(config: Configuration): Unit = {
45 | super.setupFilesystemConfiguration(config)
46 | config.set(SERVER_SIDE_ENCRYPTION_ALGORITHM, SSE_KMS)
47 | config.set(SERVER_SIDE_ENCRYPTION_KEY, config.getTrimmed(S3A_ENCRYPTION_KEY_1))
48 | }
49 |
50 | /**
51 | * Create an FS with key2
52 | */
53 | def createKey2FS(): FileSystem = {
54 | val config = getConf
55 | config.set(SERVER_SIDE_ENCRYPTION_ALGORITHM, SSE_KMS)
56 | config.set(SERVER_SIDE_ENCRYPTION_KEY, config.getTrimmed(S3A_ENCRYPTION_KEY_2))
57 | FileSystem.newInstance(filesystemURI, config)
58 | }
59 |
60 | /**
61 |    * Create an FS with no encryption settings
62 | */
63 | def createUnencryptedFS(): FileSystem = {
64 | val config = getConf
65 | config.unset(SERVER_SIDE_ENCRYPTION_ALGORITHM)
66 | FileSystem.newInstance(filesystemURI, config)
67 | }
68 |
69 | ctest("TwoKeys", "read and write with two keys") {
70 | val key1 = filesystem.getConf.get(SERVER_SIDE_ENCRYPTION_KEY)
71 | logInfo(s"Test key 1 = $key1")
72 |
73 | val dir = path("TwoKeys")
74 | val key1File = new Path(dir, "key1")
75 | val hello: String = "hello"
76 | write(filesystem, key1File, hello)
77 |
78 | val fs2 = createKey2FS()
79 | val key2 = fs2.getConf.get(SERVER_SIDE_ENCRYPTION_KEY)
80 | logInfo(s"Test key 2 = $key2")
81 | assert( key1 != key2, "same key is used for both filesystems")
82 |
83 | val status = fs2.getFileStatus(key1File)
84 | assert( hello.length === status.getLen, s"wrong length in $status")
85 |
86 | fs2.listStatus(dir)
87 | val data = read(fs2, key1File, 128)
88 | assert (hello.length === data.length)
89 | assert (hello === data)
90 |
91 | val unencryptedFS = createUnencryptedFS()
92 | val dataUnencrypted = read(unencryptedFS, key1File, 128)
93 | assert(hello === dataUnencrypted)
94 |
95 | unencryptedFS.delete(key1File, false)
96 | fs2.delete(dir, true)
97 |
98 | }
99 |
100 |
101 | }
102 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/s3/S3ALineCountWritebackSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.s3
19 |
20 | import scala.concurrent.duration._
21 | import scala.language.postfixOps
22 |
23 | import com.cloudera.spark.cloud.common.CloudSuiteWithCSVDatasource
24 | import org.apache.hadoop.fs.{FileStatus, Path}
25 |
26 | /**
 27 |  * Test the `S3ALineCount` entry point.
28 | */
29 | class S3ALineCountWritebackSuite extends CloudSuiteWithCSVDatasource with S3ATestSetup {
30 |
31 | init()
32 |
33 | def init(): Unit = {
34 | // propagate S3 credentials
35 | if (enabled) {
36 | initFS()
37 | }
38 | }
39 |
40 | override def enabled: Boolean = super.enabled && hasCSVTestFile
41 |
42 | override def cleanFSInTeardownEnabled: Boolean = true
43 |
44 | after {
45 | cleanFilesystemInTeardown()
46 | }
47 |
48 | ctest("LineCountWriteback",
49 | "Execute the LineCount example with the results written back to the test filesystem.") {
50 | val sourceFile = getTestCSVPath()
51 | val sourceFS = sourceFile.getFileSystem(getConf)
52 | val sourceInfo = sourceFS.getFileStatus(sourceFile)
53 | val sparkConf = newSparkConf()
54 | sparkConf.setAppName("LineCount")
55 | val destDir = testPath(filesystem, "LineCountWriteback")
56 | assert(0 === S3ALineCount.action(sparkConf,
57 | Array(sourceFile.toString, destDir.toString)))
58 |
59 |
60 | val status = filesystem.getFileStatus(destDir)
61 | assert(status.isDirectory, s"Not a directory: $status")
62 |
63 | // only a small fraction of the source data is needed
64 | val expectedLen = sourceInfo.getLen / 1024
65 |
 66 |     def validateChildSize(qualifier: String, files: Seq[FileStatus]): Unit = {
67 | val (filenames, size) = enumFileSize(destDir, files)
68 | logInfo(s"total size of $qualifier = $size bytes from ${files.length} files: $filenames")
69 | assert(size >= expectedLen, s"$qualifier size $size in files $filenames" +
 70 |         s" smaller than expected length $expectedLen")
71 | }
72 |
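    // retry with ScalaTest's eventually(): object store listings may lag
    // behind writes, so allow the output files time to become visible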
73 | val stdInterval = interval(100 milliseconds)
 74 |     eventually(timeout(20 seconds), stdInterval) {
75 | validateChildSize("descendants",
76 | listFiles(filesystem, destDir, true)
77 | .filter(f => f.getPath.getName != "_SUCCESS"))
78 |
79 | validateChildSize("children",
80 | filesystem.listStatus(destDir,
81 | pathFilter(p => p.getName != "_SUCCESS")).toSeq)
82 | }
83 | }
84 |
85 | private def enumFileSize(destDir: Path, files: Seq[FileStatus]): (String, Long) = {
86 | assert(files.nonEmpty, s"No files in destination directory $destDir")
87 | var size = 0L
 88 |     val filenames = new StringBuilder()
89 | files.foreach { f =>
90 | size += f.getLen
91 | filenames.append(" ").append(f.getPath)
92 | }
93 | (filenames.toString, size)
94 | }
95 |
96 | }
97 |
--------------------------------------------------------------------------------
/cloud-examples/src/main/scala/com/cloudera/spark/cloud/examples/AzureStreamingExample.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.examples
19 |
20 | import com.cloudera.spark.cloud.ObjectStoreExample
21 | import org.apache.hadoop.fs.Path
22 |
23 | import org.apache.spark.SparkConf
24 | import org.apache.spark.streaming.{Seconds, StreamingContext}
25 |
26 | /**
27 | * Simple example of streaming on Azure.
28 | */
29 | class AzureStreamingExample extends ObjectStoreExample {
30 |
31 | /**
32 | * List of the command args for the current example.
 33 |    * @return a usage string naming the expected arguments
34 | */
35 | override protected def usageArgs(): String = {
 36 |     "<dest> <delay-seconds> <batch-interval-seconds>"
37 | }
38 |
39 | /**
40 | * Action to execute.
41 | *
42 | * @param sparkConf configuration to use
43 | * @param args argument array
44 | * @return an exit code
45 | */
46 | override def action(
47 | sparkConf: SparkConf,
48 | args: Array[String]): Int = {
49 | if (args.length != 3) {
50 | return usage()
51 | }
52 | sparkConf.setAppName("CloudStreaming")
53 | applyObjectStoreConfigurationOptions(sparkConf, false)
54 | val dest = args(0)
 55 |     val delay = args(1).toInt
 56 |     val interval = args(2).toInt
57 |
 58 |     // Create the context with the batch interval passed on the command line
 59 |     val streaming = new StreamingContext(sparkConf, Seconds(interval))
60 |
61 | try {
 62 |       // Create the FileInputDStream on the destination directory and use
 63 |       // the stream to watch for new files renamed into it
64 | val destPath = new Path(dest)
65 | val sc = streaming.sparkContext
66 | val hc = sc.hadoopConfiguration
67 |
68 | val fs = destPath.getFileSystem(hc)
69 | rm(fs, destPath)
70 | fs.mkdirs(destPath)
71 |
72 | val sightings = sc.longAccumulator("sightings")
73 |
 74 |     println("===================================")
 75 |     println(s"Looking for text files under $destPath")
 76 |     println("===================================")
77 |
78 | val lines = streaming.textFileStream(dest)
79 |
80 | val matches = lines.map(line => {
81 | sightings.add(1)
 82 |       println(s"[${sightings.value}]: $line")
83 | line
84 | })
85 |
86 | // materialize the operation
87 | matches.print()
88 |
89 | // start the streaming
90 | streaming.start()
91 |
92 | // sleep a bit to get streaming up and running
93 | Thread.sleep(delay * 1000)
 94 |       println("===================================")
 95 |       println(s"Seen ${sightings.value} lines")
96 | 0
97 | } finally {
98 | streaming.stop(true)
99 | }
100 | }
101 |
102 | }
103 |
104 | object AzureStreamingExample {
105 |
 106 |   def main(args: Array[String]): Unit = {
107 | new AzureStreamingExample().run(args)
108 | }
109 | }
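
// For reference, a minimal sketch of how this example might be launched with
// spark-submit; the class name is real, but the jar name, container/account
// and timing arguments are illustrative assumptions:
//
//   spark-submit \
//     --class com.cloudera.spark.cloud.examples.AzureStreamingExample \
//     cloud-examples.jar \
//     wasb://container@account.blob.core.windows.net/streaming/in 60 10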
110 |
111 |
112 |
--------------------------------------------------------------------------------
/cloud-examples/src/test/scala/com/cloudera/spark/cloud/common/SeekReadTests.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.cloudera.spark.cloud.common
19 |
20 | import org.apache.hadoop.fs.FileSystem
21 |
22 | /**
 23 |  * Tests reading in the CSV file using sequential and random IO.
24 | */
25 | class SeekReadTests extends CloudSuiteWithCSVDatasource {
26 |
27 | override def enabled: Boolean = super.enabled && hasCSVTestFile
28 |
29 |
30 | ctest("SeekReadFully",
31 | """Assess cost of seek and read operations.
32 | | When moving the cursor in an input stream, an HTTP connection may be closed and
33 | | then re-opened. This can be very expensive; tactics like streaming forwards instead
34 | | of seeking, and/or postponing movement until the following read ('lazy seek') try
35 | | to address this. Logging these operation times helps track performance.
 36 |       | This test also tries to catch a regression in which `close()` is
 37 |       | implemented by reading through the remainder of the input stream;
 38 |       | this shows up as the time to `close()` at offset 0 being `O(len(file))`.
39 | |
40 | | Note also the cost of `readFully()`; this method call is common inside libraries
41 | | like Orc and Parquet.""".stripMargin) {
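    // "lazy seek" in brief: seek() only records the target offset, and the
    // HTTP GET is re-issued by the next read that actually needs new data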
42 | val (source, fs) = getCSVSourceAndFileSystem()
 43 |     FileSystem.clearStatistics()
44 | fs.getStorageStatistics.reset()
45 | val st = logDuration("stat") {
46 | fs.getFileStatus(source)
47 | }
48 | val in = logDuration("open") {
49 | fs.open(source)
50 | }
51 | def time[T](operation: String)(testFun: => T): T = {
 52 |       logInfo("")
 53 |       val r = logDuration(operation + s" [pos = ${in.getPos}]")(testFun)
54 | logInfo(s" ${in.getWrappedStream}")
55 | r
56 | }
57 |
58 | val eof = st.getLen
59 |
60 | time("read()") {
61 | assert(-1 !== in.read())
62 | }
63 | time("seek(256)") {
64 | in.seek(256)
65 | }
66 | time("seek(256)") {
67 | in.seek(256)
68 | }
69 | time("seek(EOF-2)") {
70 | in.seek(eof - 2)
71 | }
72 | time("read()") {
73 | assert(-1 !== in.read())
74 | }
75 |
76 | def readFully(offset: Long, len: Int): Unit = {
77 | time(s"readFully($offset, byte[$len])") {
78 | val bytes = new Array[Byte](len)
 79 |         in.readFully(offset, bytes) // void: fills the buffer or throws EOFException
80 | }
81 | }
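    // the sequence below loosely mimics a columnar reader: small probes,
    // ranges near the end of the file, then a jump back to the start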
82 | readFully(1L, 1)
83 | readFully(1L, 256)
84 | readFully(eof - 350, 300)
85 | readFully(260L, 256)
86 | readFully(1024L, 256)
87 | readFully(1536L, 256)
88 | readFully(8192L, 1024)
89 | readFully(8192L + 1024 + 512, 1024)
90 | readFully(0L, 1024)
91 | readFully(eof - 1024, 1024)
92 |
93 | time("seek(getPos)") {
94 | in.seek(in.getPos())
95 | }
96 | time("read()") {
97 | assert(-1 !== in.read())
98 | }
99 | logDuration("close()") {
 100 |       in.close()
101 | }
102 | dumpFileSystemStatistics(fs.getStorageStatistics)
103 |
104 | }
105 |
106 | }
107 |
--------------------------------------------------------------------------------