[SPARK-39102][CORE][SQL][DSTREAM] Add checkstyle rules to disable use of Guava's `Files.createTempDir()`

### What changes were proposed in this pull request?
The main changes of this PR are as follows:

- Add a rule to `scalastyle-config.xml` that disallows use of Guava's `Files.createTempDir()` in Scala code
- Add a rule to `dev/checkstyle.xml` that disallows use of Guava's `Files.createTempDir()` in Java code
- Introduce a `JavaUtils.createTempDir()` method to replace the use of Guava's `Files.createTempDir()` in Java code
- Use `Utils.createTempDir()` to replace the use of Guava's `Files.createTempDir()` in Scala code (a sketch of the migration follows this list)
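
As a rough sketch of the migration (the `TempDirExample` class and its call site are hypothetical; only `JavaUtils.createTempDir()` itself is introduced by this PR):

```java
import java.io.File;
import java.io.IOException;

import org.apache.spark.network.util.JavaUtils;

// Hypothetical call site illustrating the before/after of the migration.
public class TempDirExample {
  public static void main(String[] args) throws IOException {
    // Before (now rejected by the new checkstyle/scalastyle rules):
    // File dir = com.google.common.io.Files.createTempDir();

    // After: creates a unique "spark-<UUID>" directory under java.io.tmpdir
    // and marks it for deletion when the JVM shuts down.
    File dir = JavaUtils.createTempDir();
    System.out.println("Created: " + dir.getAbsolutePath());
  }
}
```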

### Why are the changes needed?
Avoid the use of Guava's `Files.createTempDir()` in Spark code due to [CVE-2020-8908](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8908)
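
For context, the JDK alternative that Guava's own documentation recommends is `java.nio.file.Files.createTempDirectory`, which creates the directory atomically with owner-only permissions on POSIX systems; a minimal sketch (not part of this PR's changes):

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

// Sketch only: the JDK API avoids the world-readable temp directory
// described in CVE-2020-8908.
public class JdkTempDirSketch {
  public static void main(String[] args) throws IOException {
    Path dir = Files.createTempDirectory("spark");
    System.out.println(dir);
  }
}
```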

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Pass GitHub Actions (GA).

Closes apache#36529 from LuciferYang/SPARK-39102.

Authored-by: yangjie01 <yangjie01@baidu.com>
Signed-off-by: Sean Owen <srowen@gmail.com>
LuciferYang authored and srowen committed May 17, 2022
1 parent b4c0196 commit 6d74557
Showing 12 changed files with 147 additions and 20 deletions.
@@ -21,7 +21,9 @@
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Locale;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -362,6 +364,60 @@ public static byte[] bufferToArray(ByteBuffer buffer) {
}
}

/**
* Create a temporary directory inside `java.io.tmpdir` with default namePrefix "spark".
* The directory will be automatically deleted when the VM shuts down.
*/
public static File createTempDir() throws IOException {
return createTempDir(System.getProperty("java.io.tmpdir"), "spark");
}

/**
* Create a temporary directory inside the given parent directory. The directory will be
* automatically deleted when the VM shuts down.
*/
public static File createTempDir(String root, String namePrefix) throws IOException {
if (root == null) root = System.getProperty("java.io.tmpdir");
if (namePrefix == null) namePrefix = "spark";
File dir = createDirectory(root, namePrefix);
dir.deleteOnExit();
return dir;
}

/**
* Create a directory inside the given parent directory with default namePrefix "spark".
* The directory is guaranteed to be newly created, and is not marked for automatic deletion.
*/
public static File createDirectory(String root) throws IOException {
return createDirectory(root, "spark");
}

/**
* Create a directory inside the given parent directory. The directory is guaranteed to be
* newly created, and is not marked for automatic deletion.
*/
public static File createDirectory(String root, String namePrefix) throws IOException {
if (namePrefix == null) namePrefix = "spark";
int attempts = 0;
int maxAttempts = 10;
File dir = null;
while (dir == null) {
attempts += 1;
if (attempts > maxAttempts) {
throw new IOException("Failed to create a temp directory (under " + root + ") after " +
maxAttempts + " attempts!");
}
try {
dir = new File(root, namePrefix + "-" + UUID.randomUUID());
Files.createDirectories(dir.toPath());
} catch (IOException | SecurityException e) {
logger.error("Failed to create directory " + dir, e);
dir = null;
}
}
return dir.getCanonicalFile();
}

/**
* Fills a buffer with data read from the channel.
*/
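The difference between the temp-dir and plain-directory variants added above can be seen in a short usage sketch (the call sites are hypothetical; only the `JavaUtils` methods come from this diff):

```java
import java.io.File;
import java.io.IOException;

import org.apache.spark.network.util.JavaUtils;

public class JavaUtilsUsageSketch {
  public static void main(String[] args) throws IOException {
    // createTempDir(): unique "spark-<UUID>" directory under java.io.tmpdir,
    // registered with deleteOnExit() so it is removed at JVM shutdown.
    File scratch = JavaUtils.createTempDir();

    // createDirectory(root, namePrefix): also uniquely named, but not marked
    // for automatic deletion; the caller manages its lifetime.
    File kept = JavaUtils.createDirectory(scratch.getAbsolutePath(), "data");

    System.out.println(scratch + " / " + kept.getName());
  }
}
```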
@@ -23,8 +23,6 @@
import java.nio.ByteBuffer;
import java.util.Random;

import com.google.common.io.Files;

import org.apache.spark.network.buffer.FileSegmentManagedBuffer;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.buffer.NioManagedBuffer;
@@ -51,7 +49,7 @@ private static ByteBuffer createBuffer(int bufSize) {
}

StreamTestHelper() throws Exception {
tempDir = Files.createTempDir();
tempDir = JavaUtils.createTempDir();
emptyBuffer = createBuffer(0);
smallBuffer = createBuffer(100);
largeBuffer = createBuffer(100000);
@@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.network.util;

import java.io.File;
import java.io.IOException;

import org.junit.Test;

import static org.junit.Assert.*;

public class JavaUtilsSuite {

@Test
public void testCreateDirectory() throws IOException {
File tmpDir = new File(System.getProperty("java.io.tmpdir"));
File testDir = new File(tmpDir, "createDirectory" + System.nanoTime());
String testDirPath = testDir.getCanonicalPath();

// 1. Directory created successfully
assertTrue(JavaUtils.createDirectory(testDirPath, "scenario1").exists());

// 2. Illegal file path
StringBuilder namePrefix = new StringBuilder();
for (int i = 0; i < 256; i++) {
namePrefix.append("scenario2");
}
assertThrows(IOException.class,
() -> JavaUtils.createDirectory(testDirPath, namePrefix.toString()));

// 3. The parent directory is not readable (creation still succeeds)
assertTrue(testDir.canRead());
assertTrue(testDir.setReadable(false));
assertTrue(JavaUtils.createDirectory(testDirPath, "scenario3").exists());
assertTrue(testDir.setReadable(true));

// 4. The parent directory is not writable (creation fails)
assertTrue(testDir.canWrite());
assertTrue(testDir.setWritable(false));
assertThrows(IOException.class,
() -> JavaUtils.createDirectory(testDirPath, "scenario4"));
assertTrue(testDir.setWritable(true));
}
}
@@ -28,7 +28,6 @@
import com.codahale.metrics.Metric;
import com.codahale.metrics.Timer;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import org.junit.Before;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
@@ -60,6 +59,7 @@
import org.apache.spark.network.shuffle.protocol.RegisterExecutor;
import org.apache.spark.network.shuffle.protocol.StreamHandle;
import org.apache.spark.network.shuffle.protocol.UploadBlock;
import org.apache.spark.network.util.JavaUtils;

public class ExternalBlockHandlerSuite {
TransportClient client = mock(TransportClient.class);
@@ -126,7 +126,7 @@ private void checkDiagnosisResult(
int reduceId = 0;

// prepare the checksum file
File tmpDir = Files.createTempDir();
File tmpDir = JavaUtils.createTempDir();
File checksumFile = new File(tmpDir,
"shuffle_" + shuffleId + "_" + mapId + "_" + reduceId + ".checksum." + algorithm);
DataOutputStream out = new DataOutputStream(new FileOutputStream(checksumFile));
@@ -24,7 +24,6 @@
import java.io.OutputStream;

import com.google.common.io.Closeables;
import com.google.common.io.Files;

import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo;
import org.apache.spark.network.util.JavaUtils;
@@ -47,9 +46,9 @@ public TestShuffleDataContext(int numLocalDirs, int subDirsPerLocalDir) {
this.subDirsPerLocalDir = subDirsPerLocalDir;
}

public void create() {
public void create() throws IOException {
for (int i = 0; i < localDirs.length; i ++) {
localDirs[i] = Files.createTempDir().getAbsolutePath();
localDirs[i] = JavaUtils.createTempDir().getAbsolutePath();

for (int p = 0; p < subDirsPerLocalDir; p ++) {
new File(localDirs[i], String.format("%02x", p)).mkdirs();
@@ -18,13 +18,14 @@
package test.org.apache.spark;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;

import org.apache.spark.network.util.JavaUtils;
import scala.Tuple2;

import com.google.common.collect.Iterables;
import com.google.common.io.Files;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
@@ -244,8 +245,8 @@ public void mapPartitions() {
}

@Test
public void sequenceFile() {
File tempDir = Files.createTempDir();
public void sequenceFile() throws IOException {
File tempDir = JavaUtils.createTempDir();
tempDir.deleteOnExit();
String outputDir = new File(tempDir, "output").getAbsolutePath();
List<Tuple2<Integer, String>> pairs = Arrays.asList(
5 changes: 3 additions & 2 deletions core/src/test/java/test/org/apache/spark/JavaAPISuite.java
@@ -39,6 +39,7 @@
import org.apache.spark.SparkConf;
import org.apache.spark.TaskContext;
import org.apache.spark.TaskContext$;
import org.apache.spark.network.util.JavaUtils;
import scala.Tuple2;
import scala.Tuple3;
import scala.Tuple4;
@@ -90,9 +91,9 @@ public class JavaAPISuite implements Serializable {
private transient File tempDir;

@Before
public void setUp() {
public void setUp() throws IOException {
sc = new JavaSparkContext("local", "JavaAPISuite");
tempDir = Files.createTempDir();
tempDir = JavaUtils.createTempDir();
tempDir.deleteOnExit();
}

@@ -30,6 +30,7 @@ import org.apache.ivy.core.settings.IvySettings

import org.apache.spark.TestUtils.{createCompiledClass, JavaSourceFromString}
import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate
import org.apache.spark.util.Utils

private[deploy] object IvyTestUtils {

@@ -294,7 +295,7 @@
withPython: Boolean = false,
withR: Boolean = false): File = {
// Where the root of the repository exists, and what Ivy will search in
val tempPath = tempDir.getOrElse(Files.createTempDir())
val tempPath = tempDir.getOrElse(Utils.createTempDir())
// Create directory if it doesn't exist
Files.createParentDirs(tempPath)
// Where to create temporary class files and such
@@ -26,7 +26,6 @@ import java.util.zip.ZipFile
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer

import com.google.common.io.Files
import org.apache.commons.io.FileUtils
import org.scalatest.BeforeAndAfterEach

@@ -144,7 +143,7 @@
}

test("SparkR zipping works properly") {
val tempDir = Files.createTempDir()
val tempDir = Utils.createTempDir()
Utils.tryWithSafeFinally {
IvyTestUtils.writeFile(tempDir, "test.R", "abc")
val fakeSparkRDir = new File(tempDir, "SparkR")
6 changes: 6 additions & 0 deletions dev/checkstyle.xml
@@ -174,6 +174,12 @@
<property name="format" value="new (java\.lang\.)?(Byte|Integer|Long|Short)\("/>
<property name="message" value="Use static factory 'valueOf' or 'parseXXX' instead of the deprecated constructors." />
</module>
<module name="RegexpSinglelineJava">
<property name="format" value="Files\.createTempDir\("/>
<property name="message"
value="Avoid using com.google.common.io.Files.createTempDir() due to CVE-2020-8908.
Use org.apache.spark.network.util.JavaUtils.createTempDir() instead." />
</module>
<module name="IllegalImport">
<property name="illegalPkgs" value="org.apache.log4j" />
</module>
7 changes: 7 additions & 0 deletions scalastyle-config.xml
@@ -430,4 +430,11 @@ This file is divided into 3 sections:
<parameters><parameter name="regex">Objects.toStringHelper</parameter></parameters>
<customMessage>Avoid using Object.toStringHelper. Use ToStringBuilder instead.</customMessage>
</check>

<check customId="GuavaFilesCreateTempDir" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">Files\.createTempDir\(</parameter></parameters>
<customMessage>Avoid using com.google.common.io.Files.createTempDir due to CVE-2020-8908.
Use org.apache.spark.util.Utils.createTempDir instead.
</customMessage>
</check>
</scalastyle>
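
Both rules match on the call text rather than the import, so any occurrence of `Files.createTempDir(` is flagged regardless of how `Files` is qualified. A small sketch of the pattern's behavior (hypothetical demo using the same regex as the rules above):

```java
import java.util.regex.Pattern;

// Demonstrates what the checkstyle/scalastyle regex matches.
public class RuleRegexSketch {
  public static void main(String[] args) {
    Pattern banned = Pattern.compile("Files\\.createTempDir\\(");
    System.out.println(banned.matcher(
        "File dir = Files.createTempDir();").find());           // true
    System.out.println(banned.matcher(
        "com.google.common.io.Files.createTempDir()").find());  // true
    System.out.println(banned.matcher(
        "JavaUtils.createTempDir()").find());                   // false
  }
}
```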
@@ -22,6 +22,7 @@
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.JavaCheckpointTestUtils;
import org.apache.spark.streaming.JavaTestUtils;
@@ -1463,7 +1464,7 @@ public void testLeftOuterJoin() {
}

@Test
public void testCheckpointMasterRecovery() throws InterruptedException {
public void testCheckpointMasterRecovery() throws InterruptedException, IOException {
List<List<String>> inputData = Arrays.asList(
Arrays.asList("this", "is"),
Arrays.asList("a", "test"),
@@ -1475,7 +1476,7 @@
Arrays.asList(1,4),
Arrays.asList(8,7));

File tempDir = Files.createTempDir();
File tempDir = JavaUtils.createTempDir();
tempDir.deleteOnExit();
ssc.checkpoint(tempDir.getAbsolutePath());

@@ -1498,15 +1499,15 @@
}

@Test
public void testContextGetOrCreate() throws InterruptedException {
public void testContextGetOrCreate() throws IOException {
ssc.stop();

SparkConf conf = new SparkConf()
.setMaster("local[2]")
.setAppName("test")
.set("newContext", "true");

File emptyDir = Files.createTempDir();
File emptyDir = JavaUtils.createTempDir();
emptyDir.deleteOnExit();
StreamingContextSuite contextSuite = new StreamingContextSuite();
String corruptedCheckpointDir = contextSuite.createCorruptedCheckpoint();