diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/java/java_stub_template.txt b/src/main/java/com/google/devtools/build/lib/bazel/rules/java/java_stub_template.txt
index 2aabedcdf942f2..77a0d5449e4422 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/rules/java/java_stub_template.txt
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/java/java_stub_template.txt
@@ -94,6 +94,16 @@ function is_macos() {
   [[ "${OSTYPE}" =~ darwin* ]]
 }
 
+function available_utf8_locale() {
+  # Prints the first usable UTF-8 locale (C.UTF-8, then en_US.UTF-8), or nothing.
+  # Neither has language-specific effects as LC_CTYPE, but neither always exists.
+  if [[ $(LC_CTYPE=C.UTF-8 locale charmap 2>/dev/null) == "UTF-8" ]]; then
+    echo "C.UTF-8"
+  elif [[ $(LC_CTYPE=en_US.UTF-8 locale charmap 2>/dev/null) == "UTF-8" ]]; then
+    echo "en_US.UTF-8"
+  fi
+}
+
 # Parse arguments sequentially until the first unrecognized arg is encountered.
 # Scan the remaining args for --wrapper_script_flag=X options and process them.
 ARGS=()
@@ -362,6 +372,17 @@ if [ -z "$CLASSPATH_LIMIT" ]; then
   is_windows && CLASSPATH_LIMIT=7000 || CLASSPATH_LIMIT=120000
 fi
 
+# On non-macOS Unix, if no locale variable is set to a non-empty value (POSIX
+# treats empty as unset), the JVM would use ASCII instead of UTF-8 for paths.
+if ! is_macos; then
+  if [ -z "${LC_CTYPE:-}" ] && [ -z "${LC_ALL:-}" ] && [ -z "${LANG:-}" ]; then
+    UTF8_LOCALE=$(available_utf8_locale)
+    if [[ -n "$UTF8_LOCALE" ]]; then
+      export LC_CTYPE="$UTF8_LOCALE"
+    fi
+  fi
+fi
+
 if (("${#CLASSPATH}" > ${CLASSPATH_LIMIT})); then
   export JACOCO_IS_JAR_WRAPPED=1
   create_and_run_classpath_jar
diff --git a/src/test/shell/bazel/unicode_filenames_test.sh b/src/test/shell/bazel/unicode_filenames_test.sh
index 7e38cca62e2bac..75ce8ffd7587a4 100755
--- a/src/test/shell/bazel/unicode_filenames_test.sh
+++ b/src/test/shell/bazel/unicode_filenames_test.sh
@@ -206,4 +206,35 @@ function test_utf8_source_artifact_in_bep() {
   expect_log '"name":"pkg/srcs/ünïcödë fïlë.txt"'
 }
 
+function test_utf8_filename_in_java_test() {
+  # Intentionally do not check for available locales: Either C.UTF-8 or
+  # en_US.UTF-8 should exist on all CI machines - if not, we want to learn about
+  # this so that the Java stub template can be adapted accordingly.
+
+  touch WORKSPACE
+  mkdir pkg
+
+  cat >pkg/BUILD <<'EOF'
+java_test(
+    name = "Test",
+    srcs = ["Test.java"],
+    main_class = "Test",
+    use_testrunner = False,
+)
+EOF
+
+  cat >pkg/Test.java <<'EOF'
+import java.nio.file.Files;
+import java.io.IOException;
+
+class Test {
+  public static void main(String[] args) throws IOException {
+    Files.createTempFile("æøå", null);
+  }
+}
+EOF
+
+  bazel test //pkg:Test --test_output=errors 2>"$TEST_log" || fail "Test should pass"
+}
+
 run_suite "Tests for handling of Unicode filenames"