From d1d17255794d7193cdd0cf400cd5c0451ddc487f Mon Sep 17 00:00:00 2001
From: Fabian Meumertzheim
Date: Fri, 1 Apr 2022 17:32:08 +0200
Subject: [PATCH] Default to a UTF-8 locale in Java stub template

On non-macOS Unix, without any locale variable set, the OpenJDK
defaults to using ASCII rather than UTF-8 as the encoding for file
system paths (i.e., the value of the `sun.jnu.encoding` property).
---
 .../bazel/rules/java/java_stub_template.txt   | 12 ++++++
 .../shell/bazel/unicode_filenames_test.sh     | 43 +++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/java/java_stub_template.txt b/src/main/java/com/google/devtools/build/lib/bazel/rules/java/java_stub_template.txt
index 2aabedcdf942f2..873f99e3c82dbd 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/rules/java/java_stub_template.txt
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/java/java_stub_template.txt
@@ -362,6 +362,18 @@ if [ -z "$CLASSPATH_LIMIT" ]; then
   is_windows && CLASSPATH_LIMIT=7000 || CLASSPATH_LIMIT=120000
 fi
 
+# On non-macOS Unix, without any locale variable set, the OpenJDK defaults to
+# using ASCII rather than UTF-8 as the encoding for file system paths (i.e., the
+# value of the sun.jnu.encoding property). Default LC_CTYPE to C.UTF-8 when the
+# system offers it. Some systems list this locale as "C.utf8" in `locale -a`,
+# so accept either spelling, and match whole lines with the dot escaped so that
+# no other locale name can satisfy the check.
+if ! is_macos && ! is_windows && [ -z "$LC_ALL" ] && [ -z "$LC_CTYPE" ] && [ -z "$LANG" ]; then
+  if locale -a 2>/dev/null | grep -qixE 'C\.utf-?8'; then
+    export LC_CTYPE=C.UTF-8
+  fi
+fi
+
 if (("${#CLASSPATH}" > ${CLASSPATH_LIMIT})); then
   export JACOCO_IS_JAR_WRAPPED=1
   create_and_run_classpath_jar
diff --git a/src/test/shell/bazel/unicode_filenames_test.sh b/src/test/shell/bazel/unicode_filenames_test.sh
index 7e38cca62e2bac..0bd0a331db9702 100755
--- a/src/test/shell/bazel/unicode_filenames_test.sh
+++ b/src/test/shell/bazel/unicode_filenames_test.sh
@@ -100,6 +100,13 @@ function has_utf8_locale() {
   [[ "${charmap}" == "UTF-8" ]]
 }
 
+# Succeeds iff `locale charmap` reports UTF-8 when run under LC_ALL=C.UTF-8.
+function has_c_utf8_locale() {
+  local charmap
+  charmap="$(LC_ALL=C.UTF-8 locale charmap 2>/dev/null)"
+  [[ "${charmap}" == "UTF-8" ]]
+}
+
 function test_utf8_source_artifact() {
   # Bazel relies on the JVM for filename encoding, and can only support
   # UTF-8 if either a UTF-8 or ISO-8859-1 locale is available.
@@ -206,4 +213,40 @@ function test_utf8_source_artifact_in_bep() {
   expect_log '"name":"pkg/srcs/ünïcödë fïlë.txt"'
 }
 
+# Regression test for the Java stub's locale default: runs a java_test that
+# creates a temporary file whose name contains non-ASCII characters.
+function test_utf8_filename_in_java_test() {
+  if ! has_c_utf8_locale; then
+    echo "Skipping test (no C.UTF-8 locale)."
+    echo "Available locales (need C.UTF-8):"
+    locale -a
+    return
+  fi
+
+  touch WORKSPACE
+  mkdir pkg
+
+  cat >pkg/BUILD <<'EOF'
+java_test(
+    name = "Test",
+    srcs = ["Test.java"],
+    main_class = "Test",
+    use_testrunner = False,
+)
+EOF
+
+  cat >pkg/Test.java <<'EOF'
+import java.nio.file.Files;
+import java.io.IOException;
+
+class Test {
+  public static void main(String[] args) throws IOException {
+    Files.createTempFile("æøå", null);
+  }
+}
+EOF
+
+  bazel test //pkg:Test --test_output=errors 2>"$TEST_log" || fail "Test should pass"
+}
+
 run_suite "Tests for handling of Unicode filenames"