From f2c9544b1ffcd0142cc198a69a939139ec30cb27 Mon Sep 17 00:00:00 2001
From: Ruslan Iushchenko
Date: Sat, 23 Mar 2024 12:07:02 +0100
Subject: [PATCH] #659 Fix record length option when record id generation is
 turned on.

---
 .../cobol/reader/VarLenNestedReader.scala      |   9 +-
 .../parameters/CobolParametersParser.scala     |   2 +-
 .../regression/Test22RecordLengthGenId.scala   | 102 ++++++++++++++++++
 3 files changed, 107 insertions(+), 6 deletions(-)
 create mode 100644 spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/regression/Test22RecordLengthGenId.scala

diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/VarLenNestedReader.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/VarLenNestedReader.scala
index 41e13e49..a40edab6 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/VarLenNestedReader.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/VarLenNestedReader.scala
@@ -17,16 +17,13 @@
 package za.co.absa.cobrix.cobol.reader
 
 import za.co.absa.cobrix.cobol.internal.Logging
-
-import java.nio.charset.{Charset, StandardCharsets}
 import za.co.absa.cobrix.cobol.parser.common.Constants
 import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage
 import za.co.absa.cobrix.cobol.parser.encoding.{ASCII, EBCDIC}
 import za.co.absa.cobrix.cobol.parser.headerparsers.{RecordHeaderParser, RecordHeaderParserFactory}
-import za.co.absa.cobrix.cobol.parser.policies.FillerNamingPolicy
 import za.co.absa.cobrix.cobol.parser.recordformats.RecordFormat.{FixedBlock, VariableBlock}
 import za.co.absa.cobrix.cobol.parser.{Copybook, CopybookParser}
-import za.co.absa.cobrix.cobol.reader.extractors.raw.{FixedBlockParameters, FixedBlockRawRecordExtractor, RawRecordContext, RawRecordExtractor, RawRecordExtractorFactory, TextFullRecordExtractor, TextRecordExtractor, VarOccursRecordExtractor, VariableBlockVariableRecordExtractor}
+import za.co.absa.cobrix.cobol.reader.extractors.raw._
 import za.co.absa.cobrix.cobol.reader.extractors.record.RecordHandler
 import za.co.absa.cobrix.cobol.reader.index.IndexGenerator
 import za.co.absa.cobrix.cobol.reader.index.entry.SparseIndexEntry
@@ -37,6 +34,7 @@ import za.co.absa.cobrix.cobol.reader.schema.CobolSchema
 import za.co.absa.cobrix.cobol.reader.stream.SimpleStream
 import za.co.absa.cobrix.cobol.reader.validator.ReaderParametersValidator
 
+import java.nio.charset.{Charset, StandardCharsets}
 import scala.collection.immutable.HashMap
 import scala.collection.mutable.ArrayBuffer
 import scala.reflect.ClassTag
@@ -317,8 +315,9 @@
       }
     } else {
       // Fixed record length record parser
+      val recordSize = readerProperties.recordLength.getOrElse(cobolSchema.getRecordSize)
       RecordHeaderParserFactory.createRecordHeaderParser(Constants.RhRdwFixedLength,
-        cobolSchema.getRecordSize,
+        recordSize,
         readerProperties.fileStartOffset,
         readerProperties.fileEndOffset,
         0
diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/parameters/CobolParametersParser.scala b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/parameters/CobolParametersParser.scala
index 818a31cc..fe835aea 100644
--- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/parameters/CobolParametersParser.scala
+++ b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/parameters/CobolParametersParser.scala
@@ -492,7 +492,7 @@ object CobolParametersParser extends Logging {
         logger.warn(s"Option '$PARAM_BLOCK_LENGTH' is ignored for record format: VB")
       }
       if (recordFormat == FixedBlock && bdw.recordsPerBlock.nonEmpty) {
-        logger.warn(s"Option '$PARAM_RECORDS_PER_BLOCK' is ignored for record format: VB")
+        logger.warn(s"Option '$PARAM_RECORDS_PER_BLOCK' is ignored for record format: F")
       }
       Some(bdw)
     } else {
diff --git a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/regression/Test22RecordLengthGenId.scala b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/regression/Test22RecordLengthGenId.scala
new file mode 100644
index 00000000..6ff6c022
--- /dev/null
+++ b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/regression/Test22RecordLengthGenId.scala
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.cobrix.spark.cobol.source.regression
+
+import org.scalatest.wordspec.AnyWordSpec
+import org.slf4j.{Logger, LoggerFactory}
+import za.co.absa.cobrix.spark.cobol.source.base.{SimpleComparisonBase, SparkTestBase}
+import za.co.absa.cobrix.spark.cobol.source.fixtures.BinaryFileFixture
+import za.co.absa.cobrix.spark.cobol.utils.SparkUtils
+
+class Test22RecordLengthGenId extends AnyWordSpec with SparkTestBase with BinaryFileFixture with SimpleComparisonBase {
+
+  private implicit val logger: Logger = LoggerFactory.getLogger(this.getClass)
+
+  private val copybook =
+    """      01  R.
+                05  A    PIC 9(1).
+                05  B    PIC X(2).
+ """ + + val binFileContents: Array[Byte] = Array[Byte]( + // 123{456} + 0xF1.toByte, 0xF2.toByte, 0xF3.toByte, 0xC0.toByte, + // 789J123A + 0xF7.toByte, 0xF8.toByte, 0xF9.toByte, 0xD1.toByte, + // 65432101 + 0xF6.toByte, 0xF5.toByte, 0xF4.toByte, 0xF3.toByte + ) + + "EBCDIC files" should { + "correctly work without record it generation" in { + withTempBinFile("sign_overpunch", ".dat", binFileContents) { tmpFileName => + val df = spark + .read + .format("cobol") + .option("copybook_contents", copybook) + .option("record_format", "F") + .option("record_length", "4") + .option("pedantic", "true") + .load(tmpFileName) + + val expected = """[{"A":1,"B":"23"},{"A":7,"B":"89"},{"A":6,"B":"54"}]""" + + val actual = df.toJSON.collect().mkString("[", ",", "]") + + assertEqualsMultiline(actual, expected) + } + } + + "correctly work with record it generation" in { + withTempBinFile("sign_overpunch", ".dat", binFileContents) { tmpFileName => + val df = spark + .read + .format("cobol") + .option("copybook_contents", copybook) + .option("record_format", "F") + .option("record_length", "4") + .option("generate_record_id", "true") + .option("pedantic", "true") + .load(tmpFileName) + + val expected = """[ { + | "File_Id" : 0, + | "Record_Id" : 0, + | "Record_Byte_Length" : 4, + | "A" : 1, + | "B" : "23" + |}, { + | "File_Id" : 0, + | "Record_Id" : 1, + | "Record_Byte_Length" : 4, + | "A" : 7, + | "B" : "89" + |}, { + | "File_Id" : 0, + | "Record_Id" : 2, + | "Record_Byte_Length" : 4, + | "A" : 6, + | "B" : "54" + |} ]""".stripMargin + + val actual = SparkUtils.convertDataFrameToPrettyJSON(df) + + assertEqualsMultiline(actual, expected) + } + } + } +}