@@ -78,21 +78,22 @@ object CopybookParser {
7878 /**
7979 * Tokenizes a Cobol Copybook contents and returns the AST.
8080 *
81- * @param dataEncoding Encoding of the data file (either ASCII/EBCDIC). The encoding of the copybook is expected to be ASCII.
82- * @param copyBookContents A string containing all lines of a copybook
83- * @param dropGroupFillers Drop groups marked as fillers from the output AST
84- * @param dropValueFillers Drop primitive fields marked as fillers from the output AST
85- * @param segmentRedefines A list of redefined fields that correspond to various segments. This needs to be specified for automatically
86- * resolving segment redefines.
87- * @param fieldParentMap A segment fields parent mapping
88- * @param stringTrimmingPolicy Specifies if and how strings should be trimmed when parsed
89- * @param commentPolicy Specifies a policy for comments truncation inside a copybook
90- * @param ebcdicCodePage A code page for EBCDIC encoded data
91- * @param asciiCharset A charset for ASCII encoded data
92- * @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
93- * @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
94- * @param nonTerminals A list of non-terminals that should be extracted as strings
95- * @param debugFieldsPolicy Specifies if debugging fields need to be added and what should they contain (false, hex, raw).
81+ * @param dataEncoding Encoding of the data file (either ASCII/EBCDIC). The encoding of the copybook is expected to be ASCII.
82+ * @param copyBookContents A string containing all lines of a copybook
83+ * @param dropGroupFillers Drop groups marked as fillers from the output AST
84+ * @param dropValueFillers Drop primitive fields marked as fillers from the output AST
85+ * @param segmentRedefines A list of redefined fields that correspond to various segments. This needs to be specified for automatically
86+ * resolving segment redefines.
87+ * @param fieldParentMap A segment fields parent mapping
88+ * @param stringTrimmingPolicy Specifies if and how strings should be trimmed when parsed
89+ * @param improvedNullDetection If true, string values that contain only zero bytes (0x0) will be considered null.
90+ * @param commentPolicy Specifies a policy for comments truncation inside a copybook
91+ * @param ebcdicCodePage A code page for EBCDIC encoded data
92+ * @param asciiCharset A charset for ASCII encoded data
93+ * @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
94+ * @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
95+ * @param nonTerminals A list of non-terminals that should be extracted as strings
96+ * @param debugFieldsPolicy Specifies if debugging fields need to be added and what should they contain (false, hex, raw).
9697 * @return Seq[Group] where a group is a record inside the copybook
9798 */
9899 def parse (copyBookContents : String ,
@@ -103,6 +104,7 @@ object CopybookParser {
103104 fieldParentMap : Map [String , String ] = HashMap [String , String ](),
104105 stringTrimmingPolicy : StringTrimmingPolicy = StringTrimmingPolicy .TrimBoth ,
105106 commentPolicy : CommentPolicy = CommentPolicy (),
107+ improvedNullDetection : Boolean = false ,
106108 ebcdicCodePage : CodePage = new CodePageCommon ,
107109 asciiCharset : Charset = StandardCharsets .US_ASCII ,
108110 isUtf16BigEndian : Boolean = true ,
@@ -118,6 +120,7 @@ object CopybookParser {
118120 fieldParentMap,
119121 stringTrimmingPolicy,
120122 commentPolicy,
123+ improvedNullDetection,
121124 ebcdicCodePage,
122125 asciiCharset,
123126 isUtf16BigEndian,
@@ -130,19 +133,20 @@ object CopybookParser {
130133 /**
131134 * Tokenizes a Cobol Copybook contents and returns the AST.
132135 *
133- * @param copyBookContents A string containing all lines of a copybook
134- * @param dropGroupFillers Drop groups marked as fillers from the output AST
135- * @param dropValueFillers Drop primitive fields marked as fillers from the output AST
136- * @param segmentRedefines A list of redefined fields that correspond to various segments. This needs to be specified for automatically
137- * @param fieldParentMap A segment fields parent mapping
138- * @param stringTrimmingPolicy Specifies if and how strings should be trimmed when parsed
139- * @param commentPolicy Specifies a policy for comments truncation inside a copybook
140- * @param ebcdicCodePage A code page for EBCDIC encoded data
141- * @param asciiCharset A charset for ASCII encoded data
142- * @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
143- * @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
144- * @param nonTerminals A list of non-terminals that should be extracted as strings
145- * @param debugFieldsPolicy Specifies if debugging fields need to be added and what should they contain (false, hex, raw).
136+ * @param copyBookContents A string containing all lines of a copybook
137+ * @param dropGroupFillers Drop groups marked as fillers from the output AST
138+ * @param dropValueFillers Drop primitive fields marked as fillers from the output AST
139+ * @param segmentRedefines A list of redefined fields that correspond to various segments. This needs to be specified for automatically resolving segment redefines.
140+ * @param fieldParentMap A segment fields parent mapping
141+ * @param stringTrimmingPolicy Specifies if and how strings should be trimmed when parsed
142+ * @param commentPolicy Specifies a policy for comments truncation inside a copybook
143+ * @param improvedNullDetection If true, string values that contain only zero bytes (0x0) will be considered null.
144+ * @param ebcdicCodePage A code page for EBCDIC encoded data
145+ * @param asciiCharset A charset for ASCII encoded data
146+ * @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
147+ * @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
148+ * @param nonTerminals A list of non-terminals that should be extracted as strings
149+ * @param debugFieldsPolicy Specifies if debugging fields need to be added and what should they contain (false, hex, raw).
146150 * @return Seq[Group] where a group is a record inside the copybook
147151 */
148152 def parseTree (copyBookContents : String ,
@@ -152,6 +156,7 @@ object CopybookParser {
152156 fieldParentMap : Map [String , String ] = HashMap [String , String ](),
153157 stringTrimmingPolicy : StringTrimmingPolicy = StringTrimmingPolicy .TrimBoth ,
154158 commentPolicy : CommentPolicy = CommentPolicy (),
159+ improvedNullDetection : Boolean = false ,
155160 ebcdicCodePage : CodePage = new CodePageCommon ,
156161 asciiCharset : Charset = StandardCharsets .US_ASCII ,
157162 isUtf16BigEndian : Boolean = true ,
@@ -167,6 +172,7 @@ object CopybookParser {
167172 fieldParentMap,
168173 stringTrimmingPolicy,
169174 commentPolicy,
175+ improvedNullDetection,
170176 ebcdicCodePage,
171177 asciiCharset,
172178 isUtf16BigEndian,
@@ -179,21 +185,22 @@ object CopybookParser {
179185 /**
180186 * Tokenizes a Cobol Copybook contents and returns the AST.
181187 *
182- * @param enc Encoding of the data file (either ASCII/EBCDIC). The encoding of the copybook is expected to be ASCII.
183- * @param copyBookContents A string containing all lines of a copybook
184- * @param dropGroupFillers Drop groups marked as fillers from the output AST
185- * @param dropValueFillers Drop primitive fields marked as fillers from the output AST
186- * @param segmentRedefines A list of redefined fields that correspond to various segments. This needs to be specified for automatically
187- * resolving segment redefines.
188- * @param fieldParentMap A segment fields parent mapping
189- * @param stringTrimmingPolicy Specifies if and how strings should be trimmed when parsed
190- * @param commentPolicy Specifies a policy for comments truncation inside a copybook
191- * @param ebcdicCodePage A code page for EBCDIC encoded data
192- * @param asciiCharset A charset for ASCII encoded data
193- * @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
194- * @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
195- * @param nonTerminals A list of non-terminals that should be extracted as strings
196- * @param debugFieldsPolicy Specifies if debugging fields need to be added and what should they contain (false, hex, raw).
188+ * @param enc Encoding of the data file (either ASCII/EBCDIC). The encoding of the copybook is expected to be ASCII.
189+ * @param copyBookContents A string containing all lines of a copybook
190+ * @param dropGroupFillers Drop groups marked as fillers from the output AST
191+ * @param dropValueFillers Drop primitive fields marked as fillers from the output AST
192+ * @param segmentRedefines A list of redefined fields that correspond to various segments. This needs to be specified for automatically
193+ * resolving segment redefines.
194+ * @param fieldParentMap A segment fields parent mapping
195+ * @param stringTrimmingPolicy Specifies if and how strings should be trimmed when parsed
196+ * @param commentPolicy Specifies a policy for comments truncation inside a copybook
197+ * @param improvedNullDetection If true, string values that contain only zero bytes (0x0) will be considered null.
198+ * @param ebcdicCodePage A code page for EBCDIC encoded data
199+ * @param asciiCharset A charset for ASCII encoded data
200+ * @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
201+ * @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
202+ * @param nonTerminals A list of non-terminals that should be extracted as strings
203+ * @param debugFieldsPolicy Specifies if debugging fields need to be added and what should they contain (false, hex, raw).
197204 * @return Seq[Group] where a group is a record inside the copybook
198205 */
199206 @ throws(classOf [SyntaxErrorException ])
@@ -205,6 +212,7 @@ object CopybookParser {
205212 fieldParentMap : Map [String , String ],
206213 stringTrimmingPolicy : StringTrimmingPolicy ,
207214 commentPolicy : CommentPolicy ,
215+ improvedNullDetection : Boolean ,
208216 ebcdicCodePage : CodePage ,
209217 asciiCharset : Charset ,
210218 isUtf16BigEndian : Boolean ,
@@ -213,7 +221,7 @@ object CopybookParser {
213221 occursHandlers : Map [String , Map [String , Int ]],
214222 debugFieldsPolicy : DebugFieldsPolicy ): Copybook = {
215223
216- val schemaANTLR : CopybookAST = ANTLRParser .parse(copyBookContents, enc, stringTrimmingPolicy, commentPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat)
224+ val schemaANTLR : CopybookAST = ANTLRParser .parse(copyBookContents, enc, stringTrimmingPolicy, commentPolicy, improvedNullDetection, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat)
217225
218226 val nonTerms : Set [String ] = (for (id <- nonTerminals)
219227 yield transformIdentifier(id)
@@ -232,7 +240,7 @@ object CopybookParser {
232240 processGroupFillers(
233241 markDependeeFields(
234242 addNonTerminals(
235- calculateBinaryProperties(schemaANTLR), nonTerms, enc, stringTrimmingPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat),
243+ calculateBinaryProperties(schemaANTLR), nonTerms, enc, stringTrimmingPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat, improvedNullDetection ),
236244 occursHandlers
237245 ), dropValueFillers
238246 ), dropGroupFillers, dropValueFillers
@@ -249,7 +257,7 @@ object CopybookParser {
249257 renameGroupFillers(
250258 markDependeeFields(
251259 addNonTerminals(
252- calculateBinaryProperties(schemaANTLR), nonTerms, enc, stringTrimmingPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat),
260+ calculateBinaryProperties(schemaANTLR), nonTerms, enc, stringTrimmingPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat, improvedNullDetection ),
253261 occursHandlers
254262 ),
255263 dropGroupFillers, dropValueFillers
@@ -267,7 +275,8 @@ object CopybookParser {
267275 ebcdicCodePage : CodePage ,
268276 asciiCharset : Charset ,
269277 isUtf16BigEndian : Boolean ,
270- floatingPointFormat : FloatingPointFormat
278+ floatingPointFormat : FloatingPointFormat ,
279+ improvedNullDetection : Boolean
271280 ): CopybookAST = {
272281
273282 def getNonTerminalName (name : String , parent : Group ): String = {
@@ -292,11 +301,11 @@ object CopybookParser {
292301 case g : Group =>
293302 if (nonTerminals contains g.name) {
294303 newChildren.append(
295- addNonTerminals(g, nonTerminals, enc, stringTrimmingPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat).copy(isRedefined = true )(g.parent)
304+ addNonTerminals(g, nonTerminals, enc, stringTrimmingPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat, improvedNullDetection ).copy(isRedefined = true )(g.parent)
296305 )
297306 val sz = g.binaryProperties.actualSize
298307 val dataType = AlphaNumeric (s " X( $sz) " , sz, enc = Some (enc))
299- val decode = DecoderSelector .getDecoder(dataType, stringTrimmingPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat)
308+ val decode = DecoderSelector .getDecoder(dataType, stringTrimmingPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat, improvedNullDetection )
300309 val newName = getNonTerminalName(g.name, g.parent.get)
301310 newChildren.append(
302311 Primitive (
@@ -310,7 +319,7 @@ object CopybookParser {
310319 }
311320 else
312321 newChildren.append(
313- addNonTerminals(g, nonTerminals, enc, stringTrimmingPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat)
322+ addNonTerminals(g, nonTerminals, enc, stringTrimmingPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat, improvedNullDetection )
314323 )
315324 }
316325 }
@@ -840,7 +849,7 @@ object CopybookParser {
840849 * <li>Remove all groups that don't have child nodes.</li>
841850 * </ul>
842851 *
843- * @param ast An AST as a set of copybook records
852+ * @param ast An AST as a set of copybook records
844853 * @param dropValueFillers Whether primitive filler fields are intended to be dropped
845854 * @return The same AST with group fillers processed
846855 */
@@ -919,8 +928,8 @@ object CopybookParser {
919928 val newGrp = processGroup(grp)
920929 newChildren += newGrp
921930 case st : Primitive =>
922- newChildren += st.withUpdatedIsRedefined(newIsRedefined = true )
923- newChildren += getDebugField(st)
931+ newChildren += st.withUpdatedIsRedefined(newIsRedefined = true )
932+ newChildren += getDebugField(st)
924933 }
925934 group.withUpdatedChildren(newChildren)
926935 }
0 commit comments