adaltas#58: skip default row delimiters when quoted

elsbree · Jan 26, 2017 · 3a36065 · 3a36065
1 parent ec818a7
commit 3a36065
Show file tree

Hide file tree

Showing 4 changed files with 99 additions and 82 deletions.
diff --git a/lib/index.js b/lib/index.js
diff --git a/src/index.coffee.md b/src/index.coffee.md
@@ -243,6 +243,8 @@ Implementation of the [`stream.Transform` API][transform]
         if not end
           remainingBuffer = chars.substr(i, l - i)
           break if (
+            (not @options.rowDelimiter and i + 3 > l) or
+            # (i+1000 >= l) or
             # Skip if the remaining buffer can be a comment
             (not @_.commenting and l - i < @options.comment.length and @options.comment.substr(0, l - i) is remainingBuffer) or
             # Skip if the remaining buffer can be a row delimiter
@@ -258,16 +260,17 @@ Implementation of the [`stream.Transform` API][transform]
         @_.nextChar = if l > i + 1 then chars.charAt(i + 1) else ''
         @_.rawBuf += char if @options.raw
         # Auto discovery of rowDelimiter, unix, mac and windows supported
-        unless @options.rowDelimiter?
+        if not @options.rowDelimiter?
+          nextCharPos = i
+          rowDelimiter = null
           # First empty line
-          if (not @_.quoting) and (char is '\n' or char is '\r')
+          if not @_.quoting and (char is '\n' or char is '\r')
             rowDelimiter = char
-            nextCharPos = i+1
-          else if @_.nextChar is '\n' or @_.nextChar is '\r'
+            nextCharPos += 1
+          else if not (not @_.quoting and char is @options.quote) and (@_.nextChar is '\n' or @_.nextChar is '\r')
             rowDelimiter = @_.nextChar
-            nextCharPos = i+2
-            if @raw
-              rawBuf += @_.nextChar
+            nextCharPos += 2
+            rawBuf += @_.nextChar if @raw
           if rowDelimiter
             rowDelimiter += '\n' if rowDelimiter is '\r' and chars.charAt(nextCharPos) is '\n'
             @options.rowDelimiter = [rowDelimiter]

diff --git a/test/api.write.coffee b/test/api.write.coffee
@@ -56,7 +56,7 @@ describe 'write', ->
 
   it 'instantly emits data once a newline is retrieved', (next) ->
     data = []
-    parser = parse()
+    parser = parse rowDelimiter: '\n'
     parser.on 'data', (data) ->
       data.should.eql ['A', 'B', 'C']
       parser.end()

diff --git a/test/options.rowDelimiter.coffee b/test/options.rowDelimiter.coffee
@@ -5,11 +5,6 @@ parse = require '../src'
 
 describe 'rowDelimiter', ->
 
-  it 'No rows', (next) ->
-    parse "", (err, data) ->
-      data.should.eql [] unless err
-      next err
-
   it 'Test line breaks custom when rowDelimiter is a string', (next) ->
     parse """
     ABC,45::DEF,23
@@ -139,51 +134,6 @@ describe 'rowDelimiter', ->
     parser.write ':GHI,94\r'
     parser.write '\nJKL,02'
     parser.end()
-
-  it 'handle chunks in autodiscovery', (next) ->
-    data = []
-    parser = parse()
-    parser.on 'readable', ->
-      while d = parser.read()
-        data.push d
-    parser.on 'finish', ->
-      data.should.eql [
-        [ 'ABC','45' ]
-        [ 'DEF','23' ]
-        [ 'GHI','94' ]
-        [ 'JKL','02' ]
-      ]
-      next()
-    parser.write '"ABC","45"'
-    parser.write '\n"DEF","23"\n'
-    parser.write '"GHI","94"\n'
-    parser.write '"JKL","02"'
-    parser.end()
-
-  it 'write aggressively', (next) ->
-    data = []
-    parser = parse()
-    parser.on 'readable', ->
-      while(d = parser.read())
-        data.push d
-    parser.on 'finish', ->
-      data.should.eql [
-        [ 'abc', '123' ]
-        [ 'def', '456' ]
-      ]
-      next()
-    parser.write 'abc,123'
-    parser.write '\n'
-    parser.write 'def,456'
-    parser.end()
-
-  it 'Test line ends with field delimiter and without row delimiter', (next) ->
-    parse '"a","b","c",', delimiter: ',', (err, data) ->
-      return next err if err
-      data.should.eql [
-        [ 'a','b','c','' ]
-      ]
-      next()
 
   it 'If the rowDelimiter(string) does not match from the csv data, parsing should terminate with appropriate error message when the data read is more than the value set for max_limit_on_data_read', (next) ->
     parse """
@@ -208,23 +158,85 @@ describe 'rowDelimiter', ->
       err.message.should.eql 'Row delimiter not found in the file ["\\t"]'
       should(data).not.be.ok()
       next()
+
+  describe 'auto', ->
+
+    it 'No rows', (next) ->
+      parse "", (err, data) ->
+        data.should.eql [] unless err
+        next err
 
-  it 'ensure autodiscovery support chunck between lines', (next) ->
-    data = []
-    parser = parse()
-    parser.on 'readable', ->
-      while d = parser.read()
-        data.push d
-    parser.on 'finish', ->
-      data.should.eql [
-        [ 'ABC','45' ]
-        [ 'DEF','23' ]
-        [ 'GHI','94' ]
-        [ 'JKL','02' ]
-      ]
-      next()
-    parser.write 'ABC,45'
-    parser.write '\r\nDEF,23\r'
-    parser.write '\nGHI,94\r\n'
-    parser.write 'JKL,02\r\n'
-    parser.end()
+    it 'handle chunks in autodiscovery', (next) ->
+      data = []
+      parser = parse()
+      parser.on 'readable', ->
+        while d = parser.read()
+          data.push d
+      parser.on 'finish', ->
+        data.should.eql [
+          [ 'ABC','45' ]
+          [ 'DEF','23' ]
+          [ 'GHI','94' ]
+          [ 'JKL','02' ]
+        ]
+        next()
+      parser.write '"ABC","45"'
+      parser.write '\n"DEF","23"\n'
+      parser.write '"GHI","94"\n'
+      parser.write '"JKL","02"'
+      parser.end()
+
+    it 'write aggressively', (next) ->
+      data = []
+      parser = parse()
+      parser.on 'readable', ->
+        while(d = parser.read())
+          data.push d
+      parser.on 'finish', ->
+        data.should.eql [
+          [ 'abc', '123' ]
+          [ 'def', '456' ]
+        ]
+        next()
+      parser.write 'abc,123'
+      parser.write '\n'
+      parser.write 'def,456'
+      parser.end()
+
+    it 'Test line ends with field delimiter and without row delimiter', (next) ->
+      parse '"a","b","c",', delimiter: ',', (err, data) ->
+        return next err if err
+        data.should.eql [
+          [ 'a','b','c','' ]
+        ]
+        next()
+
+    it 'ensure autodiscovery support chunck between lines', (next) ->
+      data = []
+      parser = parse()
+      parser.on 'readable', ->
+        while d = parser.read()
+          data.push d
+      parser.on 'finish', ->
+        data.should.eql [
+          [ 'ABC','45' ]
+          [ 'DEF','23' ]
+          [ 'GHI','94' ]
+          [ 'JKL','02' ]
+        ]
+        next()
+      parser.write 'ABC,45'
+      parser.write '\r\nDEF,23\r'
+      parser.write '\nGHI,94\r\n'
+      parser.write 'JKL,02\r\n'
+      parser.end()
+
+    it 'skip default row delimiters when quoted', (next) ->
+      parser = parse (err, data) -> # rowDelimiter: '\r\n', 
+        data.should.eql [
+          ['1', '2', '\n']
+          ['3', '4', '']
+        ] unless err
+        next err
+      parser.write c for c in '1,2,"\n"\r\n3,4,'
+      parser.end()