Skip to content

Commit 5088a47

Browse files
authored
Merge pull request #1486 from bonjune/csv-schema-multiline
2 parents bbe54ed + 21c5f26 commit 5088a47

2 files changed

Lines changed: 38 additions & 9 deletions

File tree

src/FSharp.Data.Csv.Core/CsvInference.fs

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module FSharp.Data.Runtime.CsvInference
33

44
open System
55
open System.IO
6+
open System.Text
67
open System.Text.RegularExpressions
78
open FSharp.Data
89
open FSharp.Data.Runtime
@@ -145,28 +146,55 @@ let internal parseHeaders headers numberOfColumns schema unitsOfMeasureProvider
145146
header)
146147
| None -> Array.init numberOfColumns (fun i -> "Column" + (i + 1).ToString())
147148

149+
/// Splits an explicit schema string into its comma-separated column definitions.
///
/// The reader is consumed character by character until it is exhausted:
///  - a double quote ('"') is never copied to the output; it only toggles
///    whether the reader is currently inside a quoted section,
///  - a comma outside a quoted section ends the current column definition,
///  - a comma inside a quoted section is kept as part of the definition, so a
///    quoted column name may itself contain commas,
///  - CR and LF are always dropped, which lets a schema span several lines.
///
/// Returns the collected definitions in order. The text after the last
/// separator is always added, so the result holds at least one (possibly
/// empty) entry. Entries are not trimmed here.
let readSchema (reader: StringReader) =
    let schemas = ResizeArray<string>()
    let chars = StringBuilder()

    // Moves the characters gathered so far into the result list.
    let commit () =
        schemas.Add(chars.ToString())
        chars.Clear() |> ignore

    let rec iter insideQuotes =
        match reader.Read() with
        // End of input: whatever is pending forms the last definition.
        | -1 -> schemas.Add(chars.ToString())
        | code ->
            match char code with
            // Quotes only delimit; they are not part of the definition.
            | '"' -> iter (not insideQuotes)
            // Only an unquoted comma separates two definitions.
            | ',' when not insideQuotes ->
                commit ()
                iter insideQuotes
            // Line breaks are skipped so multi-line schemas are joined.
            | '\r'
            | '\n' -> iter insideQuotes
            | c ->
                chars.Append(c) |> ignore
                iter insideQuotes

    iter false
    schemas
178+
148179
// If the schema is specified explicitly, then parse the schema
149180
// (This can specify just types, names of columns or a mix of both)
150181
let schema =
151182
if String.IsNullOrWhiteSpace schema then
152183
Array.zeroCreate headers.Length
153184
else
154-
use reader = new StringReader(schema.Replace("\n", ""))
185+
use reader = new StringReader(schema)
155186

156-
let schemaStr =
157-
CsvReader.readCsvFile reader "," '"'
158-
|> Seq.exactlyOne
159-
|> fst
187+
let schemaStr = readSchema reader
160188

161-
if schemaStr.Length > headers.Length then
189+
if schemaStr.Count > headers.Length then
162190
failwithf
163191
"The provided schema contains %d columns, the inference found %d columns - please check the number of columns and the separator "
164-
schemaStr.Length
192+
schemaStr.Count
165193
headers.Length
166194

167195
let schema = Array.zeroCreate headers.Length
168196

169-
for index = 0 to schemaStr.Length - 1 do
197+
for index = 0 to schemaStr.Count - 1 do
170198
let item = schemaStr.[index].Trim()
171199

172200
match item with

tests/FSharp.Data.Tests/CsvProvider.fs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -670,5 +670,6 @@ let ``Can infer from a multiline schema`` () =
670670
ProductQuantity (string),
671671
ProductPrice (string)">.GetSample ()
672672
let firstRow = csv.Rows |> Seq.head
673+
csv.NumberOfColumns |> should equal 16
673674
firstRow.OrderCreated |> should equal "2022-01-01 10:00:00"
674-
675+
firstRow.FioFull |> should equal "John Smith"

0 commit comments

Comments
 (0)