Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 30 additions & 14 deletions src/CsvColumnizer/CsvColumnizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ public void Selected (ILogLineMemoryColumnizerCallback callback)
{
_columnList.Clear();
var line = _config.HasFieldNames
? _firstLine
? _firstLine ?? callback.GetLogLineMemory(0)
: callback.GetLogLineMemory(0);

if (line != null)
Expand Down Expand Up @@ -205,6 +205,10 @@ public void Selected (ILogLineMemoryColumnizerCallback callback)
}
}
}
else
{
_columnList.Add(new CsvColumn("Text"));
}
}
}

Expand Down Expand Up @@ -290,28 +294,40 @@ public Priority GetPriority (string fileName, IEnumerable<ILogLineMemory> sample

private ColumnizedLogLine SplitCsvLine (ILogLineMemory line)
{
if (line.FullLine.IsEmpty)
{
return CreateColumnizedLogLine(line);
}

ColumnizedLogLine cLogLine = new()
{
LogLine = line
};

using CsvReader csv = new(new StringReader(line.FullLine.ToString()), _config.ReaderConfiguration);
_ = csv.Read();
_ = csv.ReadHeader();

//we only read line by line and not the whole file so it is always the header
var records = csv.HeaderRecord;

if (records != null)
try
{
List<Column> columns = [];
using CsvReader csv = new(new StringReader(line.FullLine.ToString()), _config.ReaderConfiguration);
_ = csv.Read();
_ = csv.ReadHeader();

//we only read line by line and not the whole file so it is always the header
var records = csv.HeaderRecord;

foreach (var record in records)
if (records != null)
{
columns.Add(new Column { FullValue = record.AsMemory(), Parent = cLogLine });
}
List<Column> columns = [];

foreach (var record in records)
{
columns.Add(new Column { FullValue = record.AsMemory(), Parent = cLogLine });
}

cLogLine.ColumnValues = [.. columns.Select(a => a as IColumnMemory)];
cLogLine.ColumnValues = [.. columns.Select(a => a as IColumnMemory)];
}
}
catch (CsvHelperException)
{
return CreateColumnizedLogLine(line);
}

return cLogLine;
Expand Down
113 changes: 113 additions & 0 deletions src/LogExpert.Benchmarks/CsvColumnizerBenchmarks.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
using System.Text;

using BenchmarkDotNet.Attributes;

using ColumnizerLib;

using CsvColumnizer;

using Moq;

namespace LogExpert.Benchmarks;

/// <summary>
/// Benchmarks for CsvColumnizer covering PreProcessLine, Selected, and SplitLine operations
/// across varying line counts and column widths.
/// </summary>
[MemoryDiagnoser]
[RankColumn]
public class CsvColumnizerBenchmarks
{
    private ILogLineMemory[] _dataLines = null!;
    private CsvColumnizer.CsvColumnizer _columnizer = null!;

    // Created once in Setup and reused, so that building the Moq proxy is not part of
    // what the Selected benchmark measures (time or allocations).
    private ILogLineMemoryColumnizerCallback _callback = null!;

    /// <summary>
    /// Number of data lines generated for each benchmark run.
    /// </summary>
    [Params(100, 1_000, 10_000)]
    public int LineCount { get; set; }

    /// <summary>
    /// Number of ';'-separated fields in the header and in every generated data line.
    /// </summary>
    [Params(5, 15)]
    public int ColumnCount { get; set; }

    /// <summary>
    /// Builds the header, initializes the columnizer with it and generates
    /// <see cref="LineCount"/> data lines with <see cref="ColumnCount"/> fields each.
    /// </summary>
    [GlobalSetup]
    public void Setup ()
    {
        // Build header and data lines
        var headerParts = new string[ColumnCount];
        for (var i = 0; i < ColumnCount; i++)
        {
            headerParts[i] = $"Column{i}";
        }

        var header = string.Join(";", headerParts);

        // Initialize columnizer with header
        _columnizer = new CsvColumnizer.CsvColumnizer();
        _columnizer.PreProcessLine(header.AsMemory(), 0, 0);

        _callback = new Mock<ILogLineMemoryColumnizerCallback>().Object;
        _columnizer.Selected(_callback);

        // Generate data lines; the fixed seed keeps the generated values identical between runs
        _dataLines = new ILogLineMemory[LineCount];
        var random = new Random(42);

        for (var i = 0; i < LineCount; i++)
        {
            var parts = new string[ColumnCount];
            for (var j = 0; j < ColumnCount; j++)
            {
                parts[j] = GenerateFieldValue(random, j);
            }

            _dataLines[i] = new CsvLogLine(string.Join(";", parts), i + 1);
        }
    }

    /// <summary>
    /// Splits every generated data line with SplitLine.
    /// </summary>
    /// <returns>The sum of the column counts of all split lines, returned so the work is not optimized away.</returns>
    [Benchmark(Description = "SplitLine: parse all lines")]
    public int SplitAllLines ()
    {
        var totalColumns = 0;
        for (var i = 0; i < _dataLines.Length; i++)
        {
            var result = _columnizer.SplitLine(null, _dataLines[i]);
            totalColumns += result.ColumnValues.Length;
        }

        return totalColumns;
    }

    /// <summary>
    /// Passes every generated data line through PreProcessLine, using 1-based line numbers.
    /// </summary>
    /// <returns>The number of lines for which PreProcessLine returned a non-empty result.</returns>
    [Benchmark(Description = "PreProcessLine: preprocess all lines")]
    public int PreProcessAllLines ()
    {
        var processed = 0;
        for (var i = 0; i < _dataLines.Length; i++)
        {
            var result = _columnizer.PreProcessLine(_dataLines[i].FullLine, i + 1, i + 1);
            if (!result.IsEmpty)
            {
                processed++;
            }
        }

        return processed;
    }

    /// <summary>
    /// Calls Selected again on the already initialized columnizer.
    /// </summary>
    /// <returns>The column count reported by the columnizer after the call.</returns>
    [Benchmark(Description = "Selected: re-detect columns from header")]
    public int RedetectColumns ()
    {
        // Reuse the callback built in Setup; constructing a Mock here would be measured too.
        _columnizer.Selected(_callback);
        return _columnizer.GetColumnCount();
    }

    /// <summary>
    /// Produces one field value; the kind of value cycles with <paramref name="columnIndex"/> modulo 4.
    /// </summary>
    /// <param name="random">Random source used for the variable part of the value.</param>
    /// <param name="columnIndex">Zero-based index of the column the value is generated for.</param>
    /// <returns>A number, a short text, a quoted text containing a comma, or a run of one repeated upper-case letter.</returns>
    private static string GenerateFieldValue (Random random, int columnIndex)
    {
        // Mix of value types: numbers, short text, quoted text with commas
        return (columnIndex % 4) switch
        {
            0 => random.Next(1, 100000).ToString(),
            1 => $"text_{random.Next(1, 9999)}",
            2 => $"\"Value, with quotes {random.Next(1, 999)}\"",
            _ => new string((char)('A' + random.Next(0, 26)), random.Next(5, 20)),
        };
    }
}
7 changes: 6 additions & 1 deletion src/LogExpert.Benchmarks/LogExpert.Benchmarks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<TargetFramework>net10.0-windows</TargetFramework>
<EnableWindowsTargeting>true</EnableWindowsTargeting>
<UseWindowsForms>true</UseWindowsForms>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- Override Directory.Build.props settings for BenchmarkDotNet compatibility -->
Expand All @@ -12,11 +14,14 @@

<ItemGroup>
<PackageReference Include="BenchmarkDotNet" />
<PackageReference Include="Moq" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\LogExpert.Core\LogExpert.Core.csproj" />
<ProjectReference Include="..\PluginRegistry\LogExpert.PluginRegistry.csproj" />
<ProjectReference Include="..\CsvColumnizer\CsvColumnizer.csproj" />
<ProjectReference Include="..\ColumnizerLib\ColumnizerLib.csproj" />
</ItemGroup>

<!-- Exclude the shared AssemblyInfo.cs that Directory.Build.props tries to add -->
Expand Down
2 changes: 2 additions & 0 deletions src/LogExpert.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ public static void Main (string[] args)
_ = BenchmarkRunner.Run<BufferIndexBenchmarks>();
_ = BenchmarkRunner.Run<ReadThroughputBenchmarks>();
_ = BenchmarkRunner.Run<BufferIndexContentionBenchmarks>();
_ = BenchmarkRunner.Run<CsvColumnizerBenchmarks>();
}
else
{
Expand All @@ -28,6 +29,7 @@ public static void Main (string[] args)
Console.WriteLine("ReadThroughputBenchmarks: Benchmarks for read throughput");
Console.WriteLine("BufferIndexBenchmarks: Benchmarks for buffer index");
Console.WriteLine("BufferIndexContentionBenchmarks: Benchmarks for buffer index contention");
Console.WriteLine("CsvColumnizerBenchmarks: Benchmarks for CSV columnizer (SplitLine, PreProcess, Selected)");
Console.WriteLine("Dry run:");
Console.WriteLine("dotnet run -c Release -- --filter \"*<benchmarkname>*\" --job Dry --noOverwrite");
Console.WriteLine("Short run:");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
using LogExpert.Core.Classes.Log.Buffers;
using LogExpert.Core.Entities;
using LogExpert.Core.Interfaces;

namespace LogExpert.Core.Classes.Log.Streamreaders;

public class PositionAwareStreamReaderLegacy (Stream stream, EncodingOptions encodingOptions, int maximumLineLength) : PositionAwareStreamReaderBase(stream, encodingOptions, maximumLineLength)
public class PositionAwareStreamReaderLegacy (Stream stream, EncodingOptions encodingOptions, int maximumLineLength) : PositionAwareStreamReaderBase(stream, encodingOptions, maximumLineLength), ILogStreamReaderMemory
{
#region Fields

Expand All @@ -15,8 +17,39 @@ public class PositionAwareStreamReaderLegacy (Stream stream, EncodingOptions enc

#endregion

#region Properties

/// <summary>
/// Gets the <see cref="CharBlockAllocator"/> that <see cref="TryReadLine"/> rents line memory from.
/// The allocator is created on first access if none has been assigned yet.
/// </summary>
public CharBlockAllocator BlockAllocator
{
get => field ??= new CharBlockAllocator();
private set;
}

#endregion

#region Public methods

/// <summary>
/// Reads the next line through <see cref="ReadLine"/> and copies its characters into
/// memory rented from <see cref="BlockAllocator"/>.
/// </summary>
/// <param name="lineMemory">
/// Receives the rented memory holding the line, or <c>default</c> when no line was read.
/// </param>
/// <returns><c>true</c> if a line was read; <c>false</c> if <see cref="ReadLine"/> returned <c>null</c>.</returns>
public bool TryReadLine (out ReadOnlyMemory<char> lineMemory)
{
    // Guard clause: nothing more to read.
    if (ReadLine() is not { } text)
    {
        lineMemory = default;
        return false;
    }

    var rented = BlockAllocator.Rent(text.Length);
    text.AsSpan().CopyTo(rented.Span);

    lineMemory = rented;
    return true;
}

/// <summary>
/// Intentionally does nothing: this reader does not release an individual line's memory here.
/// </summary>
/// <param name="memory">The memory handed back by the caller; it is not used.</param>
public void ReturnMemory (ReadOnlyMemory<char> memory)
{
// Bulk return via BlockAllocator.DetachBlocks() when the LogBuffer is evicted.
// NOTE(review): DetachBlocks and the LogBuffer eviction path are not visible in this diff - confirm.
}

public override string ReadLine ()
{
int readInt;
Expand Down
Loading
Loading