Skip to content

Commit 63fc43b

Browse files
committed
- Prefer returning ReadOnlyMemory<char> to avoid allocating a new string
- Sacrificed a little speed..
1 parent e90e8a1 commit 63fc43b

11 files changed

Lines changed: 218 additions & 44 deletions

File tree

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
using System;
2+
using System.IO;
3+
using System.IO.Compression;
4+
using System.Reflection;
5+
using System.Threading;
6+
using BenchmarkDotNet.Attributes;
7+
using ExcelPRIME;
8+
9+
namespace Excel_PRIME.Bench;
10+
11+
[MemoryDiagnoser]
12+
public class SharedStringsBenchmarks
13+
{
14+
private const string RootFolder = "Data";
15+
16+
[Params(
17+
"100mb.xlsx",
18+
"sampledocs-50mb-xlsx-file-sst.xlsx"
19+
)]
20+
public string FileName { get; set; }
21+
22+
private ZipArchive? archive;
23+
private Stream? sharedStringsStream;
24+
private ISharedString? sharedStrings;
25+
26+
[GlobalSetup]
27+
public void Setup()
28+
{
29+
string path = Path.Combine(RootFolder, FileName);
30+
FileStream fs = File.OpenRead(path);
31+
archive = new ZipArchive(fs, ZipArchiveMode.Read, leaveOpen: false);
32+
var entry = archive.GetEntry("xl/sharedStrings.xml");
33+
if (entry == null)
34+
{
35+
return;
36+
}
37+
sharedStringsStream = entry.Open();
38+
39+
// Instantiate internal XmlReaderHelpersAsync via reflection and call GetSharedStringsAsync
40+
var asm = Assembly.Load("Excel_PRIME");
41+
var helperType = asm.GetType("ExcelPRIME.Implementation.XmlReaderHelpersAsync", throwOnError: false, ignoreCase: false);
42+
if (helperType != null)
43+
{
44+
// Create instance (internal) via Activator
45+
object? helper = Activator.CreateInstance(helperType, nonPublic: true);
46+
if (helper != null)
47+
{
48+
var getSharedStringsAsync = helperType.GetMethod("GetSharedStringsAsync", BindingFlags.Public | BindingFlags.Instance | BindingFlags.NonPublic);
49+
if (getSharedStringsAsync != null)
50+
{
51+
var task = (System.Threading.Tasks.Task<ISharedString>)getSharedStringsAsync.Invoke(helper, new object[] { sharedStringsStream, CancellationToken.None })!;
52+
task.Wait();
53+
sharedStrings = task.Result;
54+
}
55+
}
56+
}
57+
}
58+
59+
[GlobalCleanup]
60+
public void Cleanup()
61+
{
62+
sharedStrings?.Dispose();
63+
sharedStringsStream?.Dispose();
64+
archive?.Dispose();
65+
}
66+
67+
//[Benchmark(Baseline = true)]
68+
public int AccessFirstThousandSequential()
69+
{
70+
if (sharedStrings is null) return 0;
71+
int total = 0;
72+
for (int i = 0; i < 1000; i++)
73+
{
74+
string? s = sharedStrings[i];
75+
if (s != null) total += s.Length;
76+
}
77+
return total;
78+
}
79+
80+
// [Benchmark]
81+
public int AccessRandomThousand()
82+
{
83+
if (sharedStrings is null) return 0;
84+
int total = 0;
85+
var rnd = new Random(42);
86+
for (int i = 0; i < 1000; i++)
87+
{
88+
int idx = rnd.Next(0, 5000);
89+
string? s = sharedStrings[idx];
90+
if (s != null) total += s.Length;
91+
}
92+
return total;
93+
}
94+
}

Excel_PRIME.RangeBench/GetRangeAsposeCells.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ public bool LoadFile(string fullPath)
4343
// Iterate through each row in the range
4444
for (int i = 0; i < rowCount; i++)
4545
{
46-
var rowData = new List<object?>(columnCount);
46+
List<object?> rowData = new List<object?>(columnCount);
4747

4848
// Iterate through each column in the range to access individual cell values
4949
for (int j = 0; j < columnCount; j++)

Excel_PRIME.RangeBench/GetRangeClosedXML.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ public bool LoadFile(string fullPath)
3737
rangesLocal = wb!.Ranges(definedName);
3838
}
3939

40-
foreach (var range in rangesLocal)
40+
foreach (IXLRange range in rangesLocal)
4141
{
42-
foreach (var row in range.Rows())
42+
foreach (IXLRangeRow? row in range.Rows())
4343
{
4444
yield return row.Cells().Select(c => c.Value.ToString());
4545
}

Excel_PRIME.RangeBench/GetRangeEPPlus.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public bool LoadFile(string fullPath)
2929
// worksheet scope
3030
if (localSheetId.HasValue)
3131
{
32-
var worksheet = excelPackage!.Workbook.Worksheets.ElementAt(localSheetId.Value);
32+
ExcelWorksheet? worksheet = excelPackage!.Workbook.Worksheets.ElementAt(localSheetId.Value);
3333
namedRange = worksheet.Names[definedName];
3434
}
3535
else
@@ -39,10 +39,10 @@ public bool LoadFile(string fullPath)
3939
}
4040
if (namedRange != null)
4141
{
42-
var range = namedRange;
42+
ExcelNamedRange range = namedRange;
4343
for (int row = range.Start.Row; row <= range.End.Row; row++)
4444
{
45-
var rowData = new List<object?>(range.End.Column - range.Start.Column);
45+
List<object?> rowData = new List<object?>(range.End.Column - range.Start.Column);
4646
for (int col = range.Start.Column; col <= range.End.Column; col++)
4747
{
4848
rowData.Add(range.Worksheet.Cells[row, col].Value.ToString());

Excel_PRIME.RangeBench/GetRangeExcelPrime.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public bool LoadFile(string fullPath)
2121
IEnumerable<object?[]> rangeRows = localSheetId.HasValue
2222
? _workbook.GetDefinedRange(definedName, localSheetId.Value)
2323
: _workbook.GetDefinedRange(definedName);
24-
foreach (var rangeRow in rangeRows)
24+
foreach (object?[] rangeRow in rangeRows)
2525
{
2626
yield return rangeRow;
2727
}
@@ -30,7 +30,7 @@ public bool LoadFile(string fullPath)
3030
public IEnumerable<IEnumerable<object?>> GetRange(string userRange, string sheetName)
3131
{
3232
IEnumerable<object?[]> rangeRows = _workbook.GetDefinedRange(userRange, sheetName);
33-
foreach (var rangeRow in rangeRows)
33+
foreach (object?[] rangeRow in rangeRows)
3434
{
3535
yield return rangeRow.Select(cell => cell?.ToString());
3636
}

Excel_PRIME.Tests/DefinedRangeTests.cs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.Diagnostics.CodeAnalysis;
34
using System.Linq;
45
using System.Threading.Tasks;
@@ -25,7 +26,7 @@ public async Task A010_ReadNamedRange()
2526
taxRate.FirstOrDefault().Should().Be("0.1", "<definedName name=\"TaxRate\">0.1</definedName>");
2627

2728
// Now do <definedName name="Prices">Sheet1!$A$1:$A$4</definedName>
28-
var prices= await workbook.GetDefinedRangeAsync("Prices").ToArrayAsync();
29+
object?[][] prices= await workbook.GetDefinedRangeAsync("Prices").ToArrayAsync();
2930
prices.Should().HaveCount(4);
3031
prices[0].Should().BeEquivalentTo(["5"]);
3132
prices[1].Should().BeEquivalentTo(["4"]);
@@ -41,7 +42,7 @@ public async Task A020_DynamicNamedRange()
4142
await workbook.OpenAsync(fileName).ConfigureAwait(true);
4243

4344
// Do not fallover with <definedName name="Prices">OFFSET(Sheet1!$A$1,0,0,COUNTA(Sheet1!$A:$A),1)</definedName>
44-
var prices = await workbook.GetDefinedRangeAsync("Prices").ToArrayAsync();
45+
object?[][] prices = await workbook.GetDefinedRangeAsync("Prices").ToArrayAsync();
4546
}
4647

4748
[Test]
@@ -60,19 +61,19 @@ await workbook.OpenAsync(fileName,
6061
// Do not fallover with
6162
// <definedName name="OrderSize" localSheetId="0">'Try it Yourself'!$C$12:$E$12</definedName>
6263
// <definedName name="OrderSize">Solution!$C$12:$E$12</definedName>
63-
var orderSizeS = await workbook.GetDefinedRangeAsync("OrderSize").FirstAsync();
64+
object?[] orderSizeS = await workbook.GetDefinedRangeAsync("OrderSize").FirstAsync();
6465
orderSizeS.Should().HaveCount(3);
6566
orderSizeS.Should().HaveElementAt(0, 94);
6667
orderSizeS.Should().HaveElementAt(1, 54);
6768
orderSizeS.Should().HaveElementAt(2, 0);
6869

69-
var orderSizeU = await workbook.GetDefinedRangeAsync("OrderSize", "Try it Yourself").FirstAsync();
70+
object?[] orderSizeU = await workbook.GetDefinedRangeAsync("OrderSize", "Try it Yourself").FirstAsync();
7071
orderSizeU.Should().HaveCount(3);
7172
orderSizeU.Should().HaveElementAt(0, 0);
7273
orderSizeU.Should().HaveElementAt(1, 0);
7374
orderSizeU.Should().HaveElementAt(2, 0);
7475

75-
var orderSizeT = await workbook.GetDefinedRangeAsync("OrderSize (Try it Yourself)").FirstAsync();
76+
object?[] orderSizeT = await workbook.GetDefinedRangeAsync("OrderSize (Try it Yourself)").FirstAsync();
7677
orderSizeT.Should().HaveCount(3);
7778
orderSizeT.Should().HaveElementAt(0, 0);
7879
orderSizeT.Should().HaveElementAt(1, 0);
@@ -119,9 +120,9 @@ public void A052_ExcelPrime_PivotTable()
119120
getRanger.LoadFile("Data\\pivot-tables.xlsx");
120121

121122
//<definedName name="_xlnm._FilterDatabase" localSheetId="2" hidden="1">Sheet1!$A$1:$F$214</definedName>
122-
var filterDatabaseSheet = getRanger.GetDefinedRange("_xlnm._FilterDatabase", 2);
123+
IEnumerable<IEnumerable<object?>> filterDatabaseSheet = getRanger.GetDefinedRange("_xlnm._FilterDatabase", 2);
123124
int cells = filterDatabaseSheet.Sum(row => row.Count());
124-
var filterDatabase = getRanger.GetDefinedRange("_xlnm._FilterDatabase");
125+
IEnumerable<IEnumerable<object?>> filterDatabase = getRanger.GetDefinedRange("_xlnm._FilterDatabase");
125126
cells += filterDatabase.Sum(row => row.Count());
126127
cells.Should().Be(expected);
127128
}

Excel_PRIME/FromExternal/SemaphoreLocker.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,17 @@ public async Task<T> LockAsync<T>(Func<Task<T>> worker)
9494
}
9595
}
9696

97+
// New non-allocating lock helpers
98+
public void Enter()
99+
{
100+
_semaphore.Wait();
101+
}
102+
103+
public void Exit()
104+
{
105+
_semaphore.Release();
106+
}
107+
97108
private void Dispose(bool isDisposing)
98109
{
99110
if (!_isDisposed)

Excel_PRIME/Implementation/Cell.cs

Lines changed: 48 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ void ReadValue()
119119
}
120120
else
121121
{
122-
value = new string(buffer.AsSpan(0, len));
122+
// Prefer returning ReadOnlyMemory<char> to avoid allocating a new string
123+
value = new ReadOnlyMemory<char>(buffer.AsSpan(0, len).ToArray());
123124
}
124125
break;
125126

@@ -245,7 +246,7 @@ void ReadValue()
245246
}
246247
else
247248
{
248-
value = new string(buffer.AsSpan(0, len));
249+
value = new ReadOnlyMemory<char>(buffer.AsSpan(0, len).ToArray());
249250
}
250251
break;
251252

@@ -275,45 +276,65 @@ private static string ReadString(XmlReader reader, StringBuilder valueBuilder)
275276
{
276277
return string.Empty;
277278
}
278-
XmlReader subReader = reader;
279279

280-
if (subReader.NodeType == XmlNodeType.Element)
280+
// If we're positioned on an element, parse its inner textual content by
281+
// iterating over the reader until we exit the element's depth. Avoid
282+
// creating a subtree XmlReader to reduce allocations (XmlSubtreeReader.NodeData).
283+
if (reader.NodeType == XmlNodeType.Element)
281284
{
282-
if (subReader.IsEmptyElement)
285+
if (reader.IsEmptyElement)
283286
{
284287
return string.Empty;
285288
}
286289

287-
subReader = reader.ReadSubtree();
288-
if (subReader.Read())
290+
int startDepth = reader.Depth;
291+
292+
// Move to the first node inside the element
293+
if (!reader.Read())
289294
{
290-
if (reader.NodeType == XmlNodeType.EndElement)
295+
return string.Empty;
296+
}
297+
298+
string result = string.Empty;
299+
int hasMultipleTextForCell = 0;
300+
301+
while (reader.Depth > startDepth)
302+
{
303+
var nt = reader.NodeType;
304+
bool isTextual = nt == XmlNodeType.Text || nt == XmlNodeType.CDATA || nt == XmlNodeType.Whitespace || nt == XmlNodeType.SignificantWhitespace;
305+
if (isTextual)
306+
{
307+
if (hasMultipleTextForCell++ > 0)
308+
{
309+
valueBuilder.Append(result);
310+
}
311+
result = reader.Value;
312+
}
313+
314+
if (!reader.Read())
291315
{
292-
return string.Empty;
316+
break;
293317
}
294318
}
295-
}
296-
string result = string.Empty;
297-
int hasMultipleTextForCell = 0;
298-
while (IsTextualNode(reader.NodeType))
299-
{
300-
if (hasMultipleTextForCell++ > 0)
319+
320+
if (hasMultipleTextForCell > 1)
301321
{
322+
// Append last piece and return combined string
302323
valueBuilder.Append(result);
324+
result = valueBuilder.ToString();
303325
}
304-
result = reader.Value;
305-
if (!subReader.Read())
306-
{
307-
break;
308-
}
326+
327+
return result;
309328
}
310-
if (hasMultipleTextForCell > 1)
329+
330+
// Not positioned on an element - return value if textual
331+
var nodeType = reader.NodeType;
332+
if (nodeType == XmlNodeType.Text || nodeType == XmlNodeType.CDATA || nodeType == XmlNodeType.Whitespace || nodeType == XmlNodeType.SignificantWhitespace)
311333
{
312-
// Add last iteration, and get current combined string
313-
valueBuilder.Append(result);
314-
result = valueBuilder.ToString();
334+
return reader.Value;
315335
}
316-
return result;
336+
337+
return string.Empty;
317338
}
318339

319340
private const uint IsTextualNodeBitmap = 0x6018; // 00 0110 0000 0001 1000
@@ -442,7 +463,8 @@ private static object PerformDoubleConversion(ReadOnlySpan<char> asSpan)
442463
{ // ±5.0 × 10−324 to ±1.7 × 10308 ~15-17 digits 8 bytes
443464
return resultD;
444465
}
445-
return new string(asSpan);
466+
// Fall back to ReadOnlyMemory to avoid immediate string allocation; callers that need string can ToString()
467+
return new ReadOnlyMemory<char>(asSpan.ToArray());
446468
}
447469

448470

Excel_PRIME/Implementation/LazyLoadSharedStrings.cs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ public string? this[int requestIndex]
8888
// Many sheets may be attempting to get shared strings
8989
if (requestIndex >= _currentlyLoaded.Count)
9090
{
91-
_locker.Lock(() =>
91+
_locker.Enter();
92+
try
9293
{
9394
// Use additional offset to reduce locking intensity
9495
LoadUntil(requestIndex+16);
@@ -99,7 +100,11 @@ public string? this[int requestIndex]
99100
// Release resources
100101
_reader.Close();
101102
}
102-
});
103+
}
104+
finally
105+
{
106+
_locker.Exit();
107+
}
103108
}
104109

105110
if (requestIndex >= _currentlyLoaded.Count)

0 commit comments

Comments
 (0)