I’m working on implementing a library that can read/write from FoxBASE files in C#. I can’t use ODBC for this, because it’s not supported in .NET core.
I’ve got the reading working, but it’s incredibly slow. I’ve tried doing some profiling on it, but the performance report doesn’t list specific line numbers or function names, so it’s hard to tell where the bottlenecks are.
This is the table class I have so far.
using System; using System.Collections; using System.Collections.Generic; using System.IO; using System.Linq; using System.Reflection; using DataConverter.Database.Transformers; namespace DataConverter.Database.Toolbox.FoxPro { public class FoxProTable<T> : IEnumerator<T>, IEnumerable<T> where T: new() { [Flags] private enum TableAttribute { HasCDX, HasMemoField, IsDatabase } private string FileLocation { get; } private DbfFileType _fileType; private int _recordCount; private int _firstRecordPosition; private int _dataRecordLength; private TableAttribute _tableAttributes; private int _codePageMark; private string _tableName; protected readonly List<FoxProField> Fields = new List<FoxProField>(); private int _filePosition; private readonly Dictionary<MemberInfo, string> _fieldNameCache = new Dictionary<MemberInfo, string>(); private readonly Dictionary<string, PropertyInfo> _propertyCache = new Dictionary<string, PropertyInfo>(); private readonly Dictionary<Type, Transformer> _transformerCache = new Dictionary<Type, Transformer>(); private FileStream _file; # Keep the file open so that we don't have to open/close it on every record. # Ideally this also means fewer seeks. private FileStream File => _file ?? (_file = System.IO.File.OpenRead(FileLocation)); private readonly List<T> _records = new List<T>(); private int _recordPosition = -1; public FoxProTable() { } public FoxProTable(string filePath, string tableName) { _tableName = tableName; FileLocation = Path.Combine(filePath, tableName); ReadHeader(); } public DateTime LastUpdate { get; set; } private string GetFieldName(MemberInfo property) { if (_fieldNameCache.TryGetValue(property, out var fieldName)) return fieldName; fieldName = ((FieldAttribute) Attribute.GetCustomAttribute(property, typeof(FieldAttribute)))?.Name; _fieldNameCache[property] = fieldName; return fieldName; } # Translate a data field from an array of bytes into its human readable version. private static object TranslateBuffer(IReadOnlyCollection<byte> buffer, FoxProField field) { switch (field.DataType) { case DbfDataType.Integer: case DbfDataType.Numeric: return int.TryParse(string.Join("", buffer.Select(x => (char)x)).TrimEnd(), out var val) ? val : 0; case DbfDataType.Character: return string.Join("", buffer.Select(x => (char) x)); case DbfDataType.Currency: return int.Parse(string.Join("", buffer.Select(x => (char)x)).TrimEnd()) / Math.Pow(10, 4); case DbfDataType.Double: return int.Parse(string.Join("", buffer.Select(x => (char)x)).TrimEnd()); case DbfDataType.Float: return int.Parse(string.Join("", buffer.Select(x => (char)x)).TrimEnd()) / Math.Pow(10, field.NumberOfDecimalPlaces); case DbfDataType.Date: var validYear = int.TryParse(string.Join("", buffer.Take(4).Select(x => (char)x)), out var year); var validMonth = int.TryParse(string.Join("", buffer.Skip(4).Take(2).Select(x => (char)x)), out var month); var validDay = int.TryParse(string.Join("", buffer.Skip(6).Take(2).Select(x => (char)x)), out var day); if (validDay && validMonth && validYear) return new DateTime(year, month, day); return null; case DbfDataType.Logical: return (char)buffer.Single() == 'T'; case DbfDataType.DateTime: case DbfDataType.General: case DbfDataType.Memo: case DbfDataType.Picture: throw new NotSupportedException($ "{field.DataType} fields are not supported yet."); default: throw new ArgumentOutOfRangeException(nameof(field), field.DataType, null); } } private T ReadRecord() { # Make sure we're reading the correct position. if (File.Position != _filePosition) File.Seek(_filePosition, SeekOrigin.Begin); var record = new T(); # Read in the array of bytes that represent the next record. var dataBuffer = new byte[_dataRecordLength]; File.Read(dataBuffer, 0, _dataRecordLength); _filePosition += _dataRecordLength; if (dataBuffer.Length < _dataRecordLength) throw new EndOfStreamException(); # For each field in the FoxBASE table, we try to match it to a field on the model. foreach (var field in Fields) { # If we already found the property once, there's no need to incur the cost of reflecting the model to get the PropertyInfo. if (!_propertyCache.TryGetValue(field.FieldName, out var property)) { # FieldAttribute allows you to define a name for the field that's not the same as the field name in the table. property = typeof(T).GetProperties() .FirstOrDefault(x => x.Name.Equals(field.FieldName, StringComparison.OrdinalIgnoreCase) || (GetFieldName(x)?.Equals(field.FieldName, StringComparison.OrdinalIgnoreCase) ?? false)); } if (property == null) continue; # For reasons unknown to me, the displacement in the header is always high by 2. var fieldStart = field.Displacement - 2; var fieldLength = field.LengthInBytes; # Get the value from the byte slice that represents the field in the database file/ dynamic value = TranslateBuffer(dataBuffer.Skip(fieldStart).Take(fieldLength).ToArray(), field); var transformerType = ((FieldAttribute) Attribute.GetCustomAttribute(property, typeof(FieldAttribute)))? .Transformer; # If there's a transformer associated with the field, load it. if (transformerType != null) { if (!_transformerCache.TryGetValue(transformerType, out var transformer)) { transformer = (Transformer)Activator.CreateInstance(transformerType); _transformerCache.Add(transformerType, transformer); } value = transformer.Transform(value); } property.SetValue(record, value); } return record; } # This method can be ignored. It just reads the metadata from the header. private void ReadHeader() { var tableInfoBuffer = new byte[32]; File.Read(tableInfoBuffer, 0, 32); var fileType = tableInfoBuffer[0]; if (!Enum.IsDefined(typeof(DbfFileType), (int)fileType)) throw new NotSupportedException($ "{FileLocation} is not a supported file type. It starts with {fileType}"); _fileType = (DbfFileType) fileType; var lastUpdateYear = (int)tableInfoBuffer[1]; var lastUpdateMonth = tableInfoBuffer[2]; var lastUpdateDay = tableInfoBuffer[3]; lastUpdateYear += lastUpdateYear + 2000 > DateTime.Today.Year ? 1900 : 2000; LastUpdate = new DateTime(lastUpdateYear, lastUpdateMonth, lastUpdateDay); _recordCount = BitConverter.ToInt32(tableInfoBuffer, 4); _firstRecordPosition = BitConverter.ToInt16(tableInfoBuffer, 8); _dataRecordLength = BitConverter.ToInt16(tableInfoBuffer, 10); _tableAttributes = (TableAttribute)tableInfoBuffer[27]; _codePageMark = tableInfoBuffer[28]; var fieldInfoBuffer = new byte[32]; while (true) { File.Read(fieldInfoBuffer, 0, 32); if (fieldInfoBuffer[0] == 0x0D) break; Fields.Add(new FoxProField(fieldInfoBuffer)); } _filePosition = _firstRecordPosition; } public bool MoveNext() { try { _recordPosition++; if (_recordPosition > _recordCount) return false; var nextRecord = ReadRecord(); _records.Add(nextRecord); return true; } catch (EndOfStreamException) { return false; } } public void Reset() { _recordPosition = -1; } public T Current => _records[_recordPosition]; object IEnumerator.Current => Current; public void Dispose() { } public IEnumerator<T> GetEnumerator() { return this; } IEnumerator IEnumerable.GetEnumerator() { return GetEnumerator(); } } }
These are the two helper classes:
Transformer:
using System; namespace DataConverter.Database.Transformers.RoadRunner { public class FoxProTransformer : Transformer { public override object Transform(object value) { switch (value) { case string sValue: return sValue.Equals("x", StringComparison.OrdinalIgnoreCase); default: return value; } } } }
FieldAttribute:
using System; namespace DataConverter.Database.Toolbox.FoxPro { [AttributeUsage(AttributeTargets.Property)] public class FieldAttribute : Attribute { public string Name { get; } public Type Transformer { get; } public FieldAttribute(string name) { Name = name; } public FieldAttribute(string name, Type transformer) { Name = name; Transformer = transformer; } public FieldAttribute(Type transformer) { Transformer = transformer; } } }
I’m fine with the performance of the ReadHeader
method, as it’s only called once. I’m mostly concerned with the performance of ReadRecord
.