You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
DS7/ToolProject/FastDBF/DbfHeader.cs

699 lines
26 KiB
C#

///
/// Author: Ahmed Lacevic
/// Date: 12/1/2007
/// Desc:
///
/// Revision History:
/// -----------------------------------
/// Author:
/// Date:
/// Desc:
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
namespace SocialExplorer.IO.FastDBF
{
/// <summary>
/// This class represents a DBF IV file header.
/// </summary>
///
/// <remarks>
/// DBF files are really wasteful on space but this legacy format lives on because it's really really simple.
/// It lacks much in features though.
///
///
/// Thanks to Erik Bachmann for providing the DBF file structure information!!
/// http://www.clicketyclick.dk/databases/xbase/format/dbf.html
///
/// _______________________ _______
/// 00h / 0| Version number *1| ^
/// |-----------------------| |
/// 01h / 1| Date of last update | |
/// 02h / 2| YYMMDD *21| |
/// 03h / 3| *14| |
/// |-----------------------| |
/// 04h / 4| Number of records | Record
/// 05h / 5| in data file | header
/// 06h / 6| ( 32 bits ) *14| |
/// 07h / 7| | |
/// |-----------------------| |
/// 08h / 8| Length of header *14| |
/// 09h / 9| structure ( 16 bits ) | |
/// |-----------------------| |
/// 0Ah / 10| Length of each record | |
/// 0Bh / 11| ( 16 bits ) *2 *14| |
/// |-----------------------| |
/// 0Ch / 12| ( Reserved ) *3| |
/// 0Dh / 13| | |
/// |-----------------------| |
/// 0Eh / 14| Incomplete transac.*12| |
/// |-----------------------| |
/// 0Fh / 15| Encryption flag *13| |
/// |-----------------------| |
/// 10h / 16| Free record thread | |
/// 11h / 17| (reserved for LAN | |
/// 12h / 18| only ) | |
/// 13h / 19| | |
/// |-----------------------| |
/// 14h / 20| ( Reserved for | | _ |=======================| ______
/// | multi-user dBASE ) | | / 00h / 0| Field name in ASCII | ^
/// : ( dBASE III+ - ) : | / : (terminated by 00h) : |
/// : : | | | | |
/// 1Bh / 27| | | | 0Ah / 10| | |
/// |-----------------------| | | |-----------------------| For
/// 1Ch / 28| MDX flag (dBASE IV)*14| | | 0Bh / 11| Field type (ASCII) *20| each
/// |-----------------------| | | |-----------------------| field
/// 1Dh / 29| Language driver *5| | / 0Ch / 12| Field data address | |
/// |-----------------------| | / | *6| |
/// 1Eh / 30| ( Reserved ) | | / | (in memory !!!) | |
/// 1Fh / 31| *3| | / 0Fh / 15| (dBASE III+) | |
/// |=======================|__|____/ |-----------------------| | -
/// 20h / 32| | | ^ 10h / 16| Field length *22| | |
/// |- - - - - - - - - - - -| | | |-----------------------| | | *7
/// | *19| | | 11h / 17| Decimal count *23| | |
/// |- - - - - - - - - - - -| | Field |-----------------------| | -
/// | | | Descriptor 12h / 18| ( Reserved for | |
/// :. . . . . . . . . . . .: | |array 13h / 19| multi-user dBASE)*18| |
/// : : | | |-----------------------| |
/// n | |__|__v_ 14h / 20| Work area ID *16| |
/// |-----------------------| | \ |-----------------------| |
/// n+1| Terminator (0Dh) | | \ 15h / 21| ( Reserved for | |
/// |=======================| | \ 16h / 22| multi-user dBASE ) | |
/// m | Database Container | | \ |-----------------------| |
/// : *15: | \ 17h / 23| Flag for SET FIELDS | |
/// : : | | |-----------------------| |
/// / m+263 | | | 18h / 24| ( Reserved ) | |
/// |=======================|__v_ ___ | : : |
/// : : ^ | : : |
/// : : | | : : |
/// : : | | 1Eh / 30| | |
/// | Record structure | | | |-----------------------| |
/// | | | \ 1Fh / 31| Index field flag *8| |
/// | | | \_ |=======================| _v_____
/// | | Records
/// |-----------------------| |
/// | | | _ |=======================| _______
/// | | | / 00h / 0| Record deleted flag *9| ^
/// | | | / |-----------------------| |
/// | | | / | Data *10| One
/// | | | / : (ASCII) *17: record
/// | |____|_____/ | | |
/// : : | | | _v_____
/// : :____|_____ |=======================|
/// : : |
/// | | |
/// | | |
/// | | |
/// | | |
/// | | |
/// |=======================| |
/// |__End_of_File__________| ___v____ End of file ( 1Ah ) *11
///
/// </remarks>
public class DbfHeader : ICloneable
{
/// <summary>
/// Header file descriptor size is 33 bytes (32 bytes + 1 terminator byte), followed by column metadata which is 32 bytes each.
/// </summary>
public const int FileDescriptorSize = 33;
/// <summary>
/// Field or DBF Column descriptor is 32 bytes long.
/// </summary>
public const int ColumnDescriptorSize = 32;
//type of the file, must be 03h
private const int _fileType = 0x03;
//Date the file was last updated.
private DateTime _updateDate;
//Number of records in the datafile, 32bit little-endian, unsigned
private uint _numRecords = 0;
//Length of the header structure
private ushort _headerLength = FileDescriptorSize; //empty header is 33 bytes long. Each column adds 32 bytes.
//Length of the records, ushort - unsigned 16 bit integer
private int _recordLength = 1; //start with 1 because the first byte is a delete flag
//DBF fields/columns
internal List<DbfColumn> _fields = new List<DbfColumn>();
//indicates whether header columns can be modified!
bool _locked = false;
//keeps column name index for the header, must clear when header columns change.
private Dictionary<string, int> _columnNameIndex = null;
/// <summary>
/// When object is modified dirty flag is set.
/// </summary>
bool _isDirty = false;
/// <summary>
/// mEmptyRecord is an array used to clear record data in CDbf4Record.
/// This is shared by all record objects, used to speed up clearing fields or entire record.
/// <seealso cref="EmptyDataRecord"/>
/// </summary>
private byte[] _emptyRecord = null;
public readonly Encoding encoding = Encoding.ASCII;
[Obsolete]
public DbfHeader()
{
}
public DbfHeader(Encoding encoding)
{
this.encoding = encoding;
}
/// <summary>
/// Specify initial column capacity.
/// </summary>
/// <param name="nInitialFields"></param>
public DbfHeader(int nFieldCapacity)
{
_fields = new List<DbfColumn>(nFieldCapacity);
}
/// <summary>
/// Gets header length.
/// </summary>
public ushort HeaderLength
{
get
{
return _headerLength;
}
}
/// <summary>
/// Add a new column to the DBF header.
/// </summary>
/// <param name="oNewCol"></param>
public void AddColumn(DbfColumn oNewCol)
{
//throw exception if the header is locked
if (_locked)
throw new InvalidOperationException("This header is locked and can not be modified. Modifying the header would result in a corrupt DBF file. You can unlock the header by calling UnLock() method.");
//since we are breaking the spec rules about max number of fields, we should at least
//check that the record length stays within a number that can be recorded in the header!
//we have 2 unsigned bytes for record length for a maximum of 65535.
if (_recordLength + oNewCol.Length > 65535)
throw new ArgumentOutOfRangeException("oNewCol", "Unable to add new column. Adding this column puts the record length over the maximum (which is 65535 bytes).");
//add the column
_fields.Add(oNewCol);
//update offset bits, record and header lengths
oNewCol._dataAddress = _recordLength;
_recordLength += oNewCol.Length;
_headerLength += ColumnDescriptorSize;
//clear empty record
_emptyRecord = null;
//set dirty bit
_isDirty = true;
_columnNameIndex = null;
}
/// <summary>
/// Create and add a new column with specified name and type.
/// </summary>
/// <param name="sName"></param>
/// <param name="type"></param>
public void AddColumn(string sName, DbfColumn.DbfColumnType type)
{
AddColumn(new DbfColumn(sName, type));
}
/// <summary>
/// Create and add a new column with specified name, type, length, and decimal precision.
/// </summary>
/// <param name="sName">Field name. Uniqueness is not enforced.</param>
/// <param name="type"></param>
/// <param name="nLength">Length of the field including decimal point and decimal numbers</param>
/// <param name="nDecimals">Number of decimal places to keep.</param>
public void AddColumn(string sName, DbfColumn.DbfColumnType type, int nLength, int nDecimals)
{
AddColumn(new DbfColumn(sName, type, nLength, nDecimals));
}
/// <summary>
/// Remove column from header definition.
/// </summary>
/// <param name="nIndex"></param>
public void RemoveColumn(int nIndex)
{
//throw exception if the header is locked
if (_locked)
throw new InvalidOperationException("This header is locked and can not be modified. Modifying the header would result in a corrupt DBF file. You can unlock the header by calling UnLock() method.");
DbfColumn oColRemove = _fields[nIndex];
_fields.RemoveAt(nIndex);
oColRemove._dataAddress = 0;
_recordLength -= oColRemove.Length;
_headerLength -= ColumnDescriptorSize;
//if you remove a column offset shift for each of the columns
//following the one removed, we need to update those offsets.
int nRemovedColLen = oColRemove.Length;
for (int i = nIndex; i < _fields.Count; i++)
_fields[i]._dataAddress -= nRemovedColLen;
//clear the empty record
_emptyRecord = null;
//set dirty bit
_isDirty = true;
_columnNameIndex = null;
}
/// <summary>
/// Look up a column index by name. NOT Case Sensitive. This is a change from previous behaviour!
/// </summary>
/// <param name="sName"></param>
public DbfColumn this[string sName]
{
get
{
int colIndex = FindColumn(sName);
if (colIndex > -1)
return _fields[colIndex];
return null;
}
}
/// <summary>
/// Returns column at specified index. Index is 0 based.
/// </summary>
/// <param name="nIndex">Zero based index.</param>
/// <returns></returns>
public DbfColumn this[int nIndex]
{
get
{
return _fields[nIndex];
}
}
/// <summary>
/// Finds a column index by using a fast dictionary lookup-- creates column dictionary on first use. Returns -1 if not found. CHANGE: not case sensitive any longer!
/// </summary>
/// <param name="sName">Column name (case insensitive comparison)</param>
/// <returns>column index (0 based) or -1 if not found.</returns>
public int FindColumn(string sName)
{
if (_columnNameIndex == null)
{
_columnNameIndex = new Dictionary<string, int>(_fields.Count);
//create a new index
for (int i = 0; i < _fields.Count; i++)
{
_columnNameIndex.Add(_fields[i].Name.ToUpper(), i);
}
}
int columnIndex;
if (_columnNameIndex.TryGetValue(sName.ToUpper(), out columnIndex))
return columnIndex;
return -1;
}
/// <summary>
/// Returns an empty data record. This is used to clear columns
/// </summary>
/// <remarks>
/// The reason we put this in the header class is because it allows us to use the CDbf4Record class in two ways.
/// 1. we can create one instance of the record and reuse it to write many records quickly clearing the data array by bitblting to it.
/// 2. we can create many instances of the record (a collection of records) and have only one copy of this empty dataset for all of them.
/// If we had put it in the Record class then we would be taking up twice as much space unnecessarily. The empty record also fits the model
/// and everything is neatly encapsulated and safe.
///
/// </remarks>
protected internal byte[] EmptyDataRecord
{
get { return _emptyRecord ?? (_emptyRecord = encoding.GetBytes("".PadLeft(_recordLength, ' ').ToCharArray())); }
}
/// <summary>
/// Returns Number of columns in this dbf header.
/// </summary>
public int ColumnCount
{
get { return _fields.Count; }
}
/// <summary>
/// Size of one record in bytes. All fields + 1 byte delete flag.
/// </summary>
public int RecordLength
{
get
{
return _recordLength;
}
}
/// <summary>
/// Get/Set number of records in the DBF.
/// </summary>
/// <remarks>
/// The reason we allow client to set RecordCount is beause in certain streams
/// like internet streams we can not update record count as we write out records, we have to set it in advance,
/// so client has to be able to modify this property.
/// </remarks>
public uint RecordCount
{
get
{
return _numRecords;
}
set
{
_numRecords = value;
//set the dirty bit
_isDirty = true;
}
}
/// <summary>
/// Get/set whether this header is read only or can be modified. When you create a CDbfRecord
/// object and pass a header to it, CDbfRecord locks the header so that it can not be modified any longer.
/// in order to preserve DBF integrity.
/// </summary>
internal bool Locked
{
get
{
return _locked;
}
set
{
_locked = value;
}
}
/// <summary>
/// Use this method with caution. Headers are locked for a reason, to prevent DBF from becoming corrupt.
/// </summary>
public void Unlock()
{
_locked = false;
}
/// <summary>
/// Returns true when this object is modified after read or write.
/// </summary>
public bool IsDirty
{
get
{
return _isDirty;
}
set
{
_isDirty = value;
}
}
/// <summary>
/// Encoding must be ASCII for this binary writer.
/// </summary>
/// <param name="writer"></param>
/// <remarks>
/// See class remarks for DBF file structure.
/// </remarks>
public void Write(BinaryWriter writer)
{
//write the header
// write the output file type.
writer.Write((byte)_fileType);
//Update date format is YYMMDD, which is different from the column Date type (YYYYDDMM)
writer.Write((byte)(_updateDate.Year - 1900));
writer.Write((byte)_updateDate.Month);
writer.Write((byte)_updateDate.Day);
// write the number of records in the datafile. (32 bit number, little-endian unsigned)
writer.Write(_numRecords);
// write the length of the header structure.
writer.Write(_headerLength);
// write the length of a record
writer.Write((ushort)_recordLength);
// write the reserved bytes in the header
for (int i = 0; i < 20; i++)
writer.Write((byte)0);
// write all of the header records
byte[] byteReserved = new byte[14]; //these are initialized to 0 by default.
foreach (DbfColumn field in _fields)
{
char[] cname = field.Name.PadRight(11, (char)0).ToCharArray();
writer.Write(cname);
// write the field type
writer.Write((char)field.ColumnTypeChar);
// write the field data address, offset from the start of the record.
writer.Write(field.DataAddress);
// write the length of the field.
// if char field is longer than 255 bytes, then we use the decimal field as part of the field length.
if (field.ColumnType == DbfColumn.DbfColumnType.Character && field.Length > 255)
{
//treat decimal count as high byte of field length, this extends char field max to 65535
writer.Write((ushort)field.Length);
}
else
{
// write the length of the field.
writer.Write((byte)field.Length);
// write the decimal count.
writer.Write((byte)field.DecimalCount);
}
// write the reserved bytes.
writer.Write(byteReserved);
}
// write the end of the field definitions marker
writer.Write((byte)0x0D);
writer.Flush();
//clear dirty bit
_isDirty = false;
//lock the header so it can not be modified any longer,
//we could actually postpond this until first record is written!
_locked = true;
}
/// <summary>
/// Read header data, make sure the stream is positioned at the start of the file to read the header otherwise you will get an exception.
/// When this function is done the position will be the first record.
/// </summary>
/// <param name="reader"></param>
public void Read(BinaryReader reader)
{
// type of reader.
int nFileType = reader.ReadByte();
if (nFileType != 0x03)
throw new NotSupportedException("Unsupported DBF reader Type " + nFileType);
// parse the update date information.
int year = (int)reader.ReadByte();
int month = (int)reader.ReadByte();
int day = (int)reader.ReadByte();
_updateDate = new DateTime(year + 1900, month, day);
// read the number of records.
_numRecords = reader.ReadUInt32();
// read the length of the header structure.
_headerLength = reader.ReadUInt16();
// read the length of a record
_recordLength = reader.ReadInt16();
// skip the reserved bytes in the header.
reader.ReadBytes(20);
// calculate the number of Fields in the header
int nNumFields = (_headerLength - FileDescriptorSize) / ColumnDescriptorSize;
//offset from start of record, start at 1 because that's the delete flag.
int nDataOffset = 1;
// read all of the header records
_fields = new List<DbfColumn>(nNumFields);
for (int i = 0; i < nNumFields; i++)
{
// read the field name
char[] buffer = new char[11];
buffer = reader.ReadChars(11);
string sFieldName = new string(buffer);
int nullPoint = sFieldName.IndexOf((char)0);
if (nullPoint != -1)
sFieldName = sFieldName.Substring(0, nullPoint);
//read the field type
char cDbaseType = (char)reader.ReadByte();
// read the field data address, offset from the start of the record.
int nFieldDataAddress = reader.ReadInt32();
//read the field length in bytes
//if field type is char, then read FieldLength and Decimal count as one number to allow char fields to be
//longer than 256 bytes (ASCII char). This is the way Clipper and FoxPro do it, and there is really no downside
//since for char fields decimal count should be zero for other versions that do not support this extended functionality.
//-----------------------------------------------------------------------------------------------------------------------
int nFieldLength = 0;
int nDecimals = 0;
if (cDbaseType == 'C' || cDbaseType == 'c')
{
//treat decimal count as high byte
nFieldLength = (int)reader.ReadUInt16();
}
else
{
//read field length as an unsigned byte.
nFieldLength = (int)reader.ReadByte();
//read decimal count as one byte
nDecimals = (int)reader.ReadByte();
}
//read the reserved bytes.
reader.ReadBytes(14);
//Create and add field to collection
_fields.Add(new DbfColumn(sFieldName, DbfColumn.GetDbaseType(cDbaseType), nFieldLength, nDecimals, nDataOffset));
// add up address information, you can not trust the address recorded in the DBF file...
nDataOffset += nFieldLength;
}
// Last byte is a marker for the end of the field definitions.
reader.ReadBytes(1);
//read any extra header bytes...move to first record
//equivalent to reader.BaseStream.Seek(mHeaderLength, SeekOrigin.Begin) except that we are not using the seek function since
//we need to support streams that can not seek like web connections.
int nExtraReadBytes = _headerLength - (FileDescriptorSize + (ColumnDescriptorSize * _fields.Count));
if (nExtraReadBytes > 0)
reader.ReadBytes(nExtraReadBytes);
//if the stream is not forward-only, calculate number of records using file size,
//sometimes the header does not contain the correct record count
//if we are reading the file from the web, we have to use ReadNext() functions anyway so
//Number of records is not so important and we can trust the DBF to have it stored correctly.
if (reader.BaseStream.CanSeek && _numRecords == 0)
{
//notice here that we subtract file end byte which is supposed to be 0x1A,
//but some DBF files are incorrectly written without this byte, so we round off to nearest integer.
//that gives a correct result with or without ending byte.
if (_recordLength > 0)
_numRecords = (uint)Math.Round(((double)(reader.BaseStream.Length - _headerLength - 1) / _recordLength));
}
//lock header since it was read from a file. we don't want it modified because that would corrupt the file.
//user can override this lock if really necessary by calling UnLock() method.
_locked = true;
//clear dirty bit
_isDirty = false;
}
public object Clone()
{
return this.MemberwiseClone();
}
}
}