///
/// Author: Ahmed Lacevic
/// Date: 12/1/2007
/// Desc: 
/// 
/// Revision History:
/// -----------------------------------
/// Author:
/// Date:
/// Desc: 

using System;
using System.Collections.Generic;
using System.Text;
using System.IO;

namespace SocialExplorer.IO.FastDBF
{

    /// <summary>
    /// This class represents a DBF IV file header.
    /// </summary>
    /// <remarks>
    /// DBF files are wasteful on space, but this legacy format lives on because it is really simple.
    /// It lacks much in features though.
    ///
    /// Thanks to Erik Bachmann for providing the DBF file structure information:
    /// http://www.clicketyclick.dk/databases/xbase/format/dbf.html
    ///
    /// The main header occupies the first 32 bytes of the file:
    ///   00h        version number
    ///   01h-03h    date of last update (YY MM DD, year stored as years since 1900)
    ///   04h-07h    number of records in the data file (32 bits)
    ///   08h-09h    length of the header structure (16 bits)
    ///   0Ah-0Bh    length of each record (16 bits)
    ///   0Ch-0Dh    reserved
    ///   0Eh        incomplete transaction flag
    ///   0Fh        encryption flag
    ///   10h-13h    free record thread (reserved for LAN only)
    ///   14h-1Bh    reserved for multi-user dBASE (dBASE III+)
    ///   1Ch        MDX flag (dBASE IV)
    ///   1Dh        language driver
    ///   1Eh-1Fh    reserved
    ///
    /// It is followed by one 32-byte field descriptor per column:
    ///   00h-0Ah    field name in ASCII, terminated by 00h
    ///   0Bh        field type (ASCII)
    ///   0Ch-0Fh    field data address (in memory, dBASE III+)
    ///   10h        field length
    ///   11h        decimal count
    ///   12h-13h    reserved for multi-user dBASE
    ///   14h        work area ID
    ///   15h-16h    reserved for multi-user dBASE
    ///   17h        flag for SET FIELDS
    ///   18h-1Eh    reserved
    ///   1Fh        index field flag
    ///
    /// The field descriptor array is closed by a terminator byte (0Dh). Each data record then starts
    /// with a one-byte "record deleted" flag followed by the field data in ASCII, and the file ends
    /// with an end-of-file marker byte (1Ah).
    /// </remarks>
    public class DbfHeader : ICloneable
    {

        /// <summary>
        /// Header file descriptor size is 33 bytes (32 bytes + 1 terminator byte), followed by column metadata which is 32 bytes each.
        /// </summary>
        public const int FileDescriptorSize = 33;


        /// <summary>
        /// Field or DBF Column descriptor is 32 bytes long.
        /// </summary>
        public const int ColumnDescriptorSize = 32;


        //type of the file, must be 03h
        private const int _fileType = 0x03;

        //Date the file was last updated.
        private DateTime _updateDate;

        //Number of records in the datafile, 32bit little-endian, unsigned
        private uint _numRecords = 0;

        //Length of the header structure
        private ushort _headerLength = FileDescriptorSize;  //empty header is 33 bytes long. Each column adds 32 bytes.

        //Length of the records, ushort - unsigned 16 bit integer
        private int _recordLength = 1;  //start with 1 because the first byte is a delete flag

        //DBF fields/columns
        internal List<DbfColumn> _fields = new List<DbfColumn>();


        //indicates whether header columns can be modified!
        bool _locked = false;

        //keeps column name index for the header, must clear when header columns change.
        private Dictionary<string, int> _columnNameIndex = null;

        /// <summary>
        /// When object is modified dirty flag is set.
        /// </summary>
        bool _isDirty = false;


        /// <summary>
        /// mEmptyRecord is an array used to clear record data in CDbf4Record.
        /// This is shared by all record objects, used to speed up clearing fields or entire record.
        /// </summary>
        private byte[] _emptyRecord = null;


        public readonly Encoding encoding = Encoding.ASCII;


        [Obsolete]
        public DbfHeader()
        {
        }

        public DbfHeader(Encoding encoding)
        {
            this.encoding = encoding;
        }

        /// <summary>
        /// Specify initial column capacity.
        /// </summary>
        /// <param name="nFieldCapacity"></param>
        public DbfHeader(int nFieldCapacity)
        {
            _fields = new List<DbfColumn>(nFieldCapacity);
        }


        /// <summary>
        /// Gets header length.
        /// </summary>
        public ushort HeaderLength
        {
            get { return _headerLength; }
        }


        /// <summary>
        /// Add a new column to the DBF header.
        /// </summary>
        /// <param name="oNewCol"></param>
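        /// <example>
        /// A minimal usage sketch (the field names and sizes below are illustrative only):
        /// <code>
        /// var header = new DbfHeader(Encoding.ASCII);
        /// header.AddColumn(new DbfColumn("FIRSTNAME", DbfColumn.DbfColumnType.Character, 25, 0));
        /// header.AddColumn(new DbfColumn("LASTNAME", DbfColumn.DbfColumnType.Character, 25, 0));
        /// // RecordLength is now 1 (delete flag) + 25 + 25 = 51 bytes.
        /// </code>
        /// </example>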
        public void AddColumn(DbfColumn oNewCol)
        {

            //throw exception if the header is locked
            if (_locked)
                throw new InvalidOperationException("This header is locked and can not be modified. Modifying the header would result in a corrupt DBF file. You can unlock the header by calling UnLock() method.");

            //since we are breaking the spec rules about max number of fields, we should at least
            //check that the record length stays within a number that can be recorded in the header!
            //we have 2 unsigned bytes for record length for a maximum of 65535.
            if (_recordLength + oNewCol.Length > 65535)
                throw new ArgumentOutOfRangeException("oNewCol", "Unable to add new column. Adding this column puts the record length over the maximum (which is 65535 bytes).");


            //add the column
            _fields.Add(oNewCol);

            //update offset bits, record and header lengths
            oNewCol._dataAddress = _recordLength;
            _recordLength += oNewCol.Length;
            _headerLength += ColumnDescriptorSize;

            //clear empty record
            _emptyRecord = null;

            //set dirty bit
            _isDirty = true;
            _columnNameIndex = null;

        }


        /// <summary>
        /// Create and add a new column with specified name and type.
        /// </summary>
        /// <param name="sName"></param>
        /// <param name="type"></param>
        public void AddColumn(string sName, DbfColumn.DbfColumnType type)
        {
            AddColumn(new DbfColumn(sName, type));
        }


        /// <summary>
        /// Create and add a new column with specified name, type, length, and decimal precision.
        /// </summary>
        /// <param name="sName">Field name. Uniqueness is not enforced.</param>
        /// <param name="type"></param>
        /// <param name="nLength">Length of the field including decimal point and decimal numbers.</param>
        /// <param name="nDecimals">Number of decimal places to keep.</param>
        public void AddColumn(string sName, DbfColumn.DbfColumnType type, int nLength, int nDecimals)
        {
            AddColumn(new DbfColumn(sName, type, nLength, nDecimals));
        }


        /// <summary>
        /// Remove column from header definition.
        /// </summary>
        /// <param name="nIndex"></param>
        public void RemoveColumn(int nIndex)
        {
            //throw exception if the header is locked
            if (_locked)
                throw new InvalidOperationException("This header is locked and can not be modified. Modifying the header would result in a corrupt DBF file. You can unlock the header by calling UnLock() method.");


            DbfColumn oColRemove = _fields[nIndex];
            _fields.RemoveAt(nIndex);


            oColRemove._dataAddress = 0;
            _recordLength -= oColRemove.Length;
            _headerLength -= ColumnDescriptorSize;

            //when a column is removed, the offsets of all columns
            //following the one removed shift, so we need to update those offsets.
            int nRemovedColLen = oColRemove.Length;
            for (int i = nIndex; i < _fields.Count; i++)
                _fields[i]._dataAddress -= nRemovedColLen;

            //clear the empty record
            _emptyRecord = null;

            //set dirty bit
            _isDirty = true;
            _columnNameIndex = null;

        }


        /// <summary>
        /// Look up a column by name. NOT case sensitive. This is a change from previous behaviour!
        /// </summary>
        /// <param name="sName"></param>
        public DbfColumn this[string sName]
        {
            get
            {
                int colIndex = FindColumn(sName);
                if (colIndex > -1)
                    return _fields[colIndex];

                return null;
            }
        }


        /// <summary>
        /// Returns column at specified index. Index is 0 based.
        /// </summary>
        /// <param name="nIndex">Zero based index.</param>
        /// <returns></returns>
        public DbfColumn this[int nIndex]
        {
            get { return _fields[nIndex]; }
        }


        /// <summary>
        /// Finds a column index by using a fast dictionary lookup; creates the column dictionary on first use. Returns -1 if not found. CHANGE: not case sensitive any longer!
        /// </summary>
        /// <param name="sName">Column name (case insensitive comparison).</param>
        /// <returns>Column index (0 based) or -1 if not found.</returns>
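        /// <example>
        /// A minimal sketch (the column name is illustrative only):
        /// <code>
        /// var header = new DbfHeader(Encoding.ASCII);
        /// header.AddColumn("FIRSTNAME", DbfColumn.DbfColumnType.Character, 25, 0);
        /// int i = header.FindColumn("firstname");   // 0  -- lookup is not case sensitive
        /// int j = header.FindColumn("MISSING");     // -1 -- column does not exist
        /// </code>
        /// </example>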
        public int FindColumn(string sName)
        {

            if (_columnNameIndex == null)
            {
                _columnNameIndex = new Dictionary<string, int>(_fields.Count);

                //create a new index
                for (int i = 0; i < _fields.Count; i++)
                {
                    _columnNameIndex.Add(_fields[i].Name.ToUpper(), i);
                }
            }

            int columnIndex;
            if (_columnNameIndex.TryGetValue(sName.ToUpper(), out columnIndex))
                return columnIndex;

            return -1;

        }


        /// <summary>
        /// Returns an empty data record. This is used to clear columns.
        /// </summary>
        /// <remarks>
        /// The reason we put this in the header class is because it allows us to use the CDbf4Record class in two ways.
        /// 1. we can create one instance of the record and reuse it to write many records quickly, clearing the data array by bit-blitting to it.
        /// 2. we can create many instances of the record (a collection of records) and have only one copy of this empty dataset for all of them.
        /// If we had put it in the Record class then we would be taking up twice as much space unnecessarily. The empty record also fits the model,
        /// and everything is neatly encapsulated and safe.
        /// </remarks>
        protected internal byte[] EmptyDataRecord
        {
            get { return _emptyRecord ?? (_emptyRecord = encoding.GetBytes("".PadLeft(_recordLength, ' ').ToCharArray())); }
        }


        /// <summary>
        /// Returns Number of columns in this dbf header.
        /// </summary>
        public int ColumnCount
        {
            get { return _fields.Count; }
        }


        /// <summary>
        /// Size of one record in bytes. All fields + 1 byte delete flag.
        /// </summary>
        public int RecordLength
        {
            get { return _recordLength; }
        }


        /// <summary>
        /// Get/Set number of records in the DBF.
        /// </summary>
        /// <remarks>
        /// The reason we allow the client to set RecordCount is because in certain streams,
        /// like internet streams, we can not update the record count as we write out records; we have to set it in advance,
        /// so the client has to be able to modify this property.
        /// </remarks>
        public uint RecordCount
        {
            get { return _numRecords; }

            set
            {
                _numRecords = value;

                //set the dirty bit
                _isDirty = true;
            }
        }


        /// <summary>
        /// Get/set whether this header is read only or can be modified. When you create a CDbfRecord
        /// object and pass a header to it, CDbfRecord locks the header so that it can not be modified any longer,
        /// in order to preserve DBF integrity.
        /// </summary>
        internal bool Locked
        {
            get { return _locked; }
            set { _locked = value; }
        }


        /// <summary>
        /// Use this method with caution. Headers are locked for a reason: to prevent the DBF from becoming corrupt.
        /// </summary>
        public void Unlock()
        {
            _locked = false;
        }


        /// <summary>
        /// Returns true when this object is modified after read or write.
        /// </summary>
        public bool IsDirty
        {
            get { return _isDirty; }
            set { _isDirty = value; }
        }


        /// <summary>
        /// Encoding must be ASCII for this binary writer.
        /// </summary>
        /// <param name="writer"></param>
        /// <remarks>
        /// See class remarks for DBF file structure.
        /// </remarks>
        public void Write(BinaryWriter writer)
        {

            //write the header
            // write the output file type.
            writer.Write((byte)_fileType);

            //Update date format is YYMMDD, which is different from the column Date type (YYYYMMDD)
            writer.Write((byte)(_updateDate.Year - 1900));
            writer.Write((byte)_updateDate.Month);
            writer.Write((byte)_updateDate.Day);

            // write the number of records in the datafile. (32 bit number, little-endian unsigned)
            writer.Write(_numRecords);

            // write the length of the header structure.
            writer.Write(_headerLength);

            // write the length of a record
            writer.Write((ushort)_recordLength);

            // write the reserved bytes in the header
            for (int i = 0; i < 20; i++)
                writer.Write((byte)0);

            // write all of the header records
            byte[] byteReserved = new byte[14];  //these are initialized to 0 by default.
            foreach (DbfColumn field in _fields)
            {
                char[] cname = field.Name.PadRight(11, (char)0).ToCharArray();
                writer.Write(cname);

                // write the field type
                writer.Write((char)field.ColumnTypeChar);

                // write the field data address, offset from the start of the record.
                writer.Write(field.DataAddress);

                // write the length of the field.
                // if char field is longer than 255 bytes, then we use the decimal field as part of the field length.
                if (field.ColumnType == DbfColumn.DbfColumnType.Character && field.Length > 255)
                {
                    //treat decimal count as high byte of field length, this extends char field max to 65535
                    writer.Write((ushort)field.Length);
                }
                else
                {
                    // write the length of the field.
                    writer.Write((byte)field.Length);

                    // write the decimal count.
                    writer.Write((byte)field.DecimalCount);
                }
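                //A worked example (hypothetical values, not from any particular file): a character field
                //of length 1000 is written as the little-endian ushort 0x03E8, so the length byte holds
                //0xE8 (232) and the decimal-count byte holds 0x03; a reader that knows the convention
                //recombines them as 232 + 3 * 256 = 1000 (see the 'C' branch in Read() below).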
                // write the reserved bytes.
                writer.Write(byteReserved);

            }

            // write the end of the field definitions marker
            writer.Write((byte)0x0D);

            writer.Flush();

            //clear dirty bit
            _isDirty = false;

            //lock the header so it can not be modified any longer,
            //we could actually postpone this until the first record is written!
            _locked = true;

        }


        /// <summary>
        /// Read header data; make sure the stream is positioned at the start of the file to read the header, otherwise you will get an exception.
        /// When this function is done the position will be at the first record.
        /// </summary>
        /// <param name="reader"></param>
        public void Read(BinaryReader reader)
        {

            // type of reader.
            int nFileType = reader.ReadByte();

            if (nFileType != 0x03)
                throw new NotSupportedException("Unsupported DBF reader Type " + nFileType);

            // parse the update date information.
            int year = (int)reader.ReadByte();
            int month = (int)reader.ReadByte();
            int day = (int)reader.ReadByte();
            _updateDate = new DateTime(year + 1900, month, day);

            // read the number of records.
            _numRecords = reader.ReadUInt32();

            // read the length of the header structure.
            _headerLength = reader.ReadUInt16();

            // read the length of a record
            _recordLength = reader.ReadInt16();

            // skip the reserved bytes in the header.
            reader.ReadBytes(20);

            // calculate the number of Fields in the header
            int nNumFields = (_headerLength - FileDescriptorSize) / ColumnDescriptorSize;

            //offset from start of record, start at 1 because that's the delete flag.
            int nDataOffset = 1;

            // read all of the header records
            _fields = new List<DbfColumn>(nNumFields);
            for (int i = 0; i < nNumFields; i++)
            {

                // read the field name, which is terminated by a null byte
                char[] buffer = reader.ReadChars(11);
                string sFieldName = new string(buffer);
                int nullPoint = sFieldName.IndexOf((char)0);
                if (nullPoint != -1)
                    sFieldName = sFieldName.Substring(0, nullPoint);

                //read the field type
                char cDbaseType = (char)reader.ReadByte();

                // read the field data address, offset from the start of the record.
                int nFieldDataAddress = reader.ReadInt32();

                //read the field length in bytes
                //if field type is char, then read FieldLength and Decimal count as one number to allow char fields to be
                //longer than 256 bytes (ASCII char). This is the way Clipper and FoxPro do it, and there is really no downside
                //since for char fields decimal count should be zero for other versions that do not support this extended functionality.
                //-----------------------------------------------------------------------------------------------------------------------
                int nFieldLength = 0;
                int nDecimals = 0;
                if (cDbaseType == 'C' || cDbaseType == 'c')
                {
                    //treat decimal count as high byte
                    nFieldLength = (int)reader.ReadUInt16();
                }
                else
                {
                    //read field length as an unsigned byte.
                    nFieldLength = (int)reader.ReadByte();

                    //read decimal count as one byte
                    nDecimals = (int)reader.ReadByte();
                }

                //read the reserved bytes.
                reader.ReadBytes(14);

                //Create and add field to collection
                _fields.Add(new DbfColumn(sFieldName, DbfColumn.GetDbaseType(cDbaseType), nFieldLength, nDecimals, nDataOffset));

                // add up address information, you can not trust the address recorded in the DBF file...
                nDataOffset += nFieldLength;

            }

            // Last byte is a marker for the end of the field definitions.
            reader.ReadBytes(1);

            //read any extra header bytes...move to first record
            //equivalent to reader.BaseStream.Seek(mHeaderLength, SeekOrigin.Begin) except that we are not using the seek function since
            //we need to support streams that can not seek like web connections.
            int nExtraReadBytes = _headerLength - (FileDescriptorSize + (ColumnDescriptorSize * _fields.Count));
            if (nExtraReadBytes > 0)
                reader.ReadBytes(nExtraReadBytes);


            //if the stream is not forward-only, calculate the number of records using the file size,
            //since sometimes the header does not contain the correct record count.
            //if we are reading the file from the web, we have to use the ReadNext() functions anyway, so
            //the number of records is not so important and we can trust the DBF to have it stored correctly.
            if (reader.BaseStream.CanSeek && _numRecords == 0)
            {
                //notice here that we subtract the file end byte which is supposed to be 0x1A,
                //but some DBF files are incorrectly written without this byte, so we round off to the nearest integer.
                //that gives a correct result with or without the ending byte.
                if (_recordLength > 0)
                    _numRecords = (uint)Math.Round(((double)(reader.BaseStream.Length - _headerLength - 1) / _recordLength));
            }
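            //A worked example with hypothetical numbers, to illustrate the rounding: with a 1025-byte header
            //and 205-byte records, an 8406-byte file gives (8406 - 1025 - 1) / 205 = 36 records exactly;
            //the same file written without the trailing 0x1A byte is 8405 bytes long, and
            //(8405 - 1025 - 1) / 205 = 35.99... still rounds to 36.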

            //lock the header since it was read from a file. we don't want it modified because that would corrupt the file.
            //user can override this lock if really necessary by calling the UnLock() method.
            _locked = true;

            //clear dirty bit
            _isDirty = false;

        }


        public object Clone()
        {
            return this.MemberwiseClone();
        }

    }

}
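
// A minimal usage sketch (the file name and the use of FileStream/BinaryReader are illustrative,
// not part of this library): reading a header from disk and listing its columns.
//
//   using (var stream = new FileStream("people.dbf", FileMode.Open, FileAccess.Read))
//   using (var reader = new BinaryReader(stream))
//   {
//       var header = new DbfHeader(Encoding.ASCII);
//       header.Read(reader);
//       Console.WriteLine("{0} records, {1} bytes per record", header.RecordCount, header.RecordLength);
//       for (int i = 0; i < header.ColumnCount; i++)
//           Console.WriteLine(header[i].Name);
//   }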