# -*- coding: iso-8859-1 -*- """ fixedlenfields.py: reads and writes files with one data set per line with fixed-length fields The design is inspired by module csv in Python 2.3+. written by Michael Ströder Copyright given to Python Software Foundation $Id: fixedlenfields.py,v 1.9 2008/03/03 20:45:05 michael Exp $ Compability: Needs Python 2.2 or above (tested with Python 2.4.2 and Python 2.5.1). """ # Only needed by Python 2.2 from __future__ import generators __version__ = '$Revision: 1.9 $'.split(' ')[1] __all__ = [ 'ReadError', 'InvalidLineTerminator', 'InvalidLineLength', 'ReaderFormat', 'reader' ] import types class ReadError(Exception): """ Base class for errors during reading data """ pass class InvalidLineTerminator(ReadError): """ Line terminator did not match """ pass class InvalidLineLength(ReadError): """ Line length was invalid """ pass class InvalidFieldIndex(ReadError): """ Raised if IndexError was raised during extracting the fields """ pass class ReaderFormat: def __init__(self,linelength,findex,hasheader=False,lineterminator = '\r\n'): """ linelength Integer defining the line's length without trailing line terminator. If None the line is not checked. Otherwise InvalidLineLength is raised if the length of a read line does not match. findex Sequence of tuples defining start index and length of fields in the line. The length of findex is assumed to be the number of fields. If a field is defined with length None the field ends at linelength. hasheader Boolean specifying whether the first row of the table is a header line. Default is False lineterminator The string used to terminate lines in the input file. It defaults to '\r\n'. """ self.linelength = linelength self.hasheader = hasheader self.lineterminator = lineterminator # Pre-calculate all the slice indexes self.findex = [] for i,l in findex: if l is None: self.findex.append((i,None)) else: self.findex.append((i,i+l)) self.findex = tuple(self.findex) class BaseReader: """ Iterator for reading files with fixed length fields """ def __init__(self,fileobj,fmt): """ fileobj file object to read from fmt Instance of class ReaderFormat specifying the input format """ self.fileobj = fileobj self.fmt = fmt self.fieldcount = len(self.fmt.findex) self.lineterminatorlen = len(self.fmt.lineterminator) self.linecounter = 0 if self.fmt.hasheader: # Consume and store header line line = self.fileobj.readline() self.linecounter += 1 self.headers = self._split(line) else: self.headers = None def __iter__(self): return self def _split(self,line): """ Splits the line into fields according to the field format specified. """ # Check whether line ends with pre-defined line terminator if not line.endswith(self.fmt.lineterminator): raise InvalidLineTerminator,"Line %d has invalid line terminator %s" % (self.linecounter,repr(line[-2:])) line = line[:-self.lineterminatorlen] if self.fmt.linelength!=None: # Check whether line length without line terminator is correct if len(line)!=self.fmt.linelength: raise InvalidLineLength,"Line %d has invalid length %d" % (self.linecounter,len(line)) # Split the fields into result tuple try: result = tuple([ line[startpos:endpos] for startpos,endpos in self.fmt.findex ]) except IndexError: raise InvalidFieldIndex,"Line %d caused IndexError for slice [%d:%d]" % (self.linecounter,startpos,endpos) return result # _split() def next(self): """ Grabs the next line from input file object and returns a tuple containing the fields """ line = self.fileobj.readline() self.linecounter += 1 if line: return self._split(line) raise StopIteration # The Reader class to be returned by factory function reader() Reader = BaseReader def reader(fileobj,fmt): """ Returns a Reader object which will iterate over lines in the given fileobj. """ return Reader(fileobj,fmt)