Tek-Tips is the largest IT community on the Internet today!

Members share and learn making Tek-Tips Forums the best source of peer-reviewed technical information on the Internet!

  • Congratulations IamaSherpa on being selected by the Tek-Tips community for having the most helpful posts in the forums last week. Way to Go!

export data from Cobol data files 2

Status
Not open for further replies.

mikrom

Programmer
Mar 27, 2002
2,997
SK
I tried to solve (approximately) the task described in the COBOL thread: given is the binary file Bp081005.dat, which contains COBOL data (character, zoned, packed), all merged into one fixed-length record of 774 bytes. The details are explained in the above thread.

To parse the records I use an experimental method. Therefore my script has a config file, where I first configure the field types and their lengths, and then I run the script. If I then see that the parsing gives something usable as a result, I accept the field definition; otherwise I need to change some of the settings. All that is very time-consuming, but it brings results in the end.
The script displays the screen output and exports the data to the CSV output file Bp081005.dat.csv

Here is the config file cbl_data.cfg
Code:
# The definition of every field has a form:
#
# <Field DataType>:<Field Length>
#
# where 
# <Field DataType> can be:
#   X - Character Field
#   Z - Zoned Decimal (Unpacked)
#   P - Packed Decimal
# 
# <Field Length> is number of characters or number of digits
# (But not number of Bytes !!!)
# If the field is Packed Decimal then the length in bytes will 
# be computed automatically by the program

x:10    # character field - probably key
z:226   # unpacked numeric
z:3
z:3
z:12
p:11
p:11
p:11
p:11
p:11
p:11
z:2
z:3
z:3
z:12
p:11
p:11
p:11
p:11
p:11
p:11
z:20
p:11
p:11
p:11
p:11
p:11
p:11
z:20
p:11
p:11
p:11
p:11
p:11
p:11
p:11
p:11
z:12
z:12
z:40
p:11
p:11
p:11
p:11
p:11
p:11
p:11
z:96
z:30
p:11
p:11
p:11
p:11
p:11
p:11
p:11
p:11
p:11
p:11
p:11
p:11

and here is the python program cbl_data.py which does the work
Code:
import sys, string, array
# The functions zoned2num() and packed2num() are taken from Carey Evans
# ([URL unfurl="true"]http://mail.python.org/pipermail/python-list/2000-April/031051.html)[/URL]
def zoned2num(z):
  """Convert a zoned decimal (unpacked) field to an integer.

  Every byte carries one decimal digit in its low nibble.  The high
  nibble of the last byte is the sign: 0xD (or the alternative minus
  code 0xB) marks a negative number, anything else is positive.

  z -- raw bytes of the field (str on Python 2, bytes on Python 3)

  Returns the value as an integer; an empty field yields 0.
  """
  a = array.array('B', z)
  if not a:
    # no digits at all - nothing to look at for the sign either
    return 0

  # a plain 0 is enough: integers grow as needed, no 'L' suffix required,
  # which keeps the function usable on Python 3 as well
  v = 0
  for i in a:
    v = (v * 10) + (i & 0xf)

  if (a[-1] & 0xf0) in (0xb0, 0xd0):
    v = -v

  return v

def packed2num(p):
  """Convert a packed decimal (COMP-3) field to an integer.

  Every byte holds two decimal digits, one per nibble, except the last
  byte: its high nibble is the final digit and its low nibble is the
  sign.  0xD (or the alternative minus code 0xB) marks a negative
  number, anything else is positive.

  p -- raw bytes of the field (str on Python 2, bytes on Python 3)

  Returns the value as an integer; an empty field yields 0.
  """
  a = array.array('B', p)
  if not a:
    # without a last byte there is neither a digit nor a sign nibble
    return 0

  # a plain 0 is enough: integers grow as needed, no 'L' suffix required,
  # which keeps the function usable on Python 3 as well
  v = 0
  for i in a[:-1]:
    v = (v * 100) + ((i >> 4) * 10) + (i & 0xf)

  last = a[-1]
  v = (v * 10) + (last >> 4)
  if (last & 0xf) in (0xb, 0xd):
    v = -v

  return v


def create_field_list(config_file):
  """Return the list of field definitions read from config_file.

  Every line of the file, after a '#' comment and the surrounding blanks
  have been removed, is either empty or has the form

    <DataType>:<length>

  where <DataType> is X, Z or P (upper or lower case) and <length> is a
  number of characters / digits.  Blanks around the ':' are allowed.

  The result contains one [data type, length in bytes] list per
  definition, in file order.  For the data type P the configured number
  of digits is converted to bytes: digits // 2 + 1 (two digits per byte
  plus the sign nibble).

  A malformed line terminates the program through sys.exit() with an
  error message, i.e. with a non-zero exit status.
  Works with Python 2.6+ and Python 3.
  """
  config_list = []
  # the with-block closes the config file, also when we exit on an error
  with open(config_file) as cfg:
    for nr_lines, line in enumerate(cfg, 1):
      # remove the comment and strip blanks from both ends
      cfg_line = line.split('#', 1)[0].strip()
      # process only not empty lines
      if not cfg_line:
        continue
      if ':' not in cfg_line:
        sys.exit("Error in config line %d:\n'%s'\n"
                 "Line should have a form <DataType>:<length> !"
                 % (nr_lines, cfg_line))
      # to uppercase and split into <DataType> and <length>
      cfg_line_list = cfg_line.upper().split(':')
      data_type = cfg_line_list[0].strip()
      length_text = cfg_line_list[1].strip()
      # an unknown type could not be decoded later, so reject it here
      if data_type not in ('X', 'Z', 'P'):
        sys.exit("Error in config line %d:\n'%s'\n"
                 "Data type should be X, Z or P !"
                 % (nr_lines, cfg_line))
      if not length_text.isdigit():
        sys.exit("Error in config line %d:\n'%s'\n"
                 "Data type length is not numeric !"
                 % (nr_lines, cfg_line))
      length = int(length_text)
      # compute the length in bytes of Packed Decimal (COMP-3);
      # '//' keeps the result an integer on Python 3 too
      if data_type == 'P':
        length = length // 2 + 1
      config_list.append([data_type, length])
  # return list of fields
  return config_list

def parse_cbl_data(cbl_data_file,
                   record_size, nr_records, field_list,
                   format_packed=1, dbg_mode=1):
  """Parse fixed-length records from cbl_data_file and export them as CSV.

  Every record is cut into fields according to field_list.  The field
  values are printed and written, joined with ';', as one line per
  record to the file cbl_data_file + ".csv".

  cbl_data_file -- name of the binary input file
  record_size   -- length of one record in bytes
  nr_records    -- maximum number of records to process; reading stops
                   earlier when the file has no further complete record
  field_list    -- list of [data type, length in bytes] items, data type
                   being 'X', 'Z' or 'P'
  format_packed = 0 print digits in packed fields as encoded
                = 1 print packed fields as numbers with 2 decimals
  dbg_mode      = 0 without debug info
                = 1 print debug info

  Raises ValueError for a data type other than 'X', 'Z' or 'P'.
  Works with Python 2.6+ and Python 3.
  """
  # known up front, so the summary also works when no record was read
  total_bytes = sum([field[1] for field in field_list])
  # Python 3 needs an explicit encoding for the text output; latin-1 maps
  # every byte to exactly one character, so X data passes through unchanged
  if str is bytes:
    csv_open_args = {}
  else:
    csv_open_args = {'encoding': 'latin-1'}
  nr_rec = 0
  # the with-blocks close both files even if parsing fails half-way;
  # the input file is opened in binary mode for reading
  with open(cbl_data_file, "rb") as f:
    with open(cbl_data_file + ".csv", "w", **csv_open_args) as csv_out_file:
      # process records
      while nr_rec < nr_records:
        rec = f.read(record_size)
        if len(rec) < record_size:
          # end of file or an incomplete trailing record: nothing to parse
          break
        nr_rec += 1
        print("* Processing %0004d record " % nr_rec)
        if dbg_mode:
          print(repr(rec))
        # parse fields
        start_byte = 0
        data_line_list = []
        for field in field_list:
          end_byte = start_byte + field[1]
          field_data = rec[start_byte:end_byte]
          if dbg_mode:
            print(repr(field_data))
          if field[0] == 'X':
            fld_data = field_data
            if not isinstance(fld_data, str):
              # Python 3: turn the bytes into text, one character per byte
              fld_data = fld_data.decode('latin-1')
          elif field[0] == 'Z':
            fld_data = str(zoned2num(field_data))
          elif field[0] == 'P':
            fld_data_num = packed2num(field_data)
            if format_packed:
              # split off the two decimals arithmetically, so that small
              # and negative values come out right (5 -> 0.05, -5 -> -0.05)
              sign = ''
              if fld_data_num < 0:
                sign = '-'
              whole, decimals = divmod(abs(fld_data_num), 100)
              fld_data = "%s%d.%02d" % (sign, whole, decimals)
            else:
              fld_data = str(fld_data_num)
          else:
            # without this check the value of the previous field would
            # silently be written once more
            raise ValueError("Unknown data type '%s' in field list"
                             % field[0])
          print(fld_data)
          data_line_list.append(fld_data)
          start_byte = end_byte
        # write data_line in a result file:
        data_line = ';'.join(data_line_list) + "\n"
        if dbg_mode:
          print(data_line_list)
          print(data_line)
        csv_out_file.write(data_line)
  # summary; reports the records really read, not the requested number
  print("number of records processed = %d" % nr_rec)
  print("number of fields in record  = %d" % len(field_list))
  print("bytes in fields = %d / bytes in record = %d"
        % (total_bytes, record_size))
  if total_bytes > record_size:
    print("The record is not correct divided into fields !")

# Main program
if __name__ == "__main__":
  # quick manual checks of the converters, kept for reference:
  #   print packed2num('\x12\x34\x5f')
  #   print zoned2num('\xf1\xf2\xd3')
  #   print zoned2num('\x01\x02\x03')
  # the record layout comes from the config file
  field_list = create_field_list('cbl_data.cfg')
  # export 277 records of 774 bytes each, without debug output
  parse_cbl_data(cbl_data_file='Bp081005.dat',
                 record_size=774,
                 nr_records=277,
                 field_list=field_list,
                 dbg_mode=0)
The results from the CSV file (encoded COBOL data) look like:
Code:
ABB       	0	600	  0	 500 000 000 000    	982.00	300.00	65.00
ENERGO    	0	500	500	 300 000 000 000    	960.00	270.00	39.00
          	2	404	803	 430 000 000 000    	28003.00	2730.00	2760.30
WASA      	0	100	300	 300 000 000 000    	540.00	0.00	39.00
KWB KONIN 	0	400	400	 400 000 000 000    	480.00	0.00	13.00
UNICO     	0	500	500	 500 000 000 000    	1660.00	420.00	195.20
ELEKTROWN 	0	500	700	 400 000 000 000    	930.00	0.00	52.00
INST.CHEM.	0	100	100	 100 000 000 000    	160.00	0.00	0.00
GMINA     	0	401	300	 880 000 000 000    	120.00	0.00	26.00
BIOENERGIA	0	500	500	 500 000 000 000    	900.00	0.00	1434.90
DPK       	0	700	900	 390 000 000 000    	750.00	270.00	26.00
ECOLAB    	0	100	100	 100 000 000 000    	180.00	0.00	13.00
PRUF      	0	800	200	               -    	2895.00	750.00	0.00
ASKOM     	0	300	300	 300 000 000 000    	380.00	30.00	26.00
FOSTER    	0	  0	  0	               -    	0.00	0.00	39.00
KLIMATECH 	0	  0	  0	               -    	0.00	0.00	0.00
STAL-KRAFT	0	100	100	 100 000 000 000    	180.00	0.00	13.00
 
There doesn't seem to be a question here. Maybe this should be a FAQ entry.

_________________
Bob Rashkin
 
As I look at the Python source I posted yesterday, I see that the code is unusable - it will not work, because the indentation, which is a "feature" of Python, is corrupted, and the docstring delimiters are not correct. I don't know why; I probably mixed tabs and spaces in the source.
 
Here is the corrected source of cbl_data.py, I hope now the indentation will be displayed correctly:
Code:
import sys, string, array
# The functions zoned2num() and packed2num() are taken from Carey Evans
# ([URL unfurl="true"]http://mail.python.org/pipermail/python-list/2000-April/031051.html)[/URL]
def zoned2num(z):
  """Convert a zoned decimal (unpacked) field to an integer.

  Every byte carries one decimal digit in its low nibble.  The high
  nibble of the last byte is the sign: 0xD (or the alternative minus
  code 0xB) marks a negative number, anything else is positive.

  z -- raw bytes of the field (str on Python 2, bytes on Python 3)

  Returns the value as an integer; an empty field yields 0.
  """
  a = array.array('B', z)
  if not a:
    # no digits at all - nothing to look at for the sign either
    return 0

  # a plain 0 is enough: integers grow as needed, no 'L' suffix required,
  # which keeps the function usable on Python 3 as well
  v = 0
  for i in a:
    v = (v * 10) + (i & 0xf)

  if (a[-1] & 0xf0) in (0xb0, 0xd0):
    v = -v

  return v

def packed2num(p):
  """Convert a packed decimal (COMP-3) field to an integer.

  Every byte holds two decimal digits, one per nibble, except the last
  byte: its high nibble is the final digit and its low nibble is the
  sign.  0xD (or the alternative minus code 0xB) marks a negative
  number, anything else is positive.

  p -- raw bytes of the field (str on Python 2, bytes on Python 3)

  Returns the value as an integer; an empty field yields 0.
  """
  a = array.array('B', p)
  if not a:
    # without a last byte there is neither a digit nor a sign nibble
    return 0

  # a plain 0 is enough: integers grow as needed, no 'L' suffix required,
  # which keeps the function usable on Python 3 as well
  v = 0
  for i in a[:-1]:
    v = (v * 100) + ((i >> 4) * 10) + (i & 0xf)

  last = a[-1]
  v = (v * 10) + (last >> 4)
  if (last & 0xf) in (0xb, 0xd):
    v = -v

  return v


def create_field_list(config_file):
  """Return the list of field definitions read from config_file.

  Every line of the file, after a '#' comment and the surrounding blanks
  have been removed, is either empty or has the form

    <DataType>:<length>

  where <DataType> is X, Z or P (upper or lower case) and <length> is a
  number of characters / digits.  Blanks around the ':' are allowed.

  The result contains one [data type, length in bytes] list per
  definition, in file order.  For the data type P the configured number
  of digits is converted to bytes: digits // 2 + 1 (two digits per byte
  plus the sign nibble).

  A malformed line terminates the program through sys.exit() with an
  error message, i.e. with a non-zero exit status.
  Works with Python 2.6+ and Python 3.
  """
  config_list = []
  # the with-block closes the config file, also when we exit on an error
  with open(config_file) as cfg:
    for nr_lines, line in enumerate(cfg, 1):
      # remove the comment and strip blanks from both ends
      cfg_line = line.split('#', 1)[0].strip()
      # process only not empty lines
      if not cfg_line:
        continue
      if ':' not in cfg_line:
        sys.exit("Error in config line %d:\n'%s'\n"
                 "Line should have a form <DataType>:<length> !"
                 % (nr_lines, cfg_line))
      # to uppercase and split into <DataType> and <length>
      cfg_line_list = cfg_line.upper().split(':')
      data_type = cfg_line_list[0].strip()
      length_text = cfg_line_list[1].strip()
      # an unknown type could not be decoded later, so reject it here
      if data_type not in ('X', 'Z', 'P'):
        sys.exit("Error in config line %d:\n'%s'\n"
                 "Data type should be X, Z or P !"
                 % (nr_lines, cfg_line))
      if not length_text.isdigit():
        sys.exit("Error in config line %d:\n'%s'\n"
                 "Data type length is not numeric !"
                 % (nr_lines, cfg_line))
      length = int(length_text)
      # compute the length in bytes of Packed Decimal (COMP-3);
      # '//' keeps the result an integer on Python 3 too
      if data_type == 'P':
        length = length // 2 + 1
      config_list.append([data_type, length])
  # return list of fields
  return config_list

def parse_cbl_data(cbl_data_file,
                   record_size, nr_records, field_list,
                   format_packed=1, dbg_mode=1):
  """Parse fixed-length records from cbl_data_file and export them as CSV.

  Every record is cut into fields according to field_list.  The field
  values are printed and written, joined with ';', as one line per
  record to the file cbl_data_file + ".csv".

  cbl_data_file -- name of the binary input file
  record_size   -- length of one record in bytes
  nr_records    -- maximum number of records to process; reading stops
                   earlier when the file has no further complete record
  field_list    -- list of [data type, length in bytes] items, data type
                   being 'X', 'Z' or 'P'
  format_packed = 0 print digits in packed fields as encoded
                = 1 print packed fields as numbers with 2 decimals
  dbg_mode      = 0 without debug info
                = 1 print debug info

  Raises ValueError for a data type other than 'X', 'Z' or 'P'.
  Works with Python 2.6+ and Python 3.
  """
  # known up front, so the summary also works when no record was read
  total_bytes = sum([field[1] for field in field_list])
  # Python 3 needs an explicit encoding for the text output; latin-1 maps
  # every byte to exactly one character, so X data passes through unchanged
  if str is bytes:
    csv_open_args = {}
  else:
    csv_open_args = {'encoding': 'latin-1'}
  nr_rec = 0
  # the with-blocks close both files even if parsing fails half-way;
  # the input file is opened in binary mode for reading
  with open(cbl_data_file, "rb") as f:
    with open(cbl_data_file + ".csv", "w", **csv_open_args) as csv_out_file:
      # process records
      while nr_rec < nr_records:
        rec = f.read(record_size)
        if len(rec) < record_size:
          # end of file or an incomplete trailing record: nothing to parse
          break
        nr_rec += 1
        print("* Processing %0004d record " % nr_rec)
        if dbg_mode:
          print(repr(rec))
        # parse fields
        start_byte = 0
        data_line_list = []
        for field in field_list:
          end_byte = start_byte + field[1]
          field_data = rec[start_byte:end_byte]
          if dbg_mode:
            print(repr(field_data))
          if field[0] == 'X':
            fld_data = field_data
            if not isinstance(fld_data, str):
              # Python 3: turn the bytes into text, one character per byte
              fld_data = fld_data.decode('latin-1')
          elif field[0] == 'Z':
            fld_data = str(zoned2num(field_data))
          elif field[0] == 'P':
            fld_data_num = packed2num(field_data)
            if format_packed:
              # split off the two decimals arithmetically, so that small
              # and negative values come out right (5 -> 0.05, -5 -> -0.05)
              sign = ''
              if fld_data_num < 0:
                sign = '-'
              whole, decimals = divmod(abs(fld_data_num), 100)
              fld_data = "%s%d.%02d" % (sign, whole, decimals)
            else:
              fld_data = str(fld_data_num)
          else:
            # without this check the value of the previous field would
            # silently be written once more
            raise ValueError("Unknown data type '%s' in field list"
                             % field[0])
          print(fld_data)
          data_line_list.append(fld_data)
          start_byte = end_byte
        # write data_line in a result file:
        data_line = ';'.join(data_line_list) + "\n"
        if dbg_mode:
          print(data_line_list)
          print(data_line)
        csv_out_file.write(data_line)
  # summary; reports the records really read, not the requested number
  print("number of records processed = %d" % nr_rec)
  print("number of fields in record  = %d" % len(field_list))
  print("bytes in fields = %d / bytes in record = %d"
        % (total_bytes, record_size))
  if total_bytes > record_size:
    print("The record is not correct divided into fields !")

# Main program
if __name__ == "__main__":
  # quick manual checks of the converters, kept for reference:
  #   print packed2num('\x12\x34\x5f')
  #   print zoned2num('\xf1\xf2\xd3')
  #   print zoned2num('\x01\x02\x03')
  # the record layout comes from the config file
  field_list = create_field_list('cbl_data.cfg')
  # export 277 records of 774 bytes each, without debug output
  parse_cbl_data(cbl_data_file='Bp081005.dat',
                 record_size=774,
                 nr_records=277,
                 field_list=field_list,
                 dbg_mode=0)
 
Status
Not open for further replies.

Part and Inventory Search

Sponsor

Back
Top