Added length checking to the parser's field-matching criteria
This commit is contained in:
parent
2c9551f677
commit
ad5262e37e
1 changed file with 85 additions and 18 deletions
|
@ -1,7 +1,13 @@
|
|||
#!/usr/bin/python
|
||||
# coding=UTF-8
|
||||
"""
|
||||
Parser utility to read data from Publication 1220 and
|
||||
convert it into python classes.
|
||||
|
||||
"""
|
||||
import re
|
||||
|
||||
|
||||
class SimpleDefParser(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
@ -35,6 +41,36 @@ class SimpleDefParser(object):
|
|||
yield item
|
||||
|
||||
|
||||
class LengthExpression(object):
    """Evaluate simple length constraints such as ``'=2'`` or ``'<=40'``.

    An expression is an operator (one of the keys of ``OPS``) followed by a
    non-negative integer.  Whitespace around the operator is ignored.
    Instances are callable: ``expr(value, exps)`` is true when ``value``
    satisfies every expression in ``exps``.

    Compiled expressions are memoised per instance in ``exp_cache`` so each
    distinct expression string is parsed only once.
    """

    # Imported in the class body so the class does not rely on a
    # module-level ``import operator``.
    import operator

    # Group 1: the operator text (any run of non-digits, possibly empty).
    # Group 2: the integer operand.
    REG = re.compile(r'([^\d]*)(\d+)')

    # Operator text -> two-argument comparison function.
    OPS = {
        '<': operator.lt,
        '>': operator.gt,
        '<=': operator.le,
        '>=': operator.ge,
        '=': operator.eq,
    }

    def __init__(self):
        # Maps expression string -> (comparison function, integer operand).
        self.exp_cache = {}

    def __call__(self, value, exps):
        """Return True if ``value`` satisfies every expression in ``exps``.

        An empty ``exps`` is trivially satisfied.  Every expression is
        evaluated (no short-circuit), so a malformed expression is always
        reported even when an earlier one already failed.
        """
        return all([self.check(value, exp) for exp in exps])

    def compile_exp(self, exp):
        """Parse ``exp`` into a ``(comparison_function, operand)`` tuple.

        Raises:
            ValueError: if ``exp`` contains no integer operand or uses an
                operator that is not a key of ``OPS``.
        """
        match = self.REG.match(exp)
        if match is None:
            raise ValueError('invalid length expression: %r' % (exp,))

        op_text, operand = match.groups()
        op_text = op_text.strip()

        op = self.OPS.get(op_text)
        if op is None:
            # Fail here with a clear message instead of handing back None,
            # which would only blow up later when it is called.
            raise ValueError(
                'unknown operator %r in length expression %r' % (op_text, exp)
            )
        return (op, int(operand))

    def check(self, value, exp):
        """Return the result of applying the single expression ``exp`` to ``value``.

        The compiled form of ``exp`` is cached on first use.

        Raises:
            ValueError: if ``exp`` cannot be compiled (see ``compile_exp``).
        """
        try:
            op, opval = self.exp_cache[exp]
        except KeyError:
            op, opval = self.exp_cache[exp] = self.compile_exp(exp)
        return op(value, opval)
|
||||
|
||||
|
||||
class BaseToken(object):
|
||||
regexp = re.compile('(.*)')
|
||||
|
||||
|
@ -85,17 +121,41 @@ class PastedDefParser(object):
|
|||
]
|
||||
|
||||
FIELD_TYPES = [
|
||||
(fields.BlankField, {'name': [
|
||||
re.compile(r'^blank$'),
|
||||
]}),
|
||||
(fields.MoneyField, {'desc': [
|
||||
re.compile(r'right\-justified'),
|
||||
re.compile(r'amount'),
|
||||
re.compile(r'zero\-filled'),
|
||||
]}),
|
||||
(fields.TextField, {'desc': [
|
||||
re.compile(r'enter blanks')
|
||||
]})
|
||||
(fields.BlankField, {
|
||||
'regexp': {
|
||||
'name': [
|
||||
re.compile(r'^blank$'),
|
||||
],
|
||||
},
|
||||
}),
|
||||
|
||||
(fields.MoneyField, {
|
||||
'regexp': {
|
||||
'desc': [
|
||||
re.compile(r'right\-justified'),
|
||||
re.compile(r'amount'),
|
||||
re.compile(r'zero\-filled'),
|
||||
],
|
||||
},
|
||||
}),
|
||||
|
||||
(fields.TextField, {
|
||||
'regexp': {
|
||||
'desc': [
|
||||
re.compile(r'enter blanks'),
|
||||
],
|
||||
},
|
||||
}),
|
||||
|
||||
(fields.StateField, {
|
||||
'regexp': {
|
||||
'desc': [
|
||||
re.compile(r'state'),
|
||||
re.compile(r'postal'),
|
||||
],
|
||||
},
|
||||
'length': ['=2'],
|
||||
})
|
||||
]
|
||||
|
||||
def load(self, infile):
|
||||
|
@ -204,13 +264,20 @@ class PastedDefParser(object):
|
|||
|
||||
|
||||
def _guess_field_types(self, entries):
|
||||
lengthexp = LengthExpression()
|
||||
|
||||
for entry in entries:
|
||||
matches = dict(map(lambda x:(x[0],0), self.FIELD_TYPES))
|
||||
|
||||
for (classtype, criteria) in self.FIELD_TYPES:
|
||||
for crit_key, crit_values in criteria.items():
|
||||
for crit_re in crit_values:
|
||||
matches[classtype] += 1 if crit_re.search(entry[crit_key]) else 0
|
||||
if 'length' in criteria:
|
||||
if not lengthexp(int(entry['length']), criteria['length']):
|
||||
continue
|
||||
|
||||
if 'regexp' in criteria:
|
||||
for crit_key, crit_values in criteria['regexp'].items():
|
||||
for crit_re in crit_values:
|
||||
matches[classtype] += 1 if crit_re.search(entry[crit_key]) else 0
|
||||
|
||||
|
||||
matches = list(matches.items())
|
||||
|
@ -230,10 +297,10 @@ class PastedDefParser(object):
|
|||
|
||||
# FIELD NAME
|
||||
if entry['name'] == 'blank':
|
||||
add('blank%d' % blank_count)
|
||||
add( (u'blank%d' % blank_count).ljust(40) )
|
||||
blank_count += 1
|
||||
else:
|
||||
add(entry['name'])
|
||||
add(entry['name'].ljust(40))
|
||||
|
||||
add(' = ')
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue