bumping version, improving field type guessing

This commit is contained in:
Binh 2013-02-19 15:55:05 -06:00
parent 730073dcd1
commit b40e736ae0
4 changed files with 49 additions and 29 deletions

View file

@@ -12,7 +12,7 @@ class ValidationError(Exception):
return "(%s.%s) %s" % (self.field.parent_name, self.field.name, self.msg) return "(%s.%s) %s" % (self.field.parent_name, self.field.name, self.msg)
else: else:
return repr(self.msg) return repr(self.msg)
class Field(object): class Field(object):
creation_counter = 0 creation_counter = 0
@@ -31,13 +31,13 @@ class Field(object):
def get_data(self): def get_data(self):
raise NotImplemented raise NotImplemented
def __setvalue(self, value): def __setvalue(self, value):
self._value = value self._value = value
def __getvalue(self): def __getvalue(self):
return self._value return self._value
value = property(__getvalue, __setvalue) value = property(__getvalue, __setvalue)
def read(self, fp): def read(self, fp):
@@ -50,7 +50,7 @@ class Field(object):
self.value = s.strip() self.value = s.strip()
class TextField(Field): class TextField(Field):
def validate(self): def validate(self):
if self.value == None and self.required: if self.value == None and self.required:
raise ValidationError("value required", field=self) raise ValidationError("value required", field=self)
@@ -65,7 +65,7 @@ class TextField(Field):
class StateField(TextField): class StateField(TextField):
def __init__(self, name=None, required=True, use_numeric=False): def __init__(self, name=None, required=True, use_numeric=False, max_length=2):
super(StateField, self).__init__(name=name, max_length=2, required=required) super(StateField, self).__init__(name=name, max_length=2, required=required)
self.use_numeric = use_numeric self.use_numeric = use_numeric
@@ -90,7 +90,7 @@ class StateField(TextField):
class EmailField(TextField): class EmailField(TextField):
def __init__(self, name=None, required=True, max_length=None): def __init__(self, name=None, required=True, max_length=None):
return super(EmailField, self).__init__(name=name, max_length=max_length, return super(EmailField, self).__init__(name=name, max_length=max_length,
required=required, uppercase=False) required=required, uppercase=False)
class IntegerField(TextField): class IntegerField(TextField):
@@ -101,7 +101,7 @@ class IntegerField(TextField):
int(self.value) int(self.value)
except ValueError: except ValueError:
raise ValidationError("field contains non-numeric characters", field=self) raise ValidationError("field contains non-numeric characters", field=self)
def get_data(self): def get_data(self):
value = self.value or "" value = self.value or ""
@@ -123,7 +123,7 @@ class StaticField(TextField):
class BlankField(TextField): class BlankField(TextField):
def __init__(self, name=None, max_length=0, required=False): def __init__(self, name=None, max_length=0, required=False):
super(TextField, self).__init__(name=name, max_length=max_length, required=required, uppercase=False) super(TextField, self).__init__(name=name, max_length=max_length, required=required, uppercase=False)
def get_data(self): def get_data(self):
return " " * self.max_length return " " * self.max_length
@@ -161,7 +161,7 @@ class MoneyField(Field):
class DateField(TextField): class DateField(TextField):
def __init__(self, name=None, required=True, value=None): def __init__(self, name=None, required=True, value=None):
super(TextField, self).__init__(name=name, required=required, max_length=8) super(TextField, self).__init__(name=name, required=required, max_length=8)
if value: if value:
self.value = value self.value = value
@@ -169,7 +169,7 @@ class DateField(TextField):
if self._value: if self._value:
return self._value.strftime('%m%d%Y') return self._value.strftime('%m%d%Y')
return '0' * self.max_length return '0' * self.max_length
def parse(self, s): def parse(self, s):
if int(s) > 0: if int(s) > 0:
self.value = datetime.date(*[int(x) for x in s[4:8], s[0:2], s[2:4]]) self.value = datetime.date(*[int(x) for x in s[4:8], s[0:2], s[2:4]])
@@ -184,9 +184,9 @@ class DateField(TextField):
else: else:
self._value = None self._value = None
def __getvalue(self): def __getvalue(self):
return self._value return self._value
value = property(__getvalue, __setvalue) value = property(__getvalue, __setvalue)
@@ -196,7 +196,7 @@ class MonthYearField(TextField):
if value: if value:
self.value = value self.value = value
def get_data(self): def get_data(self):
if self._value: if self._value:
return self._value.strftime("%m%Y") return self._value.strftime("%m%Y")
@@ -215,9 +215,9 @@ class MonthYearField(TextField):
self._value = datetime.date(*[int(x) for x in value[2:6], value[0:2], 1]) self._value = datetime.date(*[int(x) for x in value[2:6], value[0:2], 1])
else: else:
self._value = None self._value = None
def __getvalue(self): def __getvalue(self):
return self._value return self._value
value = property(__getvalue, __setvalue) value = property(__getvalue, __setvalue)

View file

@@ -132,7 +132,7 @@ class RecordBuilder(object):
(fields.BlankField, { (fields.BlankField, {
'regexp': { 'regexp': {
'name': [ 'name': [
re.compile(r'^blank$'), (re.compile(r'^blank$', re.IGNORECASE), +1),
], ],
}, },
}), }),
@@ -140,9 +140,13 @@ class RecordBuilder(object):
(fields.MoneyField, { (fields.MoneyField, {
'regexp': { 'regexp': {
'desc': [ 'desc': [
re.compile(r'right\-justified'), (re.compile(r'right\-justif', re.IGNORECASE), +1),
re.compile(r'amount'), (re.compile(r'amount', re.IGNORECASE), +1),
re.compile(r'zero\-filled'), (re.compile(r'zero\-filled', re.IGNORECASE), +1),
(re.compile(r'leading zeroes', re.IGNORECASE), +1),
(re.compile(r'left-\justif', re.IGNORECASE), -1),
], ],
}, },
}), }),
@@ -150,7 +154,7 @@ class RecordBuilder(object):
(fields.TextField, { (fields.TextField, {
'regexp': { 'regexp': {
'desc': [ 'desc': [
re.compile(r'enter blanks'), (re.compile(r'blanks', re.IGNORECASE), +1),
], ],
}, },
}), }),
@@ -158,12 +162,24 @@ class RecordBuilder(object):
(fields.StateField, { (fields.StateField, {
'regexp': { 'regexp': {
'desc': [ 'desc': [
re.compile(r'state'), (re.compile(r'state', re.IGNORECASE), +1),
re.compile(r'postal'), (re.compile(r'postal', re.IGNORECASE), +1),
], ],
}, },
'length': ['=2'], 'length': ['=2'],
}) }),
(fields.IntegerField, {
'regexp': {
'desc': [
(re.compile(r'right\-justif', re.IGNORECASE), +1),
(re.compile(r'leading zeroes', re.IGNORECASE), +1),
(re.compile(r'number', re.IGNORECASE), +1),
(re.compile(r'left\-justif', re.IGNORECASE), -1),
],
},
}),
] ]
def load(self, entries): def load(self, entries):
@@ -233,8 +249,8 @@ class RecordBuilder(object):
if 'regexp' in criteria: if 'regexp' in criteria:
for crit_key, crit_values in criteria['regexp'].items(): for crit_key, crit_values in criteria['regexp'].items():
for crit_re in crit_values: for (crit_re, score) in crit_values:
matches[classtype] += 1 if crit_re.search(entry[crit_key]) else 0 matches[classtype] += score if crit_re.search(entry[crit_key]) else 0
matches = list(matches.items()) matches = list(matches.items())

View file

@@ -21,6 +21,8 @@ def generate_imports():
]) ])
def generate_class_begin(name): def generate_class_begin(name):
name = re.sub(r"^[\d]*", "", name)
print name
return "class %s(mode.Model):\n" % name return "class %s(mode.Model):\n" % name
if args.full: if args.full:
@@ -67,7 +69,9 @@ for rec in records:
#print last_record_ends_at + 1, begins_at #print last_record_ends_at + 1, begins_at
if last_record_ends_at + 1 != begins_at: if last_record_ends_at + 1 != begins_at:
sys.stdout.write("\nclass %s(object):\n" % re.sub('[^\w]','',rec[0].split(':')[-1])) name = re.sub('^[^a-zA-Z]*','',rec[0].split(':')[-1])
name = re.sub('[^\w]*', '', name)
sys.stdout.write("\nclass %s(object):\n" % name)
for field in builder.load(map(lambda x:x.tuple, rec[1][0:])): for field in builder.load(map(lambda x:x.tuple, rec[1][0:])):
sys.stdout.write('\t' + field + '\n') sys.stdout.write('\t' + field + '\n')

View file

@@ -1,6 +1,6 @@
from distutils.core import setup from distutils.core import setup
setup(name='pyaccuwage', setup(name='pyaccuwage',
version='0.2012.0', version='0.2012.1',
packages=['pyaccuwage'], packages=['pyaccuwage'],
scripts=['scripts/pyaccuwage-parse', 'scripts/pyaccuwage-pdfparse'], scripts=['scripts/pyaccuwage-parse', 'scripts/pyaccuwage-pdfparse'],
zip_safe=True, zip_safe=True,