bumping version, improving field type guessing
This commit is contained in:
parent
730073dcd1
commit
b40e736ae0
4 changed files with 49 additions and 29 deletions
|
@ -65,7 +65,7 @@ class TextField(Field):
|
||||||
|
|
||||||
|
|
||||||
class StateField(TextField):
|
class StateField(TextField):
|
||||||
def __init__(self, name=None, required=True, use_numeric=False):
|
def __init__(self, name=None, required=True, use_numeric=False, max_length=2):
|
||||||
super(StateField, self).__init__(name=name, max_length=2, required=required)
|
# Forward the caller-supplied max_length (default 2) instead of a hard-coded 2, so the parameter is not silently ignored.
super(StateField, self).__init__(name=name, max_length=max_length, required=required)
|
||||||
self.use_numeric = use_numeric
|
self.use_numeric = use_numeric
|
||||||
|
|
||||||
|
|
|
@ -132,7 +132,7 @@ class RecordBuilder(object):
|
||||||
(fields.BlankField, {
|
(fields.BlankField, {
|
||||||
'regexp': {
|
'regexp': {
|
||||||
'name': [
|
'name': [
|
||||||
re.compile(r'^blank$'),
|
(re.compile(r'^blank$', re.IGNORECASE), +1),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
|
@ -140,9 +140,13 @@ class RecordBuilder(object):
|
||||||
(fields.MoneyField, {
|
(fields.MoneyField, {
|
||||||
'regexp': {
|
'regexp': {
|
||||||
'desc': [
|
'desc': [
|
||||||
re.compile(r'right\-justified'),
|
(re.compile(r'right\-justif', re.IGNORECASE), +1),
|
||||||
re.compile(r'amount'),
|
(re.compile(r'amount', re.IGNORECASE), +1),
|
||||||
re.compile(r'zero\-filled'),
|
(re.compile(r'zero\-filled', re.IGNORECASE), +1),
|
||||||
|
(re.compile(r'leading zeroes', re.IGNORECASE), +1),
|
||||||
|
|
||||||
|
# Negative score for left-justified descriptions. The hyphen is escaped as in the IntegerField entry; the previous r'left-\justif' used '\j', which Python 3 `re` rejects as a bad escape.
(re.compile(r'left\-justif', re.IGNORECASE), -1),
|
||||||
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
|
@ -150,7 +154,7 @@ class RecordBuilder(object):
|
||||||
(fields.TextField, {
|
(fields.TextField, {
|
||||||
'regexp': {
|
'regexp': {
|
||||||
'desc': [
|
'desc': [
|
||||||
re.compile(r'enter blanks'),
|
(re.compile(r'blanks', re.IGNORECASE), +1),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
|
@ -158,12 +162,24 @@ class RecordBuilder(object):
|
||||||
(fields.StateField, {
|
(fields.StateField, {
|
||||||
'regexp': {
|
'regexp': {
|
||||||
'desc': [
|
'desc': [
|
||||||
re.compile(r'state'),
|
(re.compile(r'state', re.IGNORECASE), +1),
|
||||||
re.compile(r'postal'),
|
(re.compile(r'postal', re.IGNORECASE), +1),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
'length': ['=2'],
|
'length': ['=2'],
|
||||||
})
|
}),
|
||||||
|
|
||||||
|
(fields.IntegerField, {
|
||||||
|
'regexp': {
|
||||||
|
'desc': [
|
||||||
|
(re.compile(r'right\-justif', re.IGNORECASE), +1),
|
||||||
|
(re.compile(r'leading zeroes', re.IGNORECASE), +1),
|
||||||
|
(re.compile(r'number', re.IGNORECASE), +1),
|
||||||
|
|
||||||
|
(re.compile(r'left\-justif', re.IGNORECASE), -1),
|
||||||
|
],
|
||||||
|
},
|
||||||
|
}),
|
||||||
]
|
]
|
||||||
|
|
||||||
def load(self, entries):
|
def load(self, entries):
|
||||||
|
@ -233,8 +249,8 @@ class RecordBuilder(object):
|
||||||
|
|
||||||
if 'regexp' in criteria:
|
if 'regexp' in criteria:
|
||||||
for crit_key, crit_values in criteria['regexp'].items():
|
for crit_key, crit_values in criteria['regexp'].items():
|
||||||
for crit_re in crit_values:
|
for (crit_re, score) in crit_values:
|
||||||
matches[classtype] += 1 if crit_re.search(entry[crit_key]) else 0
|
matches[classtype] += score if crit_re.search(entry[crit_key]) else 0
|
||||||
|
|
||||||
|
|
||||||
matches = list(matches.items())
|
matches = list(matches.items())
|
||||||
|
|
|
@ -21,6 +21,8 @@ def generate_imports():
|
||||||
])
|
])
|
||||||
|
|
||||||
def generate_class_begin(name):
|
def generate_class_begin(name):
|
||||||
|
name = re.sub(r"^[\d]*", "", name)
|
||||||
|
print name
|
||||||
return "class %s(mode.Model):\n" % name
|
return "class %s(mode.Model):\n" % name
|
||||||
|
|
||||||
if args.full:
|
if args.full:
|
||||||
|
@ -67,7 +69,9 @@ for rec in records:
|
||||||
|
|
||||||
#print last_record_ends_at + 1, begins_at
|
#print last_record_ends_at + 1, begins_at
|
||||||
if last_record_ends_at + 1 != begins_at:
|
if last_record_ends_at + 1 != begins_at:
|
||||||
sys.stdout.write("\nclass %s(object):\n" % re.sub('[^\w]','',rec[0].split(':')[-1]))
|
name = re.sub('^[^a-zA-Z]*','',rec[0].split(':')[-1])
|
||||||
|
name = re.sub('[^\w]*', '', name)
|
||||||
|
sys.stdout.write("\nclass %s(object):\n" % name)
|
||||||
|
|
||||||
for field in builder.load(map(lambda x:x.tuple, rec[1][0:])):
|
for field in builder.load(map(lambda x:x.tuple, rec[1][0:])):
|
||||||
sys.stdout.write('\t' + field + '\n')
|
sys.stdout.write('\t' + field + '\n')
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -1,6 +1,6 @@
|
||||||
from distutils.core import setup
|
from distutils.core import setup
|
||||||
setup(name='pyaccuwage',
|
setup(name='pyaccuwage',
|
||||||
version='0.2012.0',
|
version='0.2012.1',
|
||||||
packages=['pyaccuwage'],
|
packages=['pyaccuwage'],
|
||||||
scripts=['scripts/pyaccuwage-parse', 'scripts/pyaccuwage-pdfparse'],
|
scripts=['scripts/pyaccuwage-parse', 'scripts/pyaccuwage-pdfparse'],
|
||||||
zip_safe=True,
|
zip_safe=True,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue