From 7ddcfcc1c3bf55af61cacbeac6862691f6061785 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Sun, 27 Jan 2019 10:36:37 -0600
Subject: [PATCH 01/20] clean up some indent

---
 pyaccuwage/enums.py  | 649 ++++++++++++++++++++++---------------------
 pyaccuwage/fields.py |  12 +-
 pyaccuwage/model.py  |   5 -
 pyaccuwage/parser.py |   5 +-
 4 files changed, 331 insertions(+), 340 deletions(-)

diff --git a/pyaccuwage/enums.py b/pyaccuwage/enums.py
index b5c3c09..8c96ebf 100644
--- a/pyaccuwage/enums.py
+++ b/pyaccuwage/enums.py
@@ -1,338 +1,339 @@
 state_postal_numeric = {
-    'AL': 1,
-    'AK': 2,
-    'AZ': 4,
-    'AR': 5,
-    'CA': 6,
-    'CO': 8,
-    'CT': 9,
-    'DE': 10,
-    'DC': 11,
-    'FL': 12,
-    'GA': 13,
-    'HI': 15,
-    'ID': 16,
-    'IL': 17,
-    'IN': 18,
-    'IA': 19,
-    'KS': 20,
-    'KY': 21,
-    'LA': 22,
-    'ME': 23,
-    'MD': 24,
-    'MA': 25,
-    'MI': 26,
-    'MN': 27,
-    'MS': 28,
-    'MO': 29,
-    'MT': 30,
-    'NE': 31,
-    'NV': 32,
-    'NH': 33,
-    'NJ': 34,
-    'NM': 35,
-    'NY': 36,
-    'NC': 37,
-    'ND': 38,
-    'OH': 39,
-    'OK': 40,
-    'OR': 41,
-    'PA': 42,
-    'RI': 44,
-    'SC': 45,
-    'SD': 46,
-    'TN': 47,
-    'TX': 48,
-    'UT': 49,
-    'VT': 50,
-    'VA': 51,
-    'WA': 53,
-    'WV': 54,
-    'WI': 55,
-    'WY': 56,
+        'AL': 1,
+        'AK': 2,
+        'AZ': 4,
+        'AR': 5,
+        'CA': 6,
+        'CO': 8,
+        'CT': 9,
+        'DE': 10,
+        'DC': 11,
+        'FL': 12,
+        'GA': 13,
+        'HI': 15,
+        'ID': 16,
+        'IL': 17,
+        'IN': 18,
+        'IA': 19,
+        'KS': 20,
+        'KY': 21,
+        'LA': 22,
+        'ME': 23,
+        'MD': 24,
+        'MA': 25,
+        'MI': 26,
+        'MN': 27,
+        'MS': 28,
+        'MO': 29,
+        'MT': 30,
+        'NE': 31,
+        'NV': 32,
+        'NH': 33,
+        'NJ': 34,
+        'NM': 35,
+        'NY': 36,
+        'NC': 37,
+        'ND': 38,
+        'OH': 39,
+        'OK': 40,
+        'OR': 41,
+        'PA': 42,
+        'RI': 44,
+        'SC': 45,
+        'SD': 46,
+        'TN': 47,
+        'TX': 48,
+        'UT': 49,
+        'VT': 50,
+        'VA': 51,
+        'WA': 53,
+        'WV': 54,
+        'WI': 55,
+        'WY': 56,
 }
 
 countries = (
- ('AF', 'Afghanistan'),
- ('AX', 'Aland Islands'),
- ('AL', 'Albania'),
- ('DZ', 'Algeria'),
- ('AS', 'American Samoa'),
- ('AD', 'Andorra'),
- ('AO', 'Angola'),
- ('AI', 'Anguilla'),
- ('AQ', 'Antarctica'),
- ('AG', 'Antigua and Barbuda'),
- ('AR', 'Argentina'),
- ('AM', 'Armenia'),
- ('AW', 'Aruba'),
- ('AU', 'Australia'),
- ('AT', 'Austria'),
- ('AZ', 'Azerbaijan'),
- ('BS', 'Bahamas'),
- ('BH', 'Bahrain'),
- ('BD', 'Bangladesh'),
- ('BB', 'Barbados'),
- ('BY', 'Belarus'),
- ('BE', 'Belgium'),
- ('BZ', 'Belize'),
- ('BJ', 'Benin'),
- ('BM', 'Bermuda'),
- ('BT', 'Bhutan'),
- ('BO', 'Bolivia, Plurinational State of'),
- ('BQ', 'Bonaire, Saint Eustatius and Saba'),
- ('BA', 'Bosnia and Herzegovina'),
- ('BW', 'Botswana'),
- ('BV', 'Bouvet Island'),
- ('BR', 'Brazil'),
- ('IO', 'British Indian Ocean Territory'),
- ('BN', 'Brunei Darussalam'),
- ('BG', 'Bulgaria'),
- ('BF', 'Burkina Faso'),
- ('BI', 'Burundi'),
- ('KH', 'Cambodia'),
- ('CM', 'Cameroon'),
- ('CA', 'Canada'),
- ('CV', 'Cape Verde'),
- ('KY', 'Cayman Islands'),
- ('CF', 'Central African Republic'),
- ('TD', 'Chad'),
- ('CL', 'Chile'),
- ('CN', 'China'),
- ('CX', 'Christmas Island'),
- ('CC', 'Cocos (Keeling) Islands'),
- ('CO', 'Colombia'),
- ('KM', 'Comoros'),
- ('CG', 'Congo'),
- ('CD', 'Congo, The Democratic Republic of the'),
- ('CK', 'Cook Islands'),
- ('CR', 'Costa Rica'),
- ('CI', "Cote D'ivoire"),
- ('HR', 'Croatia'),
- ('CU', 'Cuba'),
- ('CW', 'Curacao'),
- ('CY', 'Cyprus'),
- ('CZ', 'Czech Republic'),
- ('DK', 'Denmark'),
- ('DJ', 'Djibouti'),
- ('DM', 'Dominica'),
- ('DO', 'Dominican Republic'),
- ('EC', 'Ecuador'),
- ('EG', 'Egypt'),
- ('SV', 'El Salvador'),
- ('GQ', 'Equatorial Guinea'),
- ('ER', 'Eritrea'),
- ('EE', 'Estonia'),
- ('ET', 'Ethiopia'),
- ('FK', 'Falkland Islands (Malvinas)'),
- ('FO', 'Faroe Islands'),
- ('FJ', 'Fiji'),
- ('FI', 'Finland'),
- ('FR', 'France'),
- ('GF', 'French Guiana'),
- ('PF', 'French Polynesia'),
- ('TF', 'French Southern Territories'),
- ('GA', 'Gabon'),
- ('GM', 'Gambia'),
- ('GE', 'Georgia'),
- ('DE', 'Germany'),
- ('GH', 'Ghana'),
- ('GI', 'Gibraltar'),
- ('GR', 'Greece'),
- ('GL', 'Greenland'),
- ('GD', 'Grenada'),
- ('GP', 'Guadeloupe'),
- ('GU', 'Guam'),
- ('GT', 'Guatemala'),
- ('GG', 'Guernsey'),
- ('GN', 'Guinea'),
- ('GW', 'Guinea-Bissau'),
- ('GY', 'Guyana'),
- ('HT', 'Haiti'),
- ('HM', 'Heard Island and McDonald Islands'),
- ('VA', 'Holy See (Vatican City State)'),
- ('HN', 'Honduras'),
- ('HK', 'Hong Kong'),
- ('HU', 'Hungary'),
- ('IS', 'Iceland'),
- ('IN', 'India'),
- ('ID', 'Indonesia'),
- ('IR', 'Iran, Islamic Republic of'),
- ('IQ', 'Iraq'),
- ('IE', 'Ireland'),
- ('IM', 'Isle of Man'),
- ('IL', 'Israel'),
- ('IT', 'Italy'),
- ('JM', 'Jamaica'),
- ('JP', 'Japan'),
- ('JE', 'Jersey'),
- ('JO', 'Jordan'),
- ('KZ', 'Kazakhstan'),
- ('KE', 'Kenya'),
- ('KI', 'Kiribati'),
- ('KP', "Korea, Democratic People's Republic of"),
- ('KR', 'Korea, Republic of'),
- ('KW', 'Kuwait'),
- ('KG', 'Kyrgyzstan'),
- ('LA', "Lao People's Democratic Republic"),
- ('LV', 'Latvia'),
- ('LB', 'Lebanon'),
- ('LS', 'Lesotho'),
- ('LR', 'Liberia'),
- ('LY', 'Libyan Arab Jamahiriya'),
- ('LI', 'Liechtenstein'),
- ('LT', 'Lithuania'),
- ('LU', 'Luxembourg'),
- ('MO', 'Macao'),
- ('MK', 'Macedonia, The Former Yugoslav Republic of'),
- ('MG', 'Madagascar'),
- ('MW', 'Malawi'),
- ('MY', 'Malaysia'),
- ('MV', 'Maldives'),
- ('ML', 'Mali'),
- ('MT', 'Malta'),
- ('MH', 'Marshall Islands'),
- ('MQ', 'Martinique'),
- ('MR', 'Mauritania'),
- ('MU', 'Mauritius'),
- ('YT', 'Mayotte'),
- ('MX', 'Mexico'),
- ('FM', 'Micronesia, Federated States of'),
- ('MD', 'Moldova, Republic of'),
- ('MC', 'Monaco'),
- ('MN', 'Mongolia'),
- ('ME', 'Montenegro'),
- ('MS', 'Montserrat'),
- ('MA', 'Morocco'),
- ('MZ', 'Mozambique'),
- ('MM', 'Myanmar'),
- ('NA', 'Namibia'),
- ('NR', 'Nauru'),
- ('NP', 'Nepal'),
- ('NL', 'Netherlands'),
- ('NC', 'New Caledonia'),
- ('NZ', 'New Zealand'),
- ('NI', 'Nicaragua'),
- ('NE', 'Niger'),
- ('NG', 'Nigeria'),
- ('NU', 'Niue'),
- ('NF', 'Norfolk Island'),
- ('MP', 'Northern Mariana Islands'),
- ('NO', 'Norway'),
- ('OM', 'Oman'),
- ('PK', 'Pakistan'),
- ('PW', 'Palau'),
- ('PS', 'Palestinian Territory, Occupied'),
- ('PA', 'Panama'),
- ('PG', 'Papua New Guinea'),
- ('PY', 'Paraguay'),
- ('PE', 'Peru'),
- ('PH', 'Philippines'),
- ('PN', 'Pitcairn'),
- ('PL', 'Poland'),
- ('PT', 'Portugal'),
- ('PR', 'Puerto Rico'),
- ('QA', 'Qatar'),
- ('RE', 'Reunion'),
- ('RO', 'Romania'),
- ('RU', 'Russian Federation'),
- ('RW', 'Rwanda'),
- ('BL', 'Saint Barthelemy'),
- ('SH', 'Saint Helena, Ascension and Tristan Da Cunha'),
- ('KN', 'Saint Kitts and Nevis'),
- ('LC', 'Saint Lucia'),
- ('MF', 'Saint Martin (French Part)'),
- ('PM', 'Saint Pierre and Miquelon'),
- ('VC', 'Saint Vincent and the Grenadines'),
- ('WS', 'Samoa'),
- ('SM', 'San Marino'),
- ('ST', 'Sao Tome and Principe'),
- ('SA', 'Saudi Arabia'),
- ('SN', 'Senegal'),
- ('RS', 'Serbia'),
- ('SC', 'Seychelles'),
- ('SL', 'Sierra Leone'),
- ('SG', 'Singapore'),
- ('SX', 'Sint Maarten (Dutch Part)'),
- ('SK', 'Slovakia'),
- ('SI', 'Slovenia'),
- ('SB', 'Solomon Islands'),
- ('SO', 'Somalia'),
- ('ZA', 'South Africa'),
- ('GS', 'South Georgia and the South Sandwich Islands'),
- ('ES', 'Spain'),
- ('LK', 'Sri Lanka'),
- ('SD', 'Sudan'),
- ('SR', 'Suriname'),
- ('SJ', 'Svalbard and Jan Mayen'),
- ('SZ', 'Swaziland'),
- ('SE', 'Sweden'),
- ('CH', 'Switzerland'),
- ('SY', 'Syrian Arab Republic'),
- ('TW', 'Taiwan, Province of China'),
- ('TJ', 'Tajikistan'),
- ('TZ', 'Tanzania, United Republic of'),
- ('TH', 'Thailand'),
- ('TL', 'Timor-Leste'),
- ('TG', 'Togo'),
- ('TK', 'Tokelau'),
- ('TO', 'Tonga'),
- ('TT', 'Trinidad and Tobago'),
- ('TN', 'Tunisia'),
- ('TR', 'Turkey'),
- ('TM', 'Turkmenistan'),
- ('TC', 'Turks and Caicos Islands'),
- ('TV', 'Tuvalu'),
- ('UG', 'Uganda'),
- ('UA', 'Ukraine'),
- ('AE', 'United Arab Emirates'),
- ('GB', 'United Kingdom'),
- ('US', 'United States'),
- ('UM', 'United States Minor Outlying Islands'),
- ('UY', 'Uruguay'),
- ('UZ', 'Uzbekistan'),
- ('VU', 'Vanuatu'),
- ('VE', 'Venezuela, Bolivarian Republic of'),
- ('VN', 'Viet Nam'),
- ('VG', 'Virgin Islands, British'),
- ('VI', 'Virgin Islands, U.S.'),
- ('WF', 'Wallis and Futuna'),
- ('EH', 'Western Sahara'),
- ('YE', 'Yemen'),
- ('ZM', 'Zambia'),
- ('ZW', 'Zimbabwe'))
+        ('AF', 'Afghanistan'),
+        ('AX', 'Aland Islands'),
+        ('AL', 'Albania'),
+        ('DZ', 'Algeria'),
+        ('AS', 'American Samoa'),
+        ('AD', 'Andorra'),
+        ('AO', 'Angola'),
+        ('AI', 'Anguilla'),
+        ('AQ', 'Antarctica'),
+        ('AG', 'Antigua and Barbuda'),
+        ('AR', 'Argentina'),
+        ('AM', 'Armenia'),
+        ('AW', 'Aruba'),
+        ('AU', 'Australia'),
+        ('AT', 'Austria'),
+        ('AZ', 'Azerbaijan'),
+        ('BS', 'Bahamas'),
+        ('BH', 'Bahrain'),
+        ('BD', 'Bangladesh'),
+        ('BB', 'Barbados'),
+        ('BY', 'Belarus'),
+        ('BE', 'Belgium'),
+        ('BZ', 'Belize'),
+        ('BJ', 'Benin'),
+        ('BM', 'Bermuda'),
+        ('BT', 'Bhutan'),
+        ('BO', 'Bolivia, Plurinational State of'),
+        ('BQ', 'Bonaire, Saint Eustatius and Saba'),
+        ('BA', 'Bosnia and Herzegovina'),
+        ('BW', 'Botswana'),
+        ('BV', 'Bouvet Island'),
+        ('BR', 'Brazil'),
+        ('IO', 'British Indian Ocean Territory'),
+        ('BN', 'Brunei Darussalam'),
+        ('BG', 'Bulgaria'),
+        ('BF', 'Burkina Faso'),
+        ('BI', 'Burundi'),
+        ('KH', 'Cambodia'),
+        ('CM', 'Cameroon'),
+        ('CA', 'Canada'),
+        ('CV', 'Cape Verde'),
+        ('KY', 'Cayman Islands'),
+        ('CF', 'Central African Republic'),
+        ('TD', 'Chad'),
+        ('CL', 'Chile'),
+        ('CN', 'China'),
+        ('CX', 'Christmas Island'),
+        ('CC', 'Cocos (Keeling) Islands'),
+        ('CO', 'Colombia'),
+        ('KM', 'Comoros'),
+        ('CG', 'Congo'),
+        ('CD', 'Congo, The Democratic Republic of the'),
+        ('CK', 'Cook Islands'),
+        ('CR', 'Costa Rica'),
+        ('CI', "Cote D'ivoire"),
+        ('HR', 'Croatia'),
+        ('CU', 'Cuba'),
+        ('CW', 'Curacao'),
+        ('CY', 'Cyprus'),
+        ('CZ', 'Czech Republic'),
+        ('DK', 'Denmark'),
+        ('DJ', 'Djibouti'),
+        ('DM', 'Dominica'),
+        ('DO', 'Dominican Republic'),
+        ('EC', 'Ecuador'),
+        ('EG', 'Egypt'),
+        ('SV', 'El Salvador'),
+        ('GQ', 'Equatorial Guinea'),
+        ('ER', 'Eritrea'),
+        ('EE', 'Estonia'),
+        ('ET', 'Ethiopia'),
+        ('FK', 'Falkland Islands (Malvinas)'),
+        ('FO', 'Faroe Islands'),
+        ('FJ', 'Fiji'),
+        ('FI', 'Finland'),
+        ('FR', 'France'),
+        ('GF', 'French Guiana'),
+        ('PF', 'French Polynesia'),
+        ('TF', 'French Southern Territories'),
+        ('GA', 'Gabon'),
+        ('GM', 'Gambia'),
+        ('GE', 'Georgia'),
+        ('DE', 'Germany'),
+        ('GH', 'Ghana'),
+        ('GI', 'Gibraltar'),
+        ('GR', 'Greece'),
+        ('GL', 'Greenland'),
+        ('GD', 'Grenada'),
+        ('GP', 'Guadeloupe'),
+        ('GU', 'Guam'),
+        ('GT', 'Guatemala'),
+        ('GG', 'Guernsey'),
+        ('GN', 'Guinea'),
+        ('GW', 'Guinea-Bissau'),
+        ('GY', 'Guyana'),
+        ('HT', 'Haiti'),
+        ('HM', 'Heard Island and McDonald Islands'),
+        ('VA', 'Holy See (Vatican City State)'),
+        ('HN', 'Honduras'),
+        ('HK', 'Hong Kong'),
+        ('HU', 'Hungary'),
+        ('IS', 'Iceland'),
+        ('IN', 'India'),
+        ('ID', 'Indonesia'),
+        ('IR', 'Iran, Islamic Republic of'),
+        ('IQ', 'Iraq'),
+        ('IE', 'Ireland'),
+        ('IM', 'Isle of Man'),
+        ('IL', 'Israel'),
+        ('IT', 'Italy'),
+        ('JM', 'Jamaica'),
+        ('JP', 'Japan'),
+        ('JE', 'Jersey'),
+        ('JO', 'Jordan'),
+        ('KZ', 'Kazakhstan'),
+        ('KE', 'Kenya'),
+        ('KI', 'Kiribati'),
+        ('KP', "Korea, Democratic People's Republic of"),
+        ('KR', 'Korea, Republic of'),
+        ('KW', 'Kuwait'),
+        ('KG', 'Kyrgyzstan'),
+        ('LA', "Lao People's Democratic Republic"),
+        ('LV', 'Latvia'),
+        ('LB', 'Lebanon'),
+        ('LS', 'Lesotho'),
+        ('LR', 'Liberia'),
+        ('LY', 'Libyan Arab Jamahiriya'),
+        ('LI', 'Liechtenstein'),
+        ('LT', 'Lithuania'),
+        ('LU', 'Luxembourg'),
+        ('MO', 'Macao'),
+        ('MK', 'Macedonia, The Former Yugoslav Republic of'),
+        ('MG', 'Madagascar'),
+        ('MW', 'Malawi'),
+        ('MY', 'Malaysia'),
+        ('MV', 'Maldives'),
+        ('ML', 'Mali'),
+        ('MT', 'Malta'),
+        ('MH', 'Marshall Islands'),
+        ('MQ', 'Martinique'),
+        ('MR', 'Mauritania'),
+        ('MU', 'Mauritius'),
+        ('YT', 'Mayotte'),
+        ('MX', 'Mexico'),
+        ('FM', 'Micronesia, Federated States of'),
+        ('MD', 'Moldova, Republic of'),
+        ('MC', 'Monaco'),
+        ('MN', 'Mongolia'),
+        ('ME', 'Montenegro'),
+        ('MS', 'Montserrat'),
+        ('MA', 'Morocco'),
+        ('MZ', 'Mozambique'),
+        ('MM', 'Myanmar'),
+        ('NA', 'Namibia'),
+        ('NR', 'Nauru'),
+        ('NP', 'Nepal'),
+        ('NL', 'Netherlands'),
+        ('NC', 'New Caledonia'),
+        ('NZ', 'New Zealand'),
+        ('NI', 'Nicaragua'),
+        ('NE', 'Niger'),
+        ('NG', 'Nigeria'),
+        ('NU', 'Niue'),
+        ('NF', 'Norfolk Island'),
+        ('MP', 'Northern Mariana Islands'),
+        ('NO', 'Norway'),
+        ('OM', 'Oman'),
+        ('PK', 'Pakistan'),
+        ('PW', 'Palau'),
+        ('PS', 'Palestinian Territory, Occupied'),
+        ('PA', 'Panama'),
+        ('PG', 'Papua New Guinea'),
+        ('PY', 'Paraguay'),
+        ('PE', 'Peru'),
+        ('PH', 'Philippines'),
+        ('PN', 'Pitcairn'),
+        ('PL', 'Poland'),
+        ('PT', 'Portugal'),
+        ('PR', 'Puerto Rico'),
+        ('QA', 'Qatar'),
+        ('RE', 'Reunion'),
+        ('RO', 'Romania'),
+        ('RU', 'Russian Federation'),
+        ('RW', 'Rwanda'),
+        ('BL', 'Saint Barthelemy'),
+        ('SH', 'Saint Helena, Ascension and Tristan Da Cunha'),
+        ('KN', 'Saint Kitts and Nevis'),
+        ('LC', 'Saint Lucia'),
+        ('MF', 'Saint Martin (French Part)'),
+        ('PM', 'Saint Pierre and Miquelon'),
+        ('VC', 'Saint Vincent and the Grenadines'),
+        ('WS', 'Samoa'),
+        ('SM', 'San Marino'),
+        ('ST', 'Sao Tome and Principe'),
+        ('SA', 'Saudi Arabia'),
+        ('SN', 'Senegal'),
+        ('RS', 'Serbia'),
+        ('SC', 'Seychelles'),
+        ('SL', 'Sierra Leone'),
+        ('SG', 'Singapore'),
+        ('SX', 'Sint Maarten (Dutch Part)'),
+        ('SK', 'Slovakia'),
+        ('SI', 'Slovenia'),
+        ('SB', 'Solomon Islands'),
+        ('SO', 'Somalia'),
+        ('ZA', 'South Africa'),
+        ('GS', 'South Georgia and the South Sandwich Islands'),
+        ('ES', 'Spain'),
+        ('LK', 'Sri Lanka'),
+        ('SD', 'Sudan'),
+        ('SR', 'Suriname'),
+        ('SJ', 'Svalbard and Jan Mayen'),
+        ('SZ', 'Swaziland'),
+        ('SE', 'Sweden'),
+        ('CH', 'Switzerland'),
+        ('SY', 'Syrian Arab Republic'),
+        ('TW', 'Taiwan, Province of China'),
+        ('TJ', 'Tajikistan'),
+        ('TZ', 'Tanzania, United Republic of'),
+        ('TH', 'Thailand'),
+        ('TL', 'Timor-Leste'),
+        ('TG', 'Togo'),
+        ('TK', 'Tokelau'),
+        ('TO', 'Tonga'),
+        ('TT', 'Trinidad and Tobago'),
+        ('TN', 'Tunisia'),
+        ('TR', 'Turkey'),
+        ('TM', 'Turkmenistan'),
+        ('TC', 'Turks and Caicos Islands'),
+        ('TV', 'Tuvalu'),
+        ('UG', 'Uganda'),
+        ('UA', 'Ukraine'),
+        ('AE', 'United Arab Emirates'),
+        ('GB', 'United Kingdom'),
+        ('US', 'United States'),
+        ('UM', 'United States Minor Outlying Islands'),
+        ('UY', 'Uruguay'),
+        ('UZ', 'Uzbekistan'),
+        ('VU', 'Vanuatu'),
+        ('VE', 'Venezuela, Bolivarian Republic of'),
+        ('VN', 'Viet Nam'),
+        ('VG', 'Virgin Islands, British'),
+        ('VI', 'Virgin Islands, U.S.'),
+        ('WF', 'Wallis and Futuna'),
+        ('EH', 'Western Sahara'),
+        ('YE', 'Yemen'),
+        ('ZM', 'Zambia'),
+        ('ZW', 'Zimbabwe'),
+        )
 
 
 employer_types = (
-    ('F','Federal Government'),
-    ('S','State and Local Governmental Employer'),
-    ('T','Tax Exempt Employer'),
-    ('Y','State and Local Tax Exempt Employer'),
-    ('N','None Apply'),
-    )
+        ('F','Federal Government'),
+        ('S','State and Local Governmental Employer'),
+        ('T','Tax Exempt Employer'),
+        ('Y','State and Local Tax Exempt Employer'),
+        ('N','None Apply'),
+        )
 
 employment_codes = (
-    ('A', 'Agriculture'),
-    ('H', 'Household'),
-    ('M', 'Military'),
-    ('Q', 'Medicare Qualified Government Employee'),
-    ('X', 'Railroad'),
-    ('F', 'Regular'),
-    ('R', 'Regular (all others)'),
-    )
+        ('A', 'Agriculture'),
+        ('H', 'Household'),
+        ('M', 'Military'),
+        ('Q', 'Medicare Qualified Government Employee'),
+        ('X', 'Railroad'),
+        ('F', 'Regular'),
+        ('R', 'Regular (all others)'),
+        )
 
 tax_jurisdiction_codes = (
-    ('V', 'Virgin Islands'),
-    ('G', 'Guam'),
-    ('S', 'American Samoa'),
-    ('N', 'Northern Mariana Islands'),
-    ('P', 'Puerto Rico'),
-    )
+        ('V', 'Virgin Islands'),
+        ('G', 'Guam'),
+        ('S', 'American Samoa'),
+        ('N', 'Northern Mariana Islands'),
+        ('P', 'Puerto Rico'),
+        )
 
 tax_type_codes = (
-    ('C', 'City Income Tax'),
-    ('D', 'Country Income Tax'),
-    ('E', 'School District Income Tax'),
-    ('F', 'Other Income Tax'),
-    )
+        ('C', 'City Income Tax'),
+        ('D', 'Country Income Tax'),
+        ('E', 'School District Income Tax'),
+        ('F', 'Other Income Tax'),
+        )
 
diff --git a/pyaccuwage/fields.py b/pyaccuwage/fields.py
index 94147a8..ef60b92 100644
--- a/pyaccuwage/fields.py
+++ b/pyaccuwage/fields.py
@@ -2,6 +2,7 @@ import decimal, datetime
 import inspect
 from . import enums
 
+
 class ValidationError(Exception):
     def __init__(self, msg, field=None):
         self.msg = msg
@@ -90,14 +91,10 @@ class Field(object):
         wrapper = textwrap.TextWrapper(replace_whitespace=False, drop_whitespace=False)
         wrapper.width = 100
         value = wrapper.wrap(value)
-        #value = textwrap.wrap(value, 100)
-        #print value
         value = list([(" " * 9) + ('"' + x + '"') for x in value])
-        #value[0] = '"' + value[0] + '"'
         value.append(" " * 10 + ('_' * 10) * (wrapper.width / 10))
         value.append(" " * 10 + ('0123456789') * (wrapper.width / 10))
         value.append(" " * 10 + ''.join(([str(x) + (' ' * 9) for x in range(wrapper.width / 10 )])))
-        #value.append((" " * 59) + map(lambda x:("%x" % x), range(16))
 
         start = counter['c']
         counter['c'] += len(self._orig_value or self.value)
@@ -146,7 +143,6 @@ class StateField(TextField):
         self.use_numeric = use_numeric
 
     def get_data(self):
-        # value = str(self.value or 'XX').encode('ascii') or b''
         value = str(self.value or 'XX')
         if value.strip() and self.use_numeric:
             postcode = enums.state_postal_numeric[value.upper()]
@@ -199,6 +195,7 @@ class StaticField(TextField):
     def parse(self, s):
         pass
 
+
 class BlankField(TextField):
     def __init__(self, name=None, max_length=0, required=False):
         super(TextField, self).__init__(name=name, max_length=max_length, required=required, uppercase=False)
@@ -209,10 +206,12 @@ class BlankField(TextField):
     def parse(self, s):
         pass
 
+
 class ZeroField(BlankField):
     def get_data(self):
         return b'0' * self.max_length
 
+
 class CRLFField(TextField):
     def __init__(self, name=None, required=False):
         super(TextField, self).__init__(name=name, max_length=2, required=required, uppercase=False)
@@ -231,6 +230,7 @@ class CRLFField(TextField):
     def parse(self, s):
         self.value = s
 
+
 class BooleanField(Field):
     def __init__(self, name=None, required=True, value=None):
         super(BooleanField, self).__init__(name=name, required=required, max_length=1)
@@ -299,7 +299,6 @@ class DateField(TextField):
 class MonthYearField(TextField):
     def __init__(self, name=None, required=True, value=None):
         super(TextField, self).__init__(name=name, required=required, max_length=6)
-
         if value:
             self.value = value
 
@@ -326,4 +325,3 @@ class MonthYearField(TextField):
         return self._value
 
     value = property(__getvalue, __setvalue)
-
diff --git a/pyaccuwage/model.py b/pyaccuwage/model.py
index ae5f9e1..b71c26f 100644
--- a/pyaccuwage/model.py
+++ b/pyaccuwage/model.py
@@ -86,10 +86,5 @@ class Model(object):
 
             target._value = f._value
 
-
-            #print (self.__dict__[f.name].name == f.name)
-            #self.__dict__[f.name].name == f.name
-            #self.__dict__[f.name].max_length == f.max_length
-
         return self
 
diff --git a/pyaccuwage/parser.py b/pyaccuwage/parser.py
index 6b808fc..250d122 100644
--- a/pyaccuwage/parser.py
+++ b/pyaccuwage/parser.py
@@ -1,5 +1,3 @@
-#!/usr/bin/python
-# coding=UTF-8
 """
 Parser utility to read data from Publication 1220 and
 convert it into python classes.
@@ -146,8 +144,7 @@ class RecordBuilder(object):
                     (re.compile(r'zero\-filled', re.IGNORECASE), +1),
                     (re.compile(r'leading zeroes', re.IGNORECASE), +1),
 
-                    (re.compile(r'left-\justif', re.IGNORECASE), -1),
-
+                    (re.compile(r'left\-justif', re.IGNORECASE), -1),
                 ],
             },
         }),

From 250ca8d31f61108373dd2e486c2020759116a602 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Mon, 28 Jan 2019 13:29:14 -0600
Subject: [PATCH 02/20] fix flubbed blank field specifier on StateTotalRecordIA

---
 pyaccuwage/record.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyaccuwage/record.py b/pyaccuwage/record.py
index 472dde9..9e46217 100644
--- a/pyaccuwage/record.py
+++ b/pyaccuwage/record.py
@@ -364,7 +364,7 @@ class StateTotalRecordIA(EFW2Record):
     state_income_tax_wh =           MoneyField(max_length=15)
     employer_ben =                  TextField(max_length=8)
     iowa_confirmation_number =      ZeroField(max_length=10)
-    blank1 =                        BlankField(455)
+    blank1 =                        BlankField(max_length=455)
 
 
 class FinalRecord(EFW2Record):

From 6af5067fcaa161489a0f9cfbd7b876c511bdacc3 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Wed, 30 Jan 2019 14:25:24 -0600
Subject: [PATCH 03/20] add option for record delimiter

---
 pyaccuwage/__init__.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pyaccuwage/__init__.py b/pyaccuwage/__init__.py
index a6775be..abb380f 100644
--- a/pyaccuwage/__init__.py
+++ b/pyaccuwage/__init__.py
@@ -38,7 +38,7 @@ def test_dump():
         record.EmployeeWageRecord(),
         ]
     out = io.BytesIO()
-    dump(records, out)
+    dump(records, out, None)
     return out
 
 
@@ -79,14 +79,16 @@ def loads(s):
     return load(fp)
 
 
-def dump(records, fp):
+def dump(records, fp, delim=None):
     for r in records:
         fp.write(r.output())
+        if delim:
+            fp.write(delim)
 
-def dumps(records):
+def dumps(records, delim=None):
     import io
     fp = io.BytesIO()
-    dump(records, fp)
+    dump(records, fp, delim=delim)
     fp.seek(0)
     return fp.read()
 

From 8f86f761674a060dee9d8ca3929acab728aef62d Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Fri, 12 Jun 2020 13:07:41 -0500
Subject: [PATCH 04/20] add format interchange functions, add tests, fix stuff

---
 pyaccuwage/__init__.py | 169 ++++++++++++++++++++++-------------------
 pyaccuwage/fields.py   |  49 +++++++++---
 pyaccuwage/model.py    |  38 +++++++--
 pyaccuwage/modeldef.py |   4 +-
 pyaccuwage/parser.py   |   2 +-
 tests/test_fields.py   |  55 ++------------
 tests/test_records.py  | 127 +++++++++++++++++++++++++++++++
 7 files changed, 298 insertions(+), 146 deletions(-)
 create mode 100644 tests/test_records.py

diff --git a/pyaccuwage/__init__.py b/pyaccuwage/__init__.py
index abb380f..513b8c6 100644
--- a/pyaccuwage/__init__.py
+++ b/pyaccuwage/__init__.py
@@ -1,6 +1,4 @@
-from .record import *
-from .reader import RecordReader
-import collections
+from collections.abc import Callable
 
 VERSION = (0, 2012, 0)
 
@@ -14,77 +12,55 @@ RECORD_TYPES = [
     'OptionalTotalRecord',
     'StateTotalRecord',
     'FinalRecord'
-    ]
-
-def test():
-    from . import record, model
-    from .fields import ValidationError
-    for rname in RECORD_TYPES:
-        inst = record.__dict__[rname]()
-        try:
-            output_length = len(inst.output())
-        except ValidationError as e:
-            print(e.msg, type(inst), inst.record_identifier)
-            continue
-
-        print(type(inst), inst.record_identifier, output_length)
+]
 
 
-def test_dump():
-    import record, io
-    records = [
-        record.SubmitterRecord(),
-        record.EmployerRecord(),
-        record.EmployeeWageRecord(),
-        ]
-    out = io.BytesIO()
-    dump(records, out, None)
-    return out
-
-
-def test_record_order():
-    from . import record
-    records = [
-        record.SubmitterRecord(),
-        record.EmployerRecord(),
-        record.EmployeeWageRecord(),
-        record.TotalRecord(),
-        record.FinalRecord(),
-    ]
-    validate_record_order(records)
-
-
-def test_load(fp):
-    return load(fp)
-
-def load(fp):
-    # BUILD LIST OF RECORD TYPES
+def get_record_types():
     from . import record
     types = {}
     for r in RECORD_TYPES:
         klass = record.__dict__[r]
         types[klass.record_identifier] = klass
+    return types
+
+
+def load(fp, record_types):
+    distinct_identifier_lengths = set([len(record_types[k].record_identifier) for k in record_types])
+    assert(len(distinct_identifier_lengths) == 1)
+    ident_length = list(distinct_identifier_lengths)[0]
+
+    # Add aliases for the record types based on their record_identifier since that's all
+    # we have to work with in the e1099 data.
+    record_types_by_ident = {}
+    for k in record_types:
+        record_type = record_types[k]
+        record_identifier = record_type.record_identifier
+        record_types_by_ident[record_identifier] = record_type
 
     # PARSE DATA INTO RECORDS AND YIELD THEM
-    while fp.tell() < fp.len:
-        record_ident = fp.read(2)
-        if record_ident in types:
-            record = types[record_ident]()
+    while True:
+        record_ident = fp.read(ident_length)
+        if not record_ident:
+            break
+        if record_ident in record_types_by_ident:
+            record = record_types_by_ident[record_ident]()
             record.read(fp)
             yield record
 
-def loads(s):
+
+def loads(s, record_types=None):  # None -> look up the default record types at call time
     import io
     fp = io.BytesIO(s)
-    return load(fp)
+    return load(fp, get_record_types() if record_types is None else record_types)
 
 
-def dump(records, fp, delim=None):
+def dump(fp, records, delim=None):
     for r in records:
         fp.write(r.output())
         if delim:
             fp.write(delim)
 
+
 def dumps(records, delim=None):
     import io
     fp = io.BytesIO()
@@ -92,15 +68,15 @@ def dumps(records, delim=None):
     fp.seek(0)
     return fp.read()
 
+
 def json_dumps(records):
     import json
-    from . import model
     import decimal
 
     class JSONEncoder(json.JSONEncoder):
 
         def default(self, o):
-            if hasattr(o, 'toJSON') and isinstance(getattr(o, 'toJSON'), collections.Callable):
+            if hasattr(o, 'toJSON') and isinstance(getattr(o, 'toJSON'), Callable):
                 return o.toJSON()
 
             if type(o) is bytes:
@@ -111,37 +87,76 @@ def json_dumps(records):
 
             return super(JSONEncoder, self).default(o)
 
-    return json.dumps(records, cls=JSONEncoder, indent=2)
+    return json.dumps(list(records), cls=JSONEncoder, indent=2)
 
 
-def json_loads(s, record_classes):
+def json_dump(fp, records):
+    fp.write(json_dumps(records))
+
+
+def json_loads(s, record_types):
     import json
     from . import fields
     import decimal
-    import re
 
-    if not isinstance(record_classes, dict):
-        record_classes = dict([ (x.__class__.__name__, x) for x in record_classes])
+    if not isinstance(record_types, dict):
+        record_types = dict([ (x.__name__, x) for x in record_types])
 
     def object_hook(o):
         if '__class__' in o:
             klass = o['__class__']
-
-            if klass in record_classes:
-                return record_classes[klass]().fromJSON(o)
-
+            if klass in record_types:
+                record = record_types[klass]()
+                record.fromJSON(o)
+                return record
             elif hasattr(fields, klass):
                 return getattr(fields, klass)().fromJSON(o)
-
         return o
 
-        #print "OBJECTHOOK", str(o)
-        #return {'object_hook':str(o)}
-        #def default(self, o):
-        #    return super(JSONDecoder, self).default(o)
-
     return json.loads(s, parse_float=decimal.Decimal, object_hook=object_hook)
 
+def json_load(fp, record_types):
+    return json_loads(fp.read(), record_types)
+
+def text_dump(fp, records):
+    for r in records:
+        fp.write(r.output(format='text').encode('ascii'))
+
+
+def text_dumps(records):
+    import io
+    fp = io.BytesIO()
+    text_dump(fp, records)
+    fp.seek(0)
+    return fp.read()
+
+
+def text_load(fp, record_classes):
+    """Build records from '--- Name' header and 'field: value' lines read from binary fp."""
+    records = []
+    current_record = None
+    if not isinstance(record_classes, dict):
+        record_classes = dict([ (x.__name__, x) for x in record_classes])
+    # A '---' line starts a new record; later 'field: value' lines are set on it.
+    while True:  # an empty read from fp means end of input
+        line = fp.readline().decode('ascii')
+        if not line:
+            break
+        if line.startswith('---'):
+            record_name = line.strip('---').strip()
+            current_record = record_classes[record_name]()
+            records.append(current_record)
+        elif ':' in line:
+            field, value = [x.strip() for x in line.split(':', 1)]  # values may contain ':'
+            current_record.set_field_value(field, value)
+    return records
+
+def text_loads(s, record_classes):
+    import io
+    fp = io.BytesIO(s)
+    return text_load(fp, record_classes)
+
+
 # THIS WAS IN CONTROLLER, BUT UNLESS WE
 # REALLY NEED A CONTROLLER CLASS, IT'S SIMPLER
 # TO JUST KEEP IT IN HERE.
@@ -153,7 +168,7 @@ def validate_required_records(records):
         klass = record.__dict__[r]
         if klass.required:
             req_types.append(klass.__name__)
-    
+
     while req_types:
         req = req_types[0]
         if req not in types:
@@ -162,10 +177,11 @@ def validate_required_records(records):
         else:
             req_types.remove(req)
 
+
 def validate_record_order(records):
     from . import record
     from .fields import ValidationError
-    
+
     # 1st record must be SubmitterRecord
     if not isinstance(records[0], record.SubmitterRecord):
         raise ValidationError("First record must be SubmitterRecord")
@@ -211,15 +227,10 @@ def test_unique_fields():
     r1 = EmployeeWageRecord()
 
     r1.employee_first_name.value = "John Johnson"
-    
+
     r2 = EmployeeWageRecord()
     print('r1:', r1.employee_first_name.value, r1.employee_first_name, r1.employee_first_name.creation_counter)
     print('r2:', r2.employee_first_name.value, r2.employee_first_name, r2.employee_first_name.creation_counter)
-    
+
     if r1.employee_first_name.value == r2.employee_first_name.value:
         raise ValidationError("Horrible problem involving shared values across records")
-
-#def state_postal_code(state_abbr):
-#    import enums
-#    return enums.state_postal_numeric[ state_abbr.upper() ]
-
diff --git a/pyaccuwage/fields.py b/pyaccuwage/fields.py
index ef60b92..0d6fcd3 100644
--- a/pyaccuwage/fields.py
+++ b/pyaccuwage/fields.py
@@ -1,7 +1,10 @@
 import decimal, datetime
 import inspect
+from six import string_types
 from . import enums
 
+def is_blank_space(val):
+    return len(val.strip()) == 0
 
 class ValidationError(Exception):
     def __init__(self, msg, field=None):
@@ -17,6 +20,7 @@ class ValidationError(Exception):
 
 class Field(object):
     creation_counter = 0
+    is_read_only = False
 
     def __init__(self, name=None, max_length=0, required=True, uppercase=True, creation_counter=None):
         self.name = name
@@ -29,10 +33,10 @@ class Field(object):
         Field.creation_counter += 1
 
     def validate(self):
-        raise NotImplemented
+        raise NotImplementedError
 
     def get_data(self):
-        raise NotImplemented
+        raise NotImplementedError
 
     def __setvalue(self, value):
         self._value = value
@@ -77,7 +81,7 @@ class Field(object):
                 required=o['required'],
             )
 
-        if isinstance(o['value'], str) and re.match('^\d*\.\d*$', o['value']):
+        if isinstance(o['value'], str) and re.match(r'^\d*\.\d*$', o['value']):
             o['value'] = decimal.Decimal(o['value'])
 
         self.value = o['value']
@@ -164,9 +168,10 @@ class StateField(TextField):
         else:
             self.value = s
 
+
 class EmailField(TextField):
     def __init__(self, name=None, required=True, max_length=None):
-        return super(EmailField, self).__init__(name=name, max_length=max_length,
+        super(EmailField, self).__init__(name=name, max_length=max_length,
                                                 required=required, uppercase=False)
 
 class IntegerField(TextField):
@@ -183,7 +188,10 @@ class IntegerField(TextField):
         return value.zfill(self.max_length)[:self.max_length]
 
     def parse(self, s):
-        self.value = int(s)
+        if not is_blank_space(s):
+            self.value = int(s)
+        else:
+            self.value = 0
 
 
 class StaticField(TextField):
@@ -197,8 +205,10 @@ class StaticField(TextField):
 
 
 class BlankField(TextField):
+    is_read_only = True
+
     def __init__(self, name=None, max_length=0, required=False):
-        super(TextField, self).__init__(name=name, max_length=max_length, required=required, uppercase=False)
+        super(BlankField, self).__init__(name=name, max_length=max_length, required=required, uppercase=False)
 
     def get_data(self):
         return b' ' * self.max_length
@@ -208,13 +218,17 @@ class BlankField(TextField):
 
 
 class ZeroField(BlankField):
+    is_read_only = True
+
     def get_data(self):
         return b'0' * self.max_length
 
 
 class CRLFField(TextField):
+    is_read_only = True
+
     def __init__(self, name=None, required=False):
-        super(TextField, self).__init__(name=name, max_length=2, required=required, uppercase=False)
+        super(CRLFField, self).__init__(name=name, max_length=2, required=required, uppercase=False)
 
     def __setvalue(self, value):
         self._value = value
@@ -262,12 +276,27 @@ class MoneyField(Field):
         return formatted[:self.max_length]
 
     def parse(self, s):
-        self.value = decimal.Decimal(s) * decimal.Decimal('0.01')
+        if not is_blank_space(s):
+            self.value = decimal.Decimal(s) * decimal.Decimal('0.01')
+        else:
+            self.value = decimal.Decimal(0.0)
 
+    def __setvalue(self, value):
+        new_value = value
+        if isinstance(new_value, string_types):
+            new_value = decimal.Decimal(new_value or '0')
+            if '.' not in value:  # must be cents?
+                new_value *= decimal.Decimal('100.')
+        self._value = new_value
+
+    def __getvalue(self):
+        return self._value
+
+    value = property(__getvalue, __setvalue)
 
 class DateField(TextField):
     def __init__(self, name=None, required=True, value=None):
-        super(TextField, self).__init__(name=name, required=required, max_length=8)
+        super(DateField, self).__init__(name=name, required=required, max_length=8)
         if value:
             self.value = value
 
@@ -298,7 +327,7 @@ class DateField(TextField):
 
 class MonthYearField(TextField):
     def __init__(self, name=None, required=True, value=None):
-        super(TextField, self).__init__(name=name, required=required, max_length=6)
+        super(MonthYearField, self).__init__(name=name, required=required, max_length=6)
         if value:
             self.value = value
 
diff --git a/pyaccuwage/model.py b/pyaccuwage/model.py
index b71c26f..becd3ce 100644
--- a/pyaccuwage/model.py
+++ b/pyaccuwage/model.py
@@ -4,11 +4,15 @@ import collections
 
 
 class Model(object):
+    record_length = -1
     record_identifier = '  '
     required = False
     target_size = 512
 
     def __init__(self):
+        if self.record_length == -1:
+            raise ValueError(self.record_length)
+
         for (key, value) in list(self.__class__.__dict__.items()):
             if isinstance(value, Field):
                 # GRAB THE FIELD INSTANCE FROM THE CLASS DEFINITION
@@ -19,15 +23,22 @@ class Model(object):
                 if not src_field.name:
                     setattr(src_field, 'name', key)
                     setattr(src_field, 'parent_name', self.__class__.__name__)
-                self.__dict__[key] = copy.copy(src_field)
+                new_field_instance = copy.copy(src_field)
+                new_field_instance._orig_value = None
+                new_field_instance._value = None
+                self.__dict__[key] = new_field_instance
 
     def __setattr__(self, key, value):
         if hasattr(self, key) and isinstance(getattr(self, key), Field):
-            getattr(self, key).value = value
+            self.set_field_value(key, value)
         else:
             # MAYBE THIS SHOULD RAISE A PROPERTY ERROR?
             self.__dict__[key] = value
 
+    def set_field_value(self, field_name, value):
+        print('setfieldval: ' + field_name + ' ' + value)
+        getattr(self, field_name).value = value
+
     def get_fields(self):
         identifier = TextField("record_identifier", max_length=len(self.record_identifier), creation_counter=-1)
         identifier.value = self.record_identifier
@@ -55,18 +66,28 @@ class Model(object):
             if isinstance(custom_validator, collections.Callable):
                 custom_validator(f)
 
-    def output(self):
+    def output(self, format='binary'):
+        if format == 'text':
+            return self.output_text()
+        return self.output_efile()
+
+    def output_efile(self):
         result = b''.join([field.get_data() for field in self.get_sorted_fields()])
-
-        if hasattr(self, 'record_length') and len(result) != self.record_length:
+        if self.record_length < 0 or len(result) != self.record_length:
             raise ValidationError("Record result length not equal to %d bytes (%d)" % (self.record_length, len(result)))
-
         return result
 
+    def output_text(self):
+        fields = self.get_sorted_fields()[1:]  # skip record identifier
+        fields = [field for field in fields if not field.is_read_only]
+        header = ''.join(['---', self.__class__.__name__, '\n'])
+        return header + '\n'.join([f.name + ': ' + (str(f.value) if f.value else '') for f in fields]) + '\n\n'
+
     def read(self, fp):
         # Skip the first record, since that's an identifier
         for field in self.get_sorted_fields()[1:]:
             field.read(fp)
+            print(field.name, '"' + (str(field.value) or '') + '"', field.max_length, field._orig_value)
 
     def toJSON(self):
         return {
@@ -77,6 +98,9 @@ class Model(object):
     def fromJSON(self, o):
         fields = o['fields']
 
+        identifier, fields = fields[0], fields[1:]
+        assert(identifier.value == self.record_identifier)
+
         for f in fields:
             target = self.__dict__[f.name]
 
@@ -84,7 +108,7 @@ class Model(object):
                 or target.max_length != f.max_length):
                 print("Warning: value mismatch on import")
 
-            target._value = f._value
+            target.value = f.value
 
         return self
 
diff --git a/pyaccuwage/modeldef.py b/pyaccuwage/modeldef.py
index c6c9110..6d4ce35 100644
--- a/pyaccuwage/modeldef.py
+++ b/pyaccuwage/modeldef.py
@@ -2,7 +2,7 @@ import re
 
 
 class ClassEntryCommentSequence(object):
-    re_rangecomment = re.compile('#\s+(\d+)\-?(\d*)$')
+    re_rangecomment = re.compile(r'#\s+(\d+)\-?(\d*)$')
 
     def __init__(self, classname, line):
         self.classname = classname,
@@ -72,7 +72,7 @@ class ModelDefParser(object):
 
             classmatch = self.re_classdef.match(line)
             if classmatch:
-                classname, subclass = classmatch.groups()
+                classname, _subclass = classmatch.groups()
                 self.beginclass(classname, self.line)
                 continue
 
diff --git a/pyaccuwage/parser.py b/pyaccuwage/parser.py
index 250d122..c0fe399 100644
--- a/pyaccuwage/parser.py
+++ b/pyaccuwage/parser.py
@@ -109,7 +109,7 @@ class RangeToken(BaseToken):
 
 
 class NumericToken(BaseToken):
-    regexp = re.compile('^(\d+)$')
+    regexp = re.compile(r'^(\d+)$')
 
     @property
     def value(self):
diff --git a/tests/test_fields.py b/tests/test_fields.py
index 9293acd..3d8fd3e 100644
--- a/tests/test_fields.py
+++ b/tests/test_fields.py
@@ -1,17 +1,15 @@
 import unittest
-import decimal
 from pyaccuwage.fields import TextField
-from pyaccuwage.fields import IntegerField
-from pyaccuwage.fields import StateField
-from pyaccuwage.fields import BlankField
-from pyaccuwage.fields import ZeroField
-from pyaccuwage.fields import MoneyField
+# from pyaccuwage.fields import IntegerField
+# from pyaccuwage.fields import StateField
+# from pyaccuwage.fields import BlankField
+# from pyaccuwage.fields import ZeroField
+# from pyaccuwage.fields import MoneyField
 from pyaccuwage.fields import ValidationError
 from pyaccuwage.model import Model
 
 
 class TestTextField(unittest.TestCase):
-
     def testStringShortOptional(self):
         field = TextField(max_length=6, required=False)
         field.validate()  # optional
@@ -30,43 +28,6 @@ class TestTextField(unittest.TestCase):
     def testStringLongOptional(self):
         field = TextField(max_length=6, required=False)
         field.value = 'Hello, World!'  # too long
-        self.assertEqual(len(field.get_data()), field.max_length)
-
-
-class TestModelOutput(unittest.TestCase):
-    class TestModel(Model):
-        record_length = 128
-        record_identifier = 'TEST'  # 4 bytes
-        field1 = TextField(max_length=16)
-        field2 = IntegerField(max_length=16)
-        blank1 = BlankField(max_length=16)
-        zero1 = ZeroField(max_length=16)
-        money = MoneyField(max_length=32)
-        state_txt = StateField()
-        state_num = StateField(use_numeric=True)
-        blank2 = BlankField(max_length=24)
-
-    def setUp(self):
-        self.model = TestModelOutput.TestModel()
-
-    def testModelOutput(self):
-        model = self.model
-        model.field1.value = 'Hello, sir!'
-        model.field2.value = 12345
-        model.money.value = decimal.Decimal('1234.56')
-        model.state_txt.value = 'IA'
-        model.state_num.value = 'IA'
-
-        expected = b''.join([
-            b'TEST',
-            b'HELLO, SIR!'.ljust(16),
-            b'12345'.zfill(16),
-            b' ' * 16,
-            b'0' * 16,
-            b'123456'.zfill(32),
-            b'IA',
-            b'19',
-            b' ' * 24,
-            ])
-
-        self.assertEqual(model.output(), expected)
+        data = field.get_data()
+        self.assertEqual(len(data), field.max_length)
+        self.assertEqual(data, b'HELLO,')
diff --git a/tests/test_records.py b/tests/test_records.py
new file mode 100644
index 0000000..a6485ac
--- /dev/null
+++ b/tests/test_records.py
@@ -0,0 +1,127 @@
+import unittest
+import decimal
+import pyaccuwage
+from pyaccuwage.fields import BlankField
+from pyaccuwage.fields import IntegerField
+from pyaccuwage.fields import MoneyField
+from pyaccuwage.fields import StateField
+from pyaccuwage.fields import TextField
+from pyaccuwage.fields import ZeroField
+from pyaccuwage.model import Model
+
+class TestModelOutput(unittest.TestCase):
+    class TestModel(Model):
+        record_length = 128
+        record_identifier = 'TEST'  # 4 bytes
+        field1 = TextField(max_length=16)
+        field2 = IntegerField(max_length=16)
+        blank1 = BlankField(max_length=16)
+        zero1 = ZeroField(max_length=16)
+        money = MoneyField(max_length=32)
+        state_txt = StateField()
+        state_num = StateField(use_numeric=True)
+        blank2 = BlankField(max_length=24)
+
+    def setUp(self):
+        self.model = TestModelOutput.TestModel()
+
+    def testModelBinaryOutput(self):
+        model = self.model
+        model.field1.value = 'Hello, sir!'
+        model.field2.value = 12345
+        model.money.value = decimal.Decimal('3133.77')
+        model.state_txt.value = 'IA'
+        model.state_num.value = 'IA'
+
+        expected = b''.join([
+            b'TEST',
+            b'HELLO, SIR!'.ljust(16),
+            b'12345'.zfill(16),
+            b' ' * 16,
+            b'0' * 16,
+            b'313377'.zfill(32),
+            b'IA',
+            b'19',
+            b' ' * 24,
+            ])
+
+        output = model.output()
+        self.assertEqual(len(output), TestModelOutput.TestModel.record_length)
+        self.assertEqual(output, expected)
+
+    def testModelTextOutput(self):
+        model = self.model
+        model.field1.value = 'Hello, sir!'
+        model.field2.value = 12345
+        model.money.value = decimal.Decimal('3133.77')
+        model.state_txt.value = 'IA'
+        model.state_num.value = 'IA'
+        output = model.output(format='text')
+
+        self.assertEqual(output, '''---TestModel
+field1: Hello, sir!
+field2: 12345
+money: 3133.77
+state_txt: IA
+state_num: IA
+
+''')
+
+
+class TestFileFormats(unittest.TestCase):
+    class TestModelA(pyaccuwage.model.Model):
+        record_length = 128
+        record_identifier = 'A'  # 1 byte
+        field1 = TextField(max_length=16)
+        field2 = IntegerField(max_length=16)
+        blank1 = BlankField(max_length=16)
+        zero1 = ZeroField(max_length=16)
+        money = MoneyField(max_length=32)
+        state_txt = StateField()
+        state_num = StateField(use_numeric=True)
+        blank2 = BlankField(max_length=27)
+
+    class TestModelB(pyaccuwage.model.Model):
+        record_length = 128
+        record_identifier = 'B'  # 1 byte
+        zero1 = ZeroField(max_length=32)
+        text1 = TextField(max_length=71)
+        blank2 = BlankField(max_length=24)
+
+    record_types = [TestModelA, TestModelB]
+
+    def createExampleRecords(self):
+        model_a = TestFileFormats.TestModelA()
+        model_a.field1.value = 'I am model a'
+        model_a.field2.value = 5522
+        model_a.money.value = decimal.Decimal('23.00')
+        model_a.state_txt.value = 'IA'
+        model_a.state_num.value = 'IA'
+
+        model_b = TestFileFormats.TestModelB()
+        model_b.text1.value = 'hey I am model b and I have a big text field'
+
+        return [
+            model_a,
+            model_b,
+        ]
+
+    def testJSONSerialization(self):
+        records = self.createExampleRecords()
+        record_types = self.record_types
+        json_data = pyaccuwage.json_dumps(records)
+        records_loaded = pyaccuwage.json_loads(json_data, record_types)
+
+        original_bytes = pyaccuwage.dumps(records)
+        reloaded_bytes = pyaccuwage.dumps(records_loaded)
+        self.assertEqual(original_bytes, reloaded_bytes)
+
+    def testTxtSerialization(self):
+        records = self.createExampleRecords()
+        record_types = self.record_types
+        text_data = pyaccuwage.text_dumps(records)
+        records_loaded = pyaccuwage.text_loads(text_data, record_types)
+
+        original_bytes = pyaccuwage.dumps(records)
+        reloaded_bytes = pyaccuwage.dumps(records_loaded)
+        self.assertEqual(original_bytes, reloaded_bytes)

From 431b594c1e3a4bac344b409038e5d91f958e55fa Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Fri, 12 Jun 2020 13:10:13 -0500
Subject: [PATCH 05/20] add pyaccuwage-convert

---
 scripts/pyaccuwage-convert | 75 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100755 scripts/pyaccuwage-convert

diff --git a/scripts/pyaccuwage-convert b/scripts/pyaccuwage-convert
new file mode 100755
index 0000000..9591760
--- /dev/null
+++ b/scripts/pyaccuwage-convert
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+import pyaccuwage
+import argparse
+import os, os.path
+import sys
+
+"""
+Command line tool for converting IRS e-file fixed field records
+to/from JSON or a simple text format.
+
+Attempts to load record types from a python module in the current working
+directory named record_types.py
+
+The module must export a RECORD_TYPES list with the names of the classes to
+import as valid record types.
+"""
+
+def get_record_types():
+    try:
+        sys.path.append(os.getcwd())
+        import record_types
+        r = {}
+        for record_type in record_types.RECORD_TYPES:
+            r[record_type] = getattr(record_types, record_type)
+        return r
+    except ImportError:
+        print('warning: using default record types (failed to import record_types.py)')
+    return pyaccuwage.get_record_types()
+
+
+def read_file(fd, filename, record_types):
+    filename, extension = os.path.splitext(filename)
+    if extension == '.json':
+        return pyaccuwage.json_load(fd, record_types)
+    elif extension == '.txt':
+        return pyaccuwage.text_load(fd, record_types)
+    else:
+        return pyaccuwage.load(fd, record_types)
+
+
+def write_file(outfile, filename, records):
+    filename, extension = os.path.splitext(filename)
+    if extension == '.json':
+        pyaccuwage.json_dump(outfile, records)
+    elif extension == '.txt':
+        pyaccuwage.text_dump(outfile, records)
+    else:
+        pyaccuwage.dump(outfile, records)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description="Convert accuwage efile data between different formats."
+    )
+
+    parser.add_argument("-i", '--input',
+                        nargs=1,
+                        required=True,
+                        metavar="file",
+                        type=argparse.FileType('r'),  # NOTE(review): text mode, but text_load() calls .decode() on each line it reads -- confirm whether 'rb' is needed on Python 3
+                        help="Source file to convert")
+
+    parser.add_argument("-o", "--output",
+                        nargs=1,
+                        required=True,
+                        metavar="file",
+                        type=argparse.FileType('w'),  # NOTE(review): text mode, but text_dump() writes ascii-encoded bytes -- confirm whether 'wb' is needed on Python 3
+                        help="Destination file to output")
+
+    args = parser.parse_args()
+    in_file = args.input[0]
+    out_file = args.output[0]
+
+    records = list(read_file(in_file, in_file.name, get_record_types()))
+    write_file(out_file, out_file.name, records)

From bfd43b7448b327f39799d12f9c679c2dc1d47f3a Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Fri, 12 Jun 2020 14:45:08 -0500
Subject: [PATCH 06/20] release 0.2018.2

---
 pyaccuwage/model.py        | 2 --
 scripts/pyaccuwage-convert | 1 +
 setup.py                   | 7 ++++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pyaccuwage/model.py b/pyaccuwage/model.py
index becd3ce..94c7d4a 100644
--- a/pyaccuwage/model.py
+++ b/pyaccuwage/model.py
@@ -36,7 +36,6 @@ class Model(object):
             self.__dict__[key] = value
 
     def set_field_value(self, field_name, value):
-        print('setfieldval: ' + field_name + ' ' + value)
         getattr(self, field_name).value = value
 
     def get_fields(self):
@@ -87,7 +86,6 @@ class Model(object):
         # Skip the first record, since that's an identifier
         for field in self.get_sorted_fields()[1:]:
             field.read(fp)
-            print(field.name, '"' + (str(field.value) or '') + '"', field.max_length, field._orig_value)
 
     def toJSON(self):
         return {
diff --git a/scripts/pyaccuwage-convert b/scripts/pyaccuwage-convert
index 9591760..9239cac 100755
--- a/scripts/pyaccuwage-convert
+++ b/scripts/pyaccuwage-convert
@@ -73,3 +73,4 @@ if __name__ == '__main__':
 
     records = list(read_file(in_file, in_file.name, get_record_types()))
     write_file(out_file, out_file.name, records)
+    print("wrote {} records to {}".format(len(records), out_file.name))
diff --git a/setup.py b/setup.py
index b543ddb..156465d 100644
--- a/setup.py
+++ b/setup.py
@@ -7,13 +7,14 @@ def pyaccuwage_tests():
     return test_suite
 
 setup(name='pyaccuwage',
-    version='0.2018.1',
+    version='0.2018.2',
     packages=['pyaccuwage'],
     scripts=[
+		'scripts/pyaccuwage-checkseq',
+		'scripts/pyaccuwage-convert',
+		'scripts/pyaccuwage-genfieldfill',
 		'scripts/pyaccuwage-parse',
 		'scripts/pyaccuwage-pdfparse',
-		'scripts/pyaccuwage-checkseq',
-		'scripts/pyaccuwage-genfieldfill'
 		],
     zip_safe=True,
     test_suite='setup.pyaccuwage_tests',

From 7867a52a0c3ed5da0af89995b917ff20a3c90a1c Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Fri, 29 Jan 2021 16:26:26 -0500
Subject: [PATCH 07/20] flipped args around like a simpleton

---
 pyaccuwage/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyaccuwage/__init__.py b/pyaccuwage/__init__.py
index 513b8c6..c7db35c 100644
--- a/pyaccuwage/__init__.py
+++ b/pyaccuwage/__init__.py
@@ -64,7 +64,7 @@ def dump(fp, records, delim=None):
 def dumps(records, delim=None):
     import io
     fp = io.BytesIO()
-    dump(records, fp, delim=delim)
+    dump(fp, records, delim=delim)
     fp.seek(0)
     return fp.read()
 

From 558e3fd23286059d07eb3b03d43cc3fe42ca55b9 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Thu, 2 Sep 2021 17:40:35 -0500
Subject: [PATCH 08/20] hopefully fix StaticField

---
 pyaccuwage/fields.py | 6 ++++--
 tests/test_fields.py | 6 ++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/pyaccuwage/fields.py b/pyaccuwage/fields.py
index 0d6fcd3..27c10dc 100644
--- a/pyaccuwage/fields.py
+++ b/pyaccuwage/fields.py
@@ -198,12 +198,14 @@ class StaticField(TextField):
     def __init__(self, name=None, required=True, value=None):
         super(StaticField, self).__init__(name=name, required=required,
                                             max_length=len(value))
-        self._value = value
+        self._static_value = value
+
+    def get_data(self):
+        return str(self._static_value).encode('ascii')[:self.max_length]
 
     def parse(self, s):
         pass
 
-
 class BlankField(TextField):
     is_read_only = True
 
diff --git a/tests/test_fields.py b/tests/test_fields.py
index 3d8fd3e..2d32755 100644
--- a/tests/test_fields.py
+++ b/tests/test_fields.py
@@ -1,5 +1,6 @@
 import unittest
 from pyaccuwage.fields import TextField
+from pyaccuwage.fields import StaticField
 # from pyaccuwage.fields import IntegerField
 # from pyaccuwage.fields import StateField
 # from pyaccuwage.fields import BlankField
@@ -31,3 +32,8 @@ class TestTextField(unittest.TestCase):
         data = field.get_data()
         self.assertEqual(len(data), field.max_length)
         self.assertEqual(data, b'HELLO,')
+
+class TestStaticField(unittest.TestCase):
+    def test_static_field(self):
+        field = StaticField(value='TEST')
+        self.assertEqual(field.get_data(), b'TEST')

From 0bd82e09c451f3fa338362e266b9d39164eedca6 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Fri, 3 Sep 2021 05:45:01 -0500
Subject: [PATCH 09/20] Fix StaticField + tests for StaticField and unset
 optional TextField

---
 pyaccuwage/fields.py  | 15 ++++++++-------
 pyaccuwage/model.py   |  2 +-
 tests/test_fields.py  |  5 +++++
 tests/test_records.py |  8 ++++++--
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/pyaccuwage/fields.py b/pyaccuwage/fields.py
index 27c10dc..2a78fd5 100644
--- a/pyaccuwage/fields.py
+++ b/pyaccuwage/fields.py
@@ -21,6 +21,7 @@ class ValidationError(Exception):
 class Field(object):
     creation_counter = 0
     is_read_only = False
+    _value = None
 
     def __init__(self, name=None, max_length=0, required=True, uppercase=True, creation_counter=None):
         self.name = name
@@ -122,7 +123,7 @@ class TextField(Field):
             raise ValidationError("value is too long", field=self)
 
     def get_data(self):
-        value = str(self.value).encode('ascii') or b''
+        value = str(self.value or '').encode('ascii') or b''
         if self.uppercase:
             value = value.upper()
         return value.ljust(self.max_length)[:self.max_length]
@@ -195,13 +196,13 @@ class IntegerField(TextField):
 
 
 class StaticField(TextField):
-    def __init__(self, name=None, required=True, value=None):
-        super(StaticField, self).__init__(name=name, required=required,
-                                            max_length=len(value))
+    def __init__(self, name=None, required=True, value=None, uppercase=False):
+        super(StaticField, self).__init__(name=name,
+                required=required,
+                max_length=len(value),
+                uppercase=uppercase)
         self._static_value = value
-
-    def get_data(self):
-        return str(self._static_value).encode('ascii')[:self.max_length]
+        self._value = value
 
     def parse(self, s):
         pass
diff --git a/pyaccuwage/model.py b/pyaccuwage/model.py
index 94c7d4a..1d7e4ed 100644
--- a/pyaccuwage/model.py
+++ b/pyaccuwage/model.py
@@ -25,7 +25,7 @@ class Model(object):
                     setattr(src_field, 'parent_name', self.__class__.__name__)
                 new_field_instance = copy.copy(src_field)
                 new_field_instance._orig_value = None
-                new_field_instance._value = None
+                new_field_instance._value = new_field_instance.value
                 self.__dict__[key] = new_field_instance
 
     def __setattr__(self, key, value):
diff --git a/tests/test_fields.py b/tests/test_fields.py
index 2d32755..050285e 100644
--- a/tests/test_fields.py
+++ b/tests/test_fields.py
@@ -33,6 +33,11 @@ class TestTextField(unittest.TestCase):
         self.assertEqual(len(data), field.max_length)
         self.assertEqual(data, b'HELLO,')
 
+    def testStringUnsetOptional(self):
+        field = TextField(max_length=6, required=False)
+        field.validate()
+        self.assertEqual(field.get_data(), b' ' * 6)
+
 class TestStaticField(unittest.TestCase):
     def test_static_field(self):
         field = StaticField(value='TEST')
diff --git a/tests/test_records.py b/tests/test_records.py
index a6485ac..dfa25fa 100644
--- a/tests/test_records.py
+++ b/tests/test_records.py
@@ -7,6 +7,7 @@ from pyaccuwage.fields import MoneyField
 from pyaccuwage.fields import StateField
 from pyaccuwage.fields import TextField
 from pyaccuwage.fields import ZeroField
+from pyaccuwage.fields import StaticField
 from pyaccuwage.model import Model
 
 class TestModelOutput(unittest.TestCase):
@@ -20,7 +21,8 @@ class TestModelOutput(unittest.TestCase):
         money = MoneyField(max_length=32)
         state_txt = StateField()
         state_num = StateField(use_numeric=True)
-        blank2 = BlankField(max_length=24)
+        blank2 = BlankField(max_length=12)
+        static1 = StaticField(value='hey mister!!')
 
     def setUp(self):
         self.model = TestModelOutput.TestModel()
@@ -42,7 +44,8 @@ class TestModelOutput(unittest.TestCase):
             b'313377'.zfill(32),
             b'IA',
             b'19',
-            b' ' * 24,
+            b' ' * 12,
+            b'hey mister!!',
             ])
 
         output = model.output()
@@ -64,6 +67,7 @@ field2: 12345
 money: 3133.77
 state_txt: IA
 state_num: IA
+static1: hey mister!!
 
 ''')
 

From f28cd6edf245da2c5f99355e7acffb8565e12a48 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Fri, 3 Sep 2021 07:48:24 -0500
Subject: [PATCH 10/20] bump version 0.2020.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 156465d..7a4907b 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ def pyaccuwage_tests():
     return test_suite
 
 setup(name='pyaccuwage',
-    version='0.2018.2',
+    version='0.2020.0',
     packages=['pyaccuwage'],
     scripts=[
 		'scripts/pyaccuwage-checkseq',

From 042de7ecb0d9e09110347536ee0da2a73dd02fb9 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Sat, 18 Dec 2021 08:56:43 -0500
Subject: [PATCH 11/20] import typing.Callable (python 3.10+)

---
 pyaccuwage/__init__.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pyaccuwage/__init__.py b/pyaccuwage/__init__.py
index c7db35c..d438817 100644
--- a/pyaccuwage/__init__.py
+++ b/pyaccuwage/__init__.py
@@ -1,4 +1,7 @@
-from collections import Callable
+try:
+    from collections.abc import Callable  # Python 3.3+; the only location on 3.10+
+except ImportError:
+    from collections import Callable  # Python 2 fallback
 
 VERSION = (0, 2012, 0)
 

From 86f8861da1d038407127ee431ce1cd71782f116f Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Sun, 6 Feb 2022 11:06:51 -0600
Subject: [PATCH 12/20] encode record delimiter as ascii bytes when str is
 passed

---
 pyaccuwage/__init__.py | 2 ++
 tests/test_records.py  | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/pyaccuwage/__init__.py b/pyaccuwage/__init__.py
index d438817..3db24fc 100644
--- a/pyaccuwage/__init__.py
+++ b/pyaccuwage/__init__.py
@@ -58,6 +58,8 @@ def loads(s, record_types=get_record_types()):
 
 
 def dump(fp, records, delim=None):
+    if type(delim) is str:
+        delim = delim.encode('ascii')
     for r in records:
         fp.write(r.output())
         if delim:
diff --git a/tests/test_records.py b/tests/test_records.py
index dfa25fa..166b6d5 100644
--- a/tests/test_records.py
+++ b/tests/test_records.py
@@ -90,7 +90,8 @@ class TestFileFormats(unittest.TestCase):
         record_identifier = 'B'  # 1 byte
         zero1 = ZeroField(max_length=32)
         text1 = TextField(max_length=71)
-        blank2 = BlankField(max_length=24)
+        text2 = TextField(max_length=20)
+        blank2 = BlankField(max_length=4)
 
     record_types = [TestModelA, TestModelB]
 

From 66573e4d1deca1c264cf1f386b4e6870f770adc7 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Fri, 29 Mar 2024 10:46:01 -0400
Subject: [PATCH 13/20] update for 2023 p1220 parsing, stupid irs

---
 pyaccuwage/pdfextract.py    | 391 +++++++++---------------------------
 requirements.txt            |   1 +
 scripts/pyaccuwage-pdfparse |  51 +----
 3 files changed, 97 insertions(+), 346 deletions(-)
 create mode 100644 requirements.txt

diff --git a/pyaccuwage/pdfextract.py b/pyaccuwage/pdfextract.py
index 2903b5d..352c400 100644
--- a/pyaccuwage/pdfextract.py
+++ b/pyaccuwage/pdfextract.py
@@ -3,313 +3,102 @@
 
 import subprocess
 import re
-import pdb
+import itertools
+import fitz
 
 """ pdftotext -layout -nopgbrk p1220.pdf - """
 
+def strip_values(items):
+    # Trim each non-empty cell and flatten its line breaks; punctuation stays ('1-4' is parsed later).
+    return [x.strip().replace('\n', ' ') for x in items if x]
 
 class PDFRecordFinder(object):
-    def __init__(self, src, heading_exp=None):
-        if not heading_exp:
-            heading_exp = re.compile('(\s+Record Name: (.*))|Record\ Layout')
+    field_range_expr = re.compile(r'^(\d+)[-]?(\d*)$')
 
-        field_heading_exp = re.compile('^Field.*Field.*Length.*Description')
+    def __init__(self, src):
+        self.document = fitz.open(src)
 
-        opts = ["pdftotext", "-layout", "-nopgbrk", "-eol", "unix", src, '-']
-        pdftext = subprocess.check_output(opts)
-        self.textrows = pdftext.split('\n')
-        self.heading_exp = heading_exp
-        self.field_heading_exp = field_heading_exp
+    def find_record_table_ranges(self):
+        matches = []
+        for (page_number, page) in enumerate(self.document):
+            header_rects = page.search_for("Record Name:")
+            for header_match_rect in header_rects:
+                header_match_rect.x0 = header_match_rect.x1  # Start after match of "Record Name: "
+                header_match_rect.x1 = page.bound().x1  # Extend to right side of page
+                header_text = page.get_textbox(header_match_rect)
+                record_name = re.sub(r'[^\w\s\n]*', '', header_text).strip()
+                matches.append((record_name, {
+                    'page': page_number,
+                    'y': header_match_rect.y1 - 5,  # Back up a hair to include header more reliably
+                }))
+        return matches
+
+    def find_records(self):
+        record_ranges = self.find_record_table_ranges()
+        for record_index, (record_name, record_details) in enumerate(record_ranges):
+            current_rows = []
+            next_index = record_index+1
+            (_, next_record_details) = record_ranges[next_index] if next_index < len(record_ranges) else (None, {'page': self.document.page_count-1})
+            for page_number in range(record_details['page'], next_record_details['page']):
+                page = self.document[page_number]
+                table_search_rect = page.bound()
+                if page_number == record_details['page']:
+                    table_search_rect.y0 = record_details['y']
+                tables = page.find_tables(
+                        clip = table_search_rect,
+                        min_words_horizontal = 1,
+                        min_words_vertical = 1,
+                        horizontal_strategy = "lines_strict",
+                        intersection_tolerance = 1,
+                        )
+                for table in tables:
+                    if table.col_count == 4:
+                        table = table.extract()
+                        # Parse field position (sometimes one cell holds several
+                        # values, one per line, so such rows must be split up).
+                        for row in table:
+                            first_column_lines = row[0].strip().split('\n')
+                            if len(first_column_lines) > 1:
+                                for sub_row in self.split_row(row):
+                                    current_rows.append(strip_values(sub_row))
+                            else:
+                                current_rows.append(strip_values(row))
+            consecutive_rows = self.filter_nonconsecutive_rows(current_rows)
+            yield(record_name, consecutive_rows)
+
+    def split_row(self, row):
+        """Split a row whose leading cells hold several newline-separated values into one row per value."""
+        if not row[1]:
+            return []
+        columns = [(cell or '').strip().split('\n') for cell in row[:3]]  # tolerate empty/None cells
+        description = (strip_values([row[3]]) or [''])[0]  # '' rather than IndexError on an empty cell
+        rows = []
+        for parts in itertools.zip_longest(*columns, fillvalue=None):
+            # zip_longest always yields 3-tuples; a missing length is inferred from the position range.
+            rows.append([*(parts if parts[2] else self.infer_field_length(parts)), description])
+        return rows
+
+    def infer_field_length(self, row):
+        matches = PDFRecordFinder.field_range_expr.match(row[0])
+        if not matches:
+            return row
+        (start, end) = ([int(x) for x in list(matches.groups()) if x] + [None])[:2]
+        length = str(end-start+1) if end and start else '1'
+        return (*row[:2], length)
+
+    def filter_nonconsecutive_rows(self, rows):
+        consecutive_rows = []
+        last_position = 0
+        for row in rows:
+            matches = PDFRecordFinder.field_range_expr.match(row[0])
+            if not matches:
+                continue
+            (start, end) = ([int(x) for x in list(matches.groups()) if x] + [None])[:2]
+            if start != last_position + 1:
+                continue
+            last_position = end if end else start
+            consecutive_rows.append(row)
+        return consecutive_rows
 
     def records(self):
-        headings = self.locate_heading_rows_by_field()
-
-        #for x in headings:
-        #    print x
-
-        for (start, end, name) in headings:
-            name = name.decode('ascii', 'ignore')
-            yield (name, list(self.find_fields(iter(self.textrows[start+1:end]))), (start+1, end))
-
-
-    def locate_heading_rows_by_field(self):
-        results = []
-        record_break = []
-        line_is_whitespace_exp = re.compile('^(\s*)$')
-        record_begin_exp = self.heading_exp #re.compile('Record\ Name')
-
-        for (i, row) in enumerate(self.textrows):
-            match = self.field_heading_exp.match(row)
-            if match:
-                # work backwards until we think the header is fully copied
-                space_count_exp = re.compile('^(\s*)')
-                position = i - 1
-                spaces = 0
-                #last_spaces = 10000
-                complete = False
-                header = None
-                while not complete:
-                    line_is_whitespace = True if line_is_whitespace_exp.match(self.textrows[position]) else False
-                    is_record_begin = record_begin_exp.search(self.textrows[position])
-                    if is_record_begin or line_is_whitespace:
-                        header = self.textrows[position-1:i]
-                        complete = True
-                    position -= 1
-
-                name = ''.join(header).strip().decode('ascii','ignore')
-                print((name, position))
-                results.append((i, name, position))
-            else:
-                # See if this row forces us to break from field reading.
-                if re.search('Record\ Layout', row):
-                    record_break.append(i)
-
-        merged = []
-        for (a, b) in zip(results, results[1:] + [(len(self.textrows), None)]):
-            end_pos = None
-
-            #print a[0], record_break[0], b[0]-1
-
-            while record_break and record_break[0] < a[0]:
-                record_break = record_break[1:]
-
-            if record_break[0] < b[0]-1:
-                end_pos = record_break[0]
-                record_break = record_break[1:]
-            else:
-                end_pos = b[0]-1
-
-            merged.append( (a[0], end_pos-1, a[1]) )
-        return merged
-
-    """
-    def locate_heading_rows(self):
-        results = []
-        for (i, row) in enumerate(self.textrows):
-            match = self.heading_exp.match(row)
-            if match:
-                results.append((i, ''.join(match.groups())))
-
-        merged = []
-        for (a, b) in zip(results, results[1:] + [(len(self.textrows),None)]):
-            merged.append( (a[0], b[0]-1, a[1]) )
-
-        return merged
-
-    def locate_layout_block_rows(self):
-        # Search for rows that contain "Record Layout", as these are not fields
-        # we are interested in because they contain the crazy blocks of field definitions
-        # and not the nice 4-column ones that we're looking for.
-
-        results = []
-        for (i, row) in enumerate(self.textrows):
-            match = re.match("Record Layout", row)
-
-    """
-
-    def find_fields(self, row_iter):
-        cc = ColumnCollector()
-        blank_row_counter = 0
-
-        for r in row_iter:
-            row = r.decode('UTF-8')
-            #print row
-            row_columns = self.extract_columns_from_row(row)
-
-            if not row_columns:
-                if cc.data and len(list(cc.data.keys())) > 1 and len(row.strip()) > list(cc.data.keys())[-1]:
-                    yield cc
-                    cc = ColumnCollector()
-                else:
-                    cc.empty_row()
-                continue
-
-            try:
-                cc.add(row_columns)
-
-            except IsNextField as e:
-                yield cc
-                cc = ColumnCollector()
-                cc.add(row_columns)
-            except UnknownColumn as e:
-                raise StopIteration
-
-        yield cc
-
-
-    def extract_columns_from_row(self, row):
-        re_multiwhite = re.compile(r'\s{2,}')
-
-        # IF LINE DOESN'T CONTAIN MULTIPLE WHITESPACES, IT'S LIKELY NOT A TABLE
-        if not re_multiwhite.search(row):
-            return None
-
-        white_ranges = [0,]
-        pos = 0
-        while pos < len(row):
-            match = re_multiwhite.search(row[pos:])
-            if match:
-                white_ranges.append(pos + match.start())
-                white_ranges.append(pos + match.end())
-                pos += match.end()
-            else:
-                white_ranges.append(len(row))
-                pos = len(row)
-
-        row_result = []
-        white_iter = iter(white_ranges)
-        while white_iter:
-            try:
-                start = next(white_iter)
-                end = next(white_iter)
-                if start != end:
-                    row_result.append(
-                        (start, row[start:end].encode('ascii','ignore'))
-                    )
-
-            except StopIteration:
-                white_iter = None
-
-        #print row_result
-        return row_result
-
-
-class UnknownColumn(Exception):
-    pass
-
-class IsNextField(Exception):
-    pass
-
-class ColumnCollector(object):
-    def __init__(self, initial=None):
-        self.data = None
-        self.column_widths = None
-        self.max_data_length = 0
-        self.adjust_pad = 3
-        self.empty_rows = 0
-        pass
-
-    def __repr__(self):
-        return "<%s: %s>" % (
-                self.__class__.__name__,
-                [x if len(x) < 25 else x[:25] + '..' for x in list(self.data.values()) if self.data else ''])
-
-    def add(self, data):
-        #if self.empty_rows > 2:
-        #    raise IsNextField()
-
-        if not self.data:
-            self.data = dict(data)
-        else:
-            data = self.adjust_columns(data)
-            if self.is_next_field(data):
-                raise IsNextField()
-            for col_id, value in data:
-                self.merge_column(col_id, value)
-
-        self.update_column_widths(data)
-
-    def empty_row(self):
-        self.empty_rows += 1
-
-    def update_column_widths(self, data):
-        self.last_data_length = len(data)
-        self.max_data_length = max(self.max_data_length, len(data))
-
-        if not self.column_widths:
-            self.column_widths = dict([[column_value[0], column_value[0] + len(column_value[1])] for column_value in data])
-        else:
-            for col_id, value in data:
-                try:
-                    self.column_widths[col_id] = max(self.column_widths[col_id], col_id + len(value.strip()))
-                except KeyError:
-                    pass
-
-    def add_old(self, data):
-        if not self.data:
-            self.data = dict(data)
-        else:
-            if self.is_next_field(data):
-                raise IsNextField()
-            for col_id, value in data:
-                self.merge_column(col_id, value)
-
-
-    def adjust_columns(self, data):
-        adjusted_data = {}
-        for col_id, value in data:
-            if col_id in list(self.data.keys()):
-                adjusted_data[col_id] = value.strip()
-            else:
-                for col_start, col_end in list(self.column_widths.items()):
-                    if (col_start - self.adjust_pad) <= col_id and (col_end + self.adjust_pad) >= col_id:
-                        if col_start in adjusted_data:
-                            adjusted_data[col_start] += ' ' + value.strip()
-                        else:
-                            adjusted_data[col_start] = value.strip()
-
-        return list(adjusted_data.items())
-
-
-    def merge_column(self, col_id, value):
-        if col_id in list(self.data.keys()):
-            self.data[col_id] += ' ' + value.strip()
-        else:
-            # try adding a wiggle room value?
-            # FIXME:
-            # Sometimes description columns contain column-like
-            # layouts, and this causes the ColumnCollector to become
-            # confused. Perhaps we could check to see if a column occurs
-            # after the maximum column, and assume it's part of the
-            # max column?
-
-            """
-            for col_start, col_end in self.column_widths.items():
-                if col_start <= col_id and (col_end) >= col_id:
-                    self.data[col_start] += ' ' + value.strip()
-                    return
-            """
-            raise UnknownColumn
-
-    def is_next_field(self, data):
-        """
-        If the first key value contains a string
-        and we already have some data in the record,
-        then this row is probably the beginning of
-        the next field. Raise an exception and continue
-        on with a fresh ColumnCollector.
-        """
-
-        """ If the length of the value in column_id is less than the position of the next column_id,
-            then this is probably a continuation.
-        """
-
-        if self.data and data:
-            keys = list(dict(self.column_widths).keys())
-            keys.sort()
-            keys += [None]
-
-            if self.last_data_length < len(data):
-                return True
-
-            first_key, first_value = list(dict(data).items())[0]
-            if list(self.data.keys())[0] == first_key:
-
-                position = keys.index(first_key)
-                max_length = keys[position + 1]
-                if max_length:
-                    return len(first_value) > max_length or len(data) == self.max_data_length
-
-        return False
-
-
-    @property
-    def tuple(self):
-        #try:
-        if self.data:
-            return tuple([self.data[k] for k in sorted(self.data.keys())])
-        return ()
-        #except:
-        #    import pdb
-        #    pdb.set_trace()
-
+        return self.find_records()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..82813ad
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+PyMuPDF==1.24.0
diff --git a/scripts/pyaccuwage-pdfparse b/scripts/pyaccuwage-pdfparse
index 6a35387..d80abaa 100755
--- a/scripts/pyaccuwage-pdfparse
+++ b/scripts/pyaccuwage-pdfparse
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 from pyaccuwage.parser import RecordBuilder
 from pyaccuwage.pdfextract import PDFRecordFinder
 import argparse
@@ -29,48 +29,9 @@ doc = PDFRecordFinder(source_file)
 records = doc.records()
 builder = RecordBuilder()
 
-def record_begins_at(field):
-    return int(fields[0].data.values()[0].split('-')[0], 10)
-
-def record_ends_at(fields):
-    return int(fields[-1].data.values()[0].split('-')[-1], 10)
-
-last_record_begins_at = -1
-last_record_ends_at = -1
-
-for rec in records:
-    #if not rec[1]:
-    #    continue # no actual fields detected
-    fields = rec[1]
-
-    # strip out fields that are not 4 items long
-    fields = filter(lambda x:len(x.tuple) == 4, fields)
-
-    # strip fields that don't begin at position 0
-    fields = filter(lambda x: 0 in x.data, fields)
-
-    # strip fields that don't have a length-range type item in position 0
-    fields = filter(lambda x: re.match('^\d+[-]?\d*$', x.data[0]), fields)
-
-    if not fields:
-        continue
-
-    begins_at = record_begins_at(fields)
-    ends_at = record_ends_at(fields)
-
-    # FIXME record_ends_at is randomly exploding due to record data being
-    # a lump of text and not necessarily a field entry. I assume
-    # this is cleaned out by the record builder class.
-
-    #print last_record_ends_at + 1, begins_at
-    if last_record_ends_at + 1 != begins_at:
-        name = re.sub('^[^a-zA-Z]*','',rec[0].split(':')[-1])
-        name = re.sub('[^\w]*', '', name)
-        sys.stdout.write("\nclass %s(pyaccuwagemodel.Model):\n" % name)
-
-    for field in builder.load(map(lambda x:x.tuple, rec[1][0:])):
+for (name, fields) in records:
+    name = re.sub(r'^[^a-zA-Z]*','', name.split(':')[-1])
+    name = re.sub(r'[^\w]*', '', name)
+    sys.stdout.write("\nclass %s(pyaccuwagemodel.Model):\n" % name)
+    for field in builder.load(map(lambda x: x, fields[0:])):
         sys.stdout.write('\t' + field + '\n')
-        #print field
-
-    last_record_ends_at = ends_at
-

From 74b7935ceda47c5f5c2197e32bed4cdd920d477b Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Fri, 29 Mar 2024 10:50:25 -0400
Subject: [PATCH 14/20] bump version to 2024

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7a4907b..078072e 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ def pyaccuwage_tests():
     return test_suite
 
 setup(name='pyaccuwage',
-    version='0.2020.0',
+    version='0.2024.0',
     packages=['pyaccuwage'],
     scripts=[
 		'scripts/pyaccuwage-checkseq',

From 5f4dc8b80f98924a450486c1ffc51481d72e39e0 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Sun, 31 Mar 2024 11:14:16 -0400
Subject: [PATCH 15/20] add 'blank' field option to allow empty text in
 required fields (default: false)

---
 pyaccuwage/__init__.py |  5 ++++-
 pyaccuwage/fields.py   | 18 +++++++++++-----
 pyaccuwage/model.py    |  6 +++++-
 setup.py               |  2 +-
 tests/test_records.py  | 49 +++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/pyaccuwage/__init__.py b/pyaccuwage/__init__.py
index 3db24fc..e9c3878 100644
--- a/pyaccuwage/__init__.py
+++ b/pyaccuwage/__init__.py
@@ -66,9 +66,12 @@ def dump(fp, records, delim=None):
             fp.write(delim)
 
 
-def dumps(records, delim=None):
+def dumps(records, delim=None, skip_validation=False):
     import io
     fp = io.BytesIO()
+    records = list(records)  # materialize: validated here, then iterated again by dump()
+    for record in ([] if skip_validation else records):
+        record.validate()
     dump(fp, records, delim=delim)
     fp.seek(0)
     return fp.read()
diff --git a/pyaccuwage/fields.py b/pyaccuwage/fields.py
index 2a78fd5..18aaabf 100644
--- a/pyaccuwage/fields.py
+++ b/pyaccuwage/fields.py
@@ -23,11 +23,12 @@ class Field(object):
     is_read_only = False
     _value = None
 
-    def __init__(self, name=None, max_length=0, required=True, uppercase=True, creation_counter=None):
+    def __init__(self, name=None, max_length=0, blank=False, required=True, uppercase=True, creation_counter=None):
         self.name = name
         self._value = None
         self._orig_value = None
         self.max_length = max_length
+        self.blank = blank
         self.required = required
         self.uppercase = uppercase
         self.creation_counter = creation_counter or Field.creation_counter
@@ -97,9 +98,9 @@ class Field(object):
         wrapper.width = 100
         value = wrapper.wrap(value)
         value = list([(" " * 9) + ('"' + x + '"') for x in value])
-        value.append(" " * 10 + ('_' * 10) * (wrapper.width / 10))
-        value.append(" " * 10 + ('0123456789') * (wrapper.width / 10))
-        value.append(" " * 10 + ''.join(([str(x) + (' ' * 9) for x in range(wrapper.width / 10 )])))
+        value.append(" " * 10 + ('_' * 10) * int(wrapper.width / 10))
+        value.append(" " * 10 + ('0123456789') * int(wrapper.width / 10))
+        value.append(" " * 10 + ''.join(([str(x) + (' ' * 9) for x in range(int(wrapper.width / 10))])))
 
         start = counter['c']
         counter['c'] += len(self._orig_value or self.value)
@@ -121,6 +122,9 @@ class TextField(Field):
             raise ValidationError("value required", field=self)
         if len(self.get_data()) > self.max_length:
             raise ValidationError("value is too long", field=self)
+        if len(self.get_data().strip()) == 0 and (not self.blank and self.required):
+            print(self.name, 'blank', self.blank, self.required)
+            raise ValidationError("field cannot be blank", field=self)
 
     def get_data(self):
         value = str(self.value or '').encode('ascii') or b''
@@ -144,7 +148,7 @@ class TextField(Field):
 
 class StateField(TextField):
     def __init__(self, name=None, required=True, use_numeric=False, max_length=2):
-        super(StateField, self).__init__(name=name, max_length=2, required=required)
+        super(StateField, self).__init__(name=name, max_length=max_length, required=required)
         self.use_numeric = use_numeric
 
     def get_data(self):
@@ -219,6 +223,10 @@ class BlankField(TextField):
     def parse(self, s):
         pass
 
+    def validate(self):
+        if len(self.get_data()) != self.max_length:
+            raise ValidationError("blank field did not match expected length", field=self)
+
 
 class ZeroField(BlankField):
     is_read_only = True
diff --git a/pyaccuwage/model.py b/pyaccuwage/model.py
index 1d7e4ed..c950055 100644
--- a/pyaccuwage/model.py
+++ b/pyaccuwage/model.py
@@ -39,7 +39,11 @@ class Model(object):
         getattr(self, field_name).value = value
 
     def get_fields(self):
-        identifier = TextField("record_identifier", max_length=len(self.record_identifier), creation_counter=-1)
+        identifier = TextField(
+                "record_identifier",
+                max_length = len(self.record_identifier),
+                blank = len(self.record_identifier) == 0,
+                creation_counter=-1)
         identifier.value = self.record_identifier
         fields = [identifier]
 
diff --git a/setup.py b/setup.py
index 078072e..c3d830a 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ def pyaccuwage_tests():
     return test_suite
 
 setup(name='pyaccuwage',
-    version='0.2024.0',
+    version='0.2024.1',
     packages=['pyaccuwage'],
     scripts=[
 		'scripts/pyaccuwage-checkseq',
diff --git a/tests/test_records.py b/tests/test_records.py
index 166b6d5..67ce3ce 100644
--- a/tests/test_records.py
+++ b/tests/test_records.py
@@ -8,6 +8,7 @@ from pyaccuwage.fields import StateField
 from pyaccuwage.fields import TextField
 from pyaccuwage.fields import ZeroField
 from pyaccuwage.fields import StaticField
+from pyaccuwage.fields import ValidationError
 from pyaccuwage.model import Model
 
 class TestModelOutput(unittest.TestCase):
@@ -90,7 +91,7 @@ class TestFileFormats(unittest.TestCase):
         record_identifier = 'B'  # 1 byte
         zero1 = ZeroField(max_length=32)
         text1 = TextField(max_length=71)
-        text2 = TextField(max_length=20)
+        text2 = TextField(max_length=20, required=False)
         blank2 = BlankField(max_length=4)
 
     record_types = [TestModelA, TestModelB]
@@ -130,3 +131,49 @@ class TestFileFormats(unittest.TestCase):
         original_bytes = pyaccuwage.dumps(records)
         reloaded_bytes = pyaccuwage.dumps(records_loaded)
         self.assertEqual(original_bytes, reloaded_bytes)
+
+
+class TestRequiredFields(unittest.TestCase):
+    def createTestRecord(self, required=False, blank=False):
+        class Record(pyaccuwage.model.Model):
+            record_length = 16
+            record_identifier = ''
+            test_field = TextField(max_length=16, required=required, blank=blank)
+        record = Record()
+        def dump():
+            return pyaccuwage.dumps([record])
+        return (record, dump)
+
+    def testRequiredBlankField(self):
+        (record, dump) = self.createTestRecord(required=True, blank=True)
+        record.test_field.value   # if nothing is ever assigned, raise error
+        self.assertRaises(ValidationError, dump)
+        record.test_field.value = ''  # value may be empty string
+        dump()
+
+    def testRequiredNonblankField(self):
+        (record, dump) = self.createTestRecord(required=True, blank=False)
+        record.test_field.value   # if nothing is ever assigned, raise error
+        self.assertRaises(ValidationError, dump)
+        record.test_field.value = ''  # value must not be empty string
+        self.assertRaises(ValidationError, dump)
+        record.test_field.value = 'hello'
+        dump()
+
+    def testOptionalBlankField(self):
+        (record, dump) = self.createTestRecord(required=False, blank=True)
+        record.test_field.value  # OK if nothing is ever assigned
+        dump()
+        record.test_field.value = ''  # OK if empty string is assigned
+        dump()
+        record.test_field.value = 'hello'
+        dump()
+
+    def testOptionalNonBlankField(self):
+        (record, dump) = self.createTestRecord(required=False, blank=False)
+        record.test_field.value  # OK if nothing is ever assigned
+        dump()
+        record.test_field.value = ''  # OK if empty string is assigned
+        dump()
+        record.test_field.value = 'hello'
+        dump()

From e0e4c1291d9a8e91eb8428dab868cc30c4826727 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Sun, 31 Mar 2024 11:52:22 -0400
Subject: [PATCH 16/20] add min_length option to TextField for SSNs and stuff
 like that

---
 pyaccuwage/fields.py | 12 ++++++++----
 setup.py             |  2 +-
 tests/test_fields.py | 23 +++++++++++++++++++++++
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/pyaccuwage/fields.py b/pyaccuwage/fields.py
index 18aaabf..8291d63 100644
--- a/pyaccuwage/fields.py
+++ b/pyaccuwage/fields.py
@@ -23,10 +23,11 @@ class Field(object):
     is_read_only = False
     _value = None
 
-    def __init__(self, name=None, max_length=0, blank=False, required=True, uppercase=True, creation_counter=None):
+    def __init__(self, name=None, min_length=0, max_length=0, blank=False, required=True, uppercase=True, creation_counter=None):
         self.name = name
         self._value = None
         self._orig_value = None
+        self.min_length = min_length
         self.max_length = max_length
         self.blank = blank
         self.required = required
@@ -120,10 +121,13 @@ class TextField(Field):
     def validate(self):
         if self.value is None and self.required:
             raise ValidationError("value required", field=self)
-        if len(self.get_data()) > self.max_length:
+        data = self.get_data()
+        if len(data) > self.max_length:
             raise ValidationError("value is too long", field=self)
-        if len(self.get_data().strip()) == 0 and (not self.blank and self.required):
-            print(self.name, 'blank', self.blank, self.required)
+        stripped_data_length = len(data.strip())
+        if stripped_data_length < self.min_length:
+            raise ValidationError("value is too short", field=self)
+        if stripped_data_length == 0 and (not self.blank and self.required):
             raise ValidationError("field cannot be blank", field=self)
 
     def get_data(self):
diff --git a/setup.py b/setup.py
index c3d830a..ca6d8c5 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ def pyaccuwage_tests():
     return test_suite
 
 setup(name='pyaccuwage',
-    version='0.2024.1',
+    version='0.2024.2',
     packages=['pyaccuwage'],
     scripts=[
 		'scripts/pyaccuwage-checkseq',
diff --git a/tests/test_fields.py b/tests/test_fields.py
index 050285e..2707e6f 100644
--- a/tests/test_fields.py
+++ b/tests/test_fields.py
@@ -38,6 +38,29 @@ class TestTextField(unittest.TestCase):
         field.validate()
         self.assertEqual(field.get_data(), b' ' * 6)
 
+    def testStringRequiredUnassigned(self):
+        field = TextField(max_length=6)
+        self.assertRaises(ValidationError, lambda: field.validate())
+
+    def testStringRequiredNonBlank(self):
+        field = TextField(max_length=6)
+        field.value = ''
+        self.assertRaises(ValidationError, lambda: field.validate())
+
+    def testStringRequiredBlank(self):
+        field = TextField(max_length=6, blank=True)
+        field.value = ''
+        field.validate()
+        self.assertEqual(len(field.get_data()), 6)
+
+    def testStringMinimumLength(self):
+        field = TextField(max_length=6, min_length=6, blank=True)  # blank has no effect
+        for too_short in ('', '12345'):  # empty, and one character too short
+            field.value = too_short
+            self.assertRaises(ValidationError, field.validate)
+        field.value = '123456'  # exactly min_length characters
+        field.validate()  # must not raise
+
 class TestStaticField(unittest.TestCase):
     def test_static_field(self):
         field = StaticField(value='TEST')

From 4408da71a92d3acadfb53c52c01f4c639d19928f Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Wed, 10 Apr 2024 09:41:10 -0400
Subject: [PATCH 17/20] mark some fields as optional

---
 pyaccuwage/enums.py  |  1 +
 pyaccuwage/record.py | 25 ++++++++++++++-----------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/pyaccuwage/enums.py b/pyaccuwage/enums.py
index 8c96ebf..66a8722 100644
--- a/pyaccuwage/enums.py
+++ b/pyaccuwage/enums.py
@@ -323,6 +323,7 @@ employment_codes = (
         )
 
 tax_jurisdiction_codes = (
+        (' ', 'W-2'),
         ('V', 'Virgin Islands'),
         ('G', 'Guam'),
         ('S', 'American Samoa'),
diff --git a/pyaccuwage/record.py b/pyaccuwage/record.py
index 9e46217..a4fe89b 100644
--- a/pyaccuwage/record.py
+++ b/pyaccuwage/record.py
@@ -105,8 +105,8 @@ class EmployerRecord(EFW2Record):
     zipcode_ext =                   TextField(max_length=4, required=False)
     kind_of_employer =              TextField(max_length=1)
     blank1 =                        BlankField(max_length=4)
-    foreign_state_province =        TextField(max_length=23)
-    foreign_postal_code =           TextField(max_length=15)
+    foreign_state_province =        TextField(max_length=23, required=False)
+    foreign_postal_code =           TextField(max_length=15, required=False)
     country_code =                  TextField(max_length=2, required=False)
     employment_code =               TextField(max_length=1)
     tax_jurisdiction_code =         TextField(max_length=1, required=False)
@@ -150,7 +150,7 @@ class EmployeeWageRecord(EFW2Record):
 
     ssn =                           IntegerField(max_length=9, required=False)
     employee_first_name =           TextField(max_length=15)
-    employee_middle_name =          TextField(max_length=15)
+    employee_middle_name =          TextField(max_length=15, required=False)
     employee_last_name =            TextField(max_length=20)
     employee_suffix =               TextField(max_length=4, required=False)
     location_address =              TextField(max_length=22)
@@ -163,7 +163,7 @@ class EmployeeWageRecord(EFW2Record):
     blank1 =                        BlankField(max_length=5)
     foreign_state =                 TextField(max_length=23, required=False)
     foreign_postal_code =           TextField(max_length=15, required=False)
-    country =                       TextField(max_length=2)
+    country =                       TextField(max_length=2, required=True, blank=True)
     wages_tips =                    MoneyField(max_length=11)
     federal_income_tax_withheld =   MoneyField(max_length=11)
     social_security_wages =         MoneyField(max_length=11)
@@ -199,8 +199,10 @@ class EmployeeWageRecord(EFW2Record):
     blank6 =                        BlankField(max_length=23)
 
     def validate_ssn(self, f):
-        if str(f.value).startswith('666','9'):
-            raise ValidationError("ssn cannot start with 666 or 9", field=f)
+        ssn = str(f.value)  # prefixes are checked on the string form of the value
+        for prefix, message in (('666', "ssn cannot start with 666"), ('9', "ssn cannot start with 9")):
+            if ssn.startswith(prefix):
+                raise ValidationError(message, field=f)
 
 
 
@@ -243,7 +245,7 @@ class StateWageRecord(EFW2Record):
     taxing_entity_code =            TextField(max_length=5, required=False)
     ssn =                           IntegerField(max_length=9, required=False)
     employee_first_name =           TextField(max_length=15)
-    employee_middle_name =          TextField(max_length=15)
+    employee_middle_name =          TextField(max_length=15, required=False)
     employee_last_name =            TextField(max_length=20)
     employee_suffix =               TextField(max_length=4, required=False)
     location_address =              TextField(max_length=22)
@@ -257,10 +259,10 @@ class StateWageRecord(EFW2Record):
     foreign_postal_code =           TextField(max_length=15, required=False)
     country_code =                  TextField(max_length=2, required=False)
     optional_code =                 TextField(max_length=2, required=False)
-    reporting_period =              MonthYearField()
+    reporting_period =              MonthYearField(required=False)
     quarterly_unemp_ins_wages =             MoneyField(max_length=11)
     quarterly_unemp_ins_taxable_wages =     MoneyField(max_length=11)
-    number_of_weeks_worked =        IntegerField(max_length=2)
+    number_of_weeks_worked =        IntegerField(max_length=2, required=False)
     date_first_employed =           DateField(required=False)
     date_of_separation =            DateField(required=False)
     blank2 =                        BlankField(max_length=5)
@@ -270,7 +272,7 @@ class StateWageRecord(EFW2Record):
     state_taxable_wages =           MoneyField(max_length=11)
     state_income_tax_wh =           MoneyField(max_length=11)
     other_state_data =              TextField(max_length=10, required=False)
-    tax_type_code =                 TextField(max_length=1) # VALIDATE C, D, E, or F
+    tax_type_code =                 TextField(max_length=1, required=False) # VALIDATE C, D, E, or F
     local_taxable_wages =           MoneyField(max_length=11)
     local_income_tax_wh =           MoneyField(max_length=11)
     state_control_number =          IntegerField(max_length=7, required=False)
@@ -280,7 +282,8 @@ class StateWageRecord(EFW2Record):
 
     def validate_tax_type_code(self, field):
         choices = [x for x,y in enums.tax_type_codes]
-        if field.value.upper() not in choices:
-            raise ValidationError("%s not one of %s" % (field.value,choices), field=f)
+        value = field.value  # may be empty: tax_type_code is declared with required=False
+        if value and value.upper() not in choices:
+            raise ValidationError("%s not one of %s" % (field.value,choices), field=field)
 
 

From fb8091fb095db088fafce3ea091586eaf5eb17e9 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Tue, 13 May 2025 12:09:49 -0500
Subject: [PATCH 18/20] change Iowa RS record state_employer_account_num from
 TextField to IntegerField

---
 pyaccuwage/record.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyaccuwage/record.py b/pyaccuwage/record.py
index a4fe89b..6b91123 100644
--- a/pyaccuwage/record.py
+++ b/pyaccuwage/record.py
@@ -266,7 +266,7 @@ class StateWageRecord(EFW2Record):
     date_first_employed =           DateField(required=False)
     date_of_separation =            DateField(required=False)
     blank2 =                        BlankField(max_length=5)
-    state_employer_account_num =    TextField(max_length=20)
+    state_employer_account_num =    IntegerField(max_length=20, required=False)
     blank3 =                        BlankField(max_length=6)
     state_code_2 =                  StateField(use_numeric=True)
     state_taxable_wages =           MoneyField(max_length=11)

From 1302de9df73af2202c4508df1bcae5c946a5c784 Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Tue, 13 May 2025 12:14:02 -0500
Subject: [PATCH 19/20] bump version to 0.2025.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index ca6d8c5..cdef46b 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ def pyaccuwage_tests():
     return test_suite
 
 setup(name='pyaccuwage',
-    version='0.2024.2',
+    version='0.2025.0',
     packages=['pyaccuwage'],
     scripts=[
 		'scripts/pyaccuwage-checkseq',

From 9029659f9881fa40b0cae7aef8de53b30d6e501d Mon Sep 17 00:00:00 2001
From: Mark Riedesel <mark@klowner.com>
Date: Tue, 13 May 2025 12:45:51 -0500
Subject: [PATCH 20/20] update internal VERSION property

---
 pyaccuwage/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyaccuwage/__init__.py b/pyaccuwage/__init__.py
index e9c3878..810de18 100644
--- a/pyaccuwage/__init__.py
+++ b/pyaccuwage/__init__.py
@@ -3,7 +3,7 @@ try:
 except:
     from typing import Callable # Python 3.10+
 
-VERSION = (0, 2012, 0)
+VERSION = (0, 2025, 0)
 
 RECORD_TYPES = [
     'SubmitterRecord',