Ranges in descriptions are ignored, except in cases where the
range matches the next expected range. The only way to get around this seems to be to manually remove the range value from the input. One idea is to iterate through the entire token set and look for range tokens: when a range token correctly continues the sequence, it is assumed to be the start of a new record. Alternatively, we could scan the whole list of tokens, look for out-of-order ranges, and exclude them as possible field identifiers. Example: 1 10* 10 20 30 90* 40 10* 50
This commit is contained in:
parent
04b3c3f273
commit
770aeb0d2b
1 changed files with 31 additions and 248 deletions
|
@ -102,6 +102,11 @@ class RangeToken(BaseToken):
|
||||||
def end_position(self):
|
def end_position(self):
|
||||||
return int(self._value.split('-')[1])
|
return int(self._value.split('-')[1])
|
||||||
|
|
||||||
|
@property
|
||||||
|
def start_position(self):
|
||||||
|
return int(self._value.split('-')[0])
|
||||||
|
|
||||||
|
|
||||||
class NumericToken(BaseToken):
|
class NumericToken(BaseToken):
|
||||||
regexp = re.compile('^(\d+)$')
|
regexp = re.compile('^(\d+)$')
|
||||||
|
|
||||||
|
@ -207,24 +212,34 @@ class PastedDefParser(object):
|
||||||
|
|
||||||
|
|
||||||
if isinstance(token, RangeToken) or token == None:
|
if isinstance(token, RangeToken) or token == None:
|
||||||
if current_range:
|
# IF THIS RANGETOKEN IS NOT THE BEGINNING OF A NEW
|
||||||
groups.append({
|
# FIELD, THEN ITS RANGES WILL NOT MATCH THE PREVIOUS
|
||||||
'byterange': current_range,
|
# byte_pos AND IS ASSUMED TO BE PART OF THE DESCRIPTION.
|
||||||
'name': current_name,
|
#if byte_pos and token and state == 'desc' and token.start_position != byte_pos:
|
||||||
'length': current_length,
|
# print token.start_position, byte_pos
|
||||||
'desc': current_desc,
|
# current_desc.append(token)
|
||||||
})
|
|
||||||
|
if token and byte_pos and token.start_position != byte_pos:
|
||||||
|
state = 'desc'
|
||||||
|
|
||||||
# UPDATE RANGE POSITION
|
else:
|
||||||
if token:
|
if current_range:
|
||||||
byte_pos = token.end_position + 1
|
groups.append({
|
||||||
|
'byterange': current_range,
|
||||||
|
'name': current_name,
|
||||||
|
'length': current_length,
|
||||||
|
'desc': current_desc,
|
||||||
|
})
|
||||||
|
|
||||||
current_range = token
|
# UPDATE RANGE POSITION
|
||||||
current_name = []
|
if token:
|
||||||
current_length = None
|
byte_pos = token.end_position + 1
|
||||||
current_desc = []
|
|
||||||
state = 'name'
|
current_range = token
|
||||||
|
current_name = []
|
||||||
|
current_length = None
|
||||||
|
current_desc = []
|
||||||
|
state = 'name'
|
||||||
|
|
||||||
elif state == 'name':
|
elif state == 'name':
|
||||||
if isinstance(token, StringToken) and current_name and isinstance(current_name[-1], NumericToken):
|
if isinstance(token, StringToken) and current_name and isinstance(current_name[-1], NumericToken):
|
||||||
|
@ -317,236 +332,4 @@ class PastedDefParser(object):
|
||||||
|
|
||||||
yield "".join(result)
|
yield "".join(result)
|
||||||
|
|
||||||
sdp = SimpleDefParser()
|
|
||||||
tokens = sdp.load([
|
|
||||||
"record type,text,1",
|
|
||||||
"payment year, year,2-5",
|
|
||||||
"corrected return indicator, 6",
|
|
||||||
])
|
|
||||||
|
|
||||||
|
|
||||||
pdp = PastedDefParser()
|
|
||||||
tokens2 = pdp.load("""
|
|
||||||
544 Second TIN
|
|
||||||
|
|
||||||
Notice
|
|
||||||
|
|
||||||
(Optional)
|
|
||||||
|
|
||||||
1 Enter “2” (two) to indicate notification by IRS twice within
|
|
||||||
|
|
||||||
three calendar years that the payee provided an incorrect name
|
|
||||||
|
|
||||||
and/or TIN combination; otherwise, enter a blank.
|
|
||||||
|
|
||||||
545-546 Blank 2 Enter blanks.
|
|
||||||
|
|
||||||
547-586 Foreign Country
|
|
||||||
|
|
||||||
or U.S.
|
|
||||||
|
|
||||||
Possession
|
|
||||||
|
|
||||||
40 Enter the name of the foreign country or U.S. possession to
|
|
||||||
|
|
||||||
which the withheld foreign tax (Amount Code 6) applies.
|
|
||||||
|
|
||||||
Otherwise, enter blanks.
|
|
||||||
|
|
||||||
587-599 CUSIP Number 13 Enter CUSIP Number. If the tax-exempt interest is reported in
|
|
||||||
|
|
||||||
the aggregate for multiple bonds or accounts, enter: VARIOUS.
|
|
||||||
|
|
||||||
Right-justify information and fill unused positions with blanks l.
|
|
||||||
|
|
||||||
600-662 Blank 63 Enter blanks.
|
|
||||||
|
|
||||||
663-722 Special Data
|
|
||||||
|
|
||||||
Entries
|
|
||||||
|
|
||||||
60 This portion of the “B” Record may be used to record
|
|
||||||
|
|
||||||
information for state or local government reporting or for the
|
|
||||||
|
|
||||||
filer's own purposes. Payers should contact the state or local
|
|
||||||
|
|
||||||
revenue departments for filing requirements. You may enter
|
|
||||||
|
|
||||||
your routing and transit number (RTN) here. If this field is not
|
|
||||||
|
|
||||||
utilized, enter blanks.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
103-114 Payment
|
|
||||||
|
|
||||||
Amount 5*
|
|
||||||
|
|
||||||
12 The amount reported in this field represents payments for
|
|
||||||
|
|
||||||
Amount Code 5 in the “A” Record.
|
|
||||||
|
|
||||||
115-126 Payment
|
|
||||||
|
|
||||||
Amount 6*
|
|
||||||
|
|
||||||
12 The amount reported in this field represents payments for
|
|
||||||
|
|
||||||
Amount Code 6 in the “A” Record.
|
|
||||||
|
|
||||||
127-138 Payment
|
|
||||||
|
|
||||||
Amount 7*
|
|
||||||
|
|
||||||
12 The amount reported in this field represents payments for
|
|
||||||
|
|
||||||
Amount Code 7 in the “A” Record.
|
|
||||||
|
|
||||||
139-150 Payment
|
|
||||||
|
|
||||||
Amount 8*
|
|
||||||
|
|
||||||
12 The amount reported in this field represents payments f
|
|
||||||
""")
|
|
||||||
|
|
||||||
tokens3 = pdp.load("""
|
|
||||||
|
|
||||||
544-546 Blank 3 Enter blanks.
|
|
||||||
|
|
||||||
547 Type of
|
|
||||||
|
|
||||||
Payment
|
|
||||||
|
|
||||||
Indicator
|
|
||||||
|
|
||||||
1 Enter the appropriate indicator from the following table;
|
|
||||||
|
|
||||||
otherwise, enter blanks.
|
|
||||||
|
|
||||||
Indicator Usage
|
|
||||||
|
|
||||||
1 Per diem
|
|
||||||
|
|
||||||
2 Reimbursed amount
|
|
||||||
|
|
||||||
548-556 Social Security
|
|
||||||
|
|
||||||
Number of
|
|
||||||
|
|
||||||
Insured
|
|
||||||
|
|
||||||
9 Required. Enter the Social Security Number of the insured.
|
|
||||||
|
|
||||||
557-596 Name of Insured 40 Required. Enter the name of the insured.
|
|
||||||
|
|
||||||
597-636 Address of
|
|
||||||
|
|
||||||
Insured
|
|
||||||
|
|
||||||
40 Required. Enter the address of the insured. The street address
|
|
||||||
|
|
||||||
should include number, street, apartment or suite number (or PO
|
|
||||||
|
|
||||||
Box if mail is not delivered to street address). Left-justify
|
|
||||||
|
|
||||||
information and fill unused positions with blanks. This field
|
|
||||||
|
|
||||||
must not contain any data other than the payee’s address.
|
|
||||||
|
|
||||||
637-676 City of Insured 40 Required. Enter the city, town, or post office. Left-justify and
|
|
||||||
|
|
||||||
fill unused positions with blanks. Enter APO or FPO, if
|
|
||||||
|
|
||||||
applicable. Do not enter state and ZIP Code information in this
|
|
||||||
|
|
||||||
field.
|
|
||||||
|
|
||||||
677-678 State of Insured 2 Required. Enter the valid U.S. Postal Service state
|
|
||||||
|
|
||||||
abbreviations for states or the appropriate postal identifier (AA,
|
|
||||||
|
|
||||||
AE, or AP) described in Part A, Sec. 12.
|
|
||||||
|
|
||||||
679-687 ZIP Code of
|
|
||||||
|
|
||||||
Insured
|
|
||||||
|
|
||||||
9 Required. Enter the valid nine-digit ZIP Code assigned by the
|
|
||||||
|
|
||||||
U.S. Postal Service. If only the first five-digits are known, leftjustify information and fill the unused positions with blanks.
|
|
||||||
|
|
||||||
For foreign countries, alpha characters are acceptable as long as
|
|
||||||
|
|
||||||
the filer has entered a “1” (one) in the Foreign Country
|
|
||||||
|
|
||||||
Indicator, located in position 247 of the “B” Record.
|
|
||||||
|
|
||||||
688 Status of Illness
|
|
||||||
|
|
||||||
Indicator
|
|
||||||
|
|
||||||
(Optional)
|
|
||||||
|
|
||||||
1 Enter the appropriate code from the table below to indicate the
|
|
||||||
|
|
||||||
status of the illness of the insured; otherwise, enter blank.
|
|
||||||
|
|
||||||
Indicator Usage
|
|
||||||
|
|
||||||
1 Chronically ill
|
|
||||||
|
|
||||||
2 Terminally ill
|
|
||||||
|
|
||||||
689-696 Date Certified
|
|
||||||
|
|
||||||
(Optional)
|
|
||||||
|
|
||||||
8 Enter the latest date of a doctor's certification of the status of the
|
|
||||||
|
|
||||||
insured's illness. The format of the date is YYYYMMDD (e.g.,
|
|
||||||
|
|
||||||
January 5, 2011, would be 20110105). Do not enter hyphens
|
|
||||||
|
|
||||||
or slashes.
|
|
||||||
|
|
||||||
697 Qualified
|
|
||||||
|
|
||||||
Contract
|
|
||||||
|
|
||||||
Indicator
|
|
||||||
|
|
||||||
(Optional)
|
|
||||||
|
|
||||||
1 Enter a “1” (one) if benefits were from a qualified long-term
|
|
||||||
|
|
||||||
care insurance contract; otherwise, enter a blank.
|
|
||||||
|
|
||||||
698-722 Blank 25 Enter blanks.
|
|
||||||
|
|
||||||
723-734 State Income
|
|
||||||
|
|
||||||
Tax Withheld
|
|
||||||
|
|
||||||
12 State income tax withheld is for the convenience of the filers.
|
|
||||||
|
|
||||||
This information does not need to be reported to IRS. Rightjustify information and fill unused positions with zeros.
|
|
||||||
|
|
||||||
735-746 Local Income
|
|
||||||
|
|
||||||
Tax Withheld
|
|
||||||
|
|
||||||
12 Local income tax withheld is for the convenience of the filers.
|
|
||||||
|
|
||||||
This information does not need to be reported to IRS. The
|
|
||||||
|
|
||||||
payment amount must be right-justify information and fill
|
|
||||||
|
|
||||||
unused positions with zeros.
|
|
||||||
|
|
||||||
747-748 Blank 2 Enter blanks.
|
|
||||||
|
|
||||||
749-750 Blank 2 Enter blanks or carriage return/line feed (CR/LF) characters.
|
|
||||||
|
|
||||||
|
|
||||||
""")
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue