######################################################################
# Copyright
# John Holland <john@zoner.org>
# All rights reserved.
#
# This software is licensed as described in the file LICENSE.txt, which
# you should have received as part of this distribution.
#
######################################################################
"""
X12 data element validation
"""
from __future__ import annotations
import re
# Intrapackage imports
from .errors import EngineError, IsValidError
[docs]
REGEX_MODE = re.S | re.ASCII
[docs]
def IsValidDataType(
str_val: str,
data_type: str,
charset: str = "B",
icvn: str = "00401",
) -> bool:
"""
Is str_val a valid X12 data value
:param str_val: data value to validate
:type str_val: string
:param data_type: X12 data element identifier
:type data_type: string
:param charset: [optional] - 'B' for Basic X12 character set, 'E' for extended
:type charset: string
:rtype: boolean
TODO: need to generalize control character validation
"""
if not data_type:
return True
if not isinstance(str_val, str):
return False
try:
if data_type[0] == "N":
if not match_re("N", str_val):
raise IsValidError # not a number
elif data_type == "R":
if not match_re("R", str_val):
raise IsValidError # not a number
elif data_type in ("ID", "AN"):
if not_match_re("ID", str_val, charset, icvn):
raise IsValidError
elif data_type == "RD8":
if "-" in str_val:
start, end = str_val.split("-")
return IsValidDataType(start, "D8", charset) and IsValidDataType(end, "D8", charset)
else:
return False
elif data_type in ("DT", "D8", "D6"):
if not is_valid_date(data_type, str_val):
raise IsValidError
elif data_type == "TM":
if not is_valid_time(str_val):
raise IsValidError
elif data_type == "B":
pass
else:
raise IsValidError("Unknown data type %s" % data_type)
except IsValidError:
return False
return True
[docs]
rec_N: re.Pattern[str] = re.compile(r"^-?[0-9]+", REGEX_MODE)
[docs]
rec_R: re.Pattern[str] = re.compile(r"^-?[0-9]*(\.[0-9]+)?", REGEX_MODE)
[docs]
rec_ID_E: re.Pattern[str] = re.compile(
r"""[^A-Z0-9!"&'()*+,\-\./:;?= a-z%~@\[\]_{}\\|<>#$]""", REGEX_MODE
)
[docs]
rec_ID_E5: re.Pattern[str] = re.compile(
r"""[^A-Z0-9!"&'()*+,\-\./:;?= a-z%~@\[\]_{}\\|<>^`#$]""", REGEX_MODE
)
[docs]
rec_ID_B: re.Pattern[str] = re.compile(r"""[^A-Z0-9!"&'()*+,\-\./:;?= ]""", REGEX_MODE)
[docs]
rec_DT: re.Pattern[str] = re.compile(r"[^0-9]+", REGEX_MODE)
[docs]
rec_TM: re.Pattern[str] = re.compile(r"[^0-9]+", REGEX_MODE)
[docs]
def match_re(short_data_type: str, val: str) -> bool:
"""
:param short_data_type: simplified data type
:type short_data_type: string
:param val: data value to be verified
:type val: string
:return: True if matched, False if not
:rtype: boolean
:raises EngineError: If short_data_type is not 'N' or 'R'
"""
if short_data_type == "N":
rec = rec_N
elif short_data_type == "R":
rec = rec_R
else:
raise EngineError("Unknown data type %s" % (short_data_type))
m = rec.search(val)
if not m:
return False
if m.group(0) != val: # matched substring != original, bad
return False # nothing matched
return True
[docs]
def not_match_re(
short_data_type: str,
val: str,
charset: str = "B",
icvn: str = "00401",
) -> bool:
"""
:param short_data_type: simplified data type
:type short_data_type: string
:param val: data value to be verified
:type val: string
:param charset: [optional] - 'B' for Basic X12 character set, 'E' for extended, E5 for 5010 Extended
:type charset: string
:return: True if found invalid characters, False if none
:rtype: boolean
:raises EngineError: If short_data_type or charset is unrecognized
"""
if short_data_type in ("ID", "AN"):
if charset == "E": # extended charset
if icvn == "00501":
rec = rec_ID_E5
else:
rec = rec_ID_E
elif charset == "B": # basic charset:
rec = rec_ID_B
else:
raise EngineError("Unknown charset %r for data type %s" % (charset, short_data_type))
elif short_data_type == "DT":
rec = rec_DT
elif short_data_type == "TM":
rec = rec_TM
else:
raise EngineError("Unknown data type %s" % (short_data_type))
m = rec.search(val)
if m and m.group(0):
return True # Invalid char matched
return False
[docs]
def is_valid_date(data_type: str, val: str) -> bool:
"""
:param data_type: Date type
:type data_type: string
:param val: data value to be verified
:type val: string
:return: True if valid, False if not
:rtype: boolean
"""
try:
if data_type == "D8" and len(val) != 8:
raise IsValidError
if data_type == "D6" and len(val) != 6:
raise IsValidError
if not_match_re("DT", val):
raise IsValidError
if len(val) in (6, 8, 12): # valid lengths for date
try:
if 6 == len(val): # if 2 digit year, add CC
val = "20" + val if int(val[0:2]) < 50 else "19" + val
# print("IXVALID:", data_type, val, int(val[0:4]), int(val[4:6]))
year = int(val[0:4]) # get year
month = int(val[4:6])
day = int(val[6:8])
# Should not have dates before 1/1/1800
if year < 1800:
raise IsValidError
# check month
if month < 1 or month > 12:
raise IsValidError
if month in (1, 3, 5, 7, 8, 10, 12): # 31 day month
if day < 1 or day > 31:
raise IsValidError
elif month in (4, 6, 9, 11): # 30 day month
if day < 1 or day > 30:
raise IsValidError
else: # else 28 day
if not year % 4 and not (not year % 100 and year % 400):
# if not (year % 4) and ((year % 100) or (not (year % 400)) ): # leap year
if day < 1 or day > 29:
raise IsValidError
elif day < 1 or day > 28:
raise IsValidError
if len(val) == 12:
if not is_valid_time(val[8:12]):
raise IsValidError
except TypeError:
raise IsValidError from None
else:
raise IsValidError
except IsValidError:
return False
return True
[docs]
def is_valid_time(val: str) -> bool:
"""
:param val: time value to be verified
:type val: string
:rtype: boolean
"""
try:
if not_match_re("TM", val):
raise IsValidError
if val[0:2] > "23" or val[2:4] > "59": # check hour, minute segment
raise IsValidError
elif len(val) > 4: # time contains seconds
if len(val) < 6: # length is munted
raise IsValidError
elif val[4:6] > "59": # check seconds
raise IsValidError
# check decimal seconds here in the future
elif len(val) > 8:
# print('unhandled decimal seconds encountered')
raise IsValidError
except (IsValidError, ValueError):
return False
return True
[docs]
def contains_control_character(
str_val: str,
charset: str = "B",
icvn: str = "00401",
) -> tuple[bool, str | None]:
control_base = {
chr(0x07): "BEL",
chr(0x09): "HT",
chr(0x0A): "LF",
chr(0x0B): "VT",
chr(0x0C): "FF",
chr(0x0D): "CR",
chr(0x1C): "FS",
chr(0x1D): "GS",
chr(0x1E): "RS",
chr(0x1F): "US",
}
extended_base = {
chr(0x01): "SOH",
chr(0x02): "STX",
chr(0x03): "ETX",
chr(0x04): "EOT",
chr(0x05): "ENQ",
chr(0x06): "ACK",
chr(0x11): "DC1",
chr(0x12): "DC2",
chr(0x13): "DC3",
chr(0x14): "DC4",
chr(0x15): "NAK",
chr(0x16): "SYN",
chr(0x17): "ETB",
}
for k, v in control_base.items():
if k in str_val:
return (True, f"<{v}>")
for k, v in extended_base.items():
if k in str_val:
return (True, f"<{v}>")
return (False, None)