# This is a dummy implementation of the proposed field splitting
# algorithm (written in sh, so hopefully sh people can follow it)
# to demonstrate that the algorithm as presented generates the
# expected output (that generated by almost every shell).

# This code knows that in the tests IFS=' ,' (space and comma)
# and rather than handling that generically, which would be possible,
# but messy, simply builds those two characters (literally) into the
# implementation (space, as an IFS white space char, and comma as an
# IFS char that is not white space).

# Similarly the code "knows" that if there is a prefix in the field
# (chars not to be treated as generated by an expansion, and hence
# exempt from splitting) that will be simply a single 'p' always, and
# similarly a suffix will be 'q' - because of that we do not need to
# have any method to indicate what part of the field is to be subject
# to field splitting

# In the following comments that start '##' are text lifted directly
# from my proposed section 2.6.5 ("Field Splitting") text, which might
# allow readers to match this algorithm with what is described there.

# The results from this test match exactly the results from all shells
# considered to operate correctly (the same output routine is used, and
# the results compared with diff - with zero differences).

# Single-delimiter test inputs, used by the "S" and "C" test cases:
# S is one space (the IFS white space character being modelled) and
# C is one comma (the IFS character that is not white space).
# field_split assigns a variable that is also called C; tst only ever
# runs field_split inside a command substitution, so the value set
# here is not disturbed by it.
S=" "
C=","

# Append $1 to OUT as one single-quoted shell word (a single space
# separates it from any word already in OUT).  A ' inside the value is
# written as '\'' (close quote, escaped quote, reopen quote) so that the
# word still evaluates to exactly $1 when the result is given to eval.
# Private helper of field_split: it works on the variables OUT, FS_REST
# and FS_WORD, which field_split's subshell body keeps away from the
# caller.
_fs_emit() {
	FS_REST=$1 FS_WORD=
	while case "${FS_REST}" in
		*\'*)	# copy the text before the first ', then the escaped '
			FS_WORD=${FS_WORD}${FS_REST%%\'*}"'\\''"
			FS_REST=${FS_REST#*\'}
			;;
		*)	false;;
		esac
	do :; done
	OUT=${OUT:+"${OUT} "}"'${FS_WORD}${FS_REST}'"
}

# Split one field using the algorithm of the proposed section 2.6.5,
# modelling IFS=' ,' (space is IFS white space, comma is the IFS
# character that is not white space).
#
#   $1      the field that needs to be split
#   stdout  the resulting fields, each as a single-quoted word, with one
#           space between words (nothing at all if there are no fields);
#           intended use:  eval "set -- $(field_split "$field")"
#
# The function body is a subshell, so none of the working variables
# (ARG, C, CD, TAIL, OUT) leak into, or overwrite, variables of the
# caller.  The words are joined explicitly with a space, so the output
# does not depend on the value IFS happens to have in the calling shell.
field_split() (
	ARG=$1		# the field that needs to be split
	OUT=		# the output fields (quoted words), initially empty

	# The IFS being modelled is defined (' ,') and not empty,
	# IFS white space is ' '.   We simply know that!

	# C is our candidate field,
	# CD indicates the delimiter that terminated the candidate field
	#	' ' indicates the delimiter was IFS white space alone
	#	',' indicates the delimiter was a ',' (perhaps with white space)
	#	'' indicates there has been no delimiter
	C= CD=

	## Each expansion, or substitution shall be processed in order
	## as follows [...]

	## While the input is not empty...
	while test -n "${ARG}"
	do
		## Consider the first remaining character of the input.
		## If it is:

		## a.  A character that did not result from an unquoted
		##     expansion or substitution:
		## b.  A character in the input that is not a character in IFS:

		# since we know exactly what the IFS chars are, and that
		# chars that did not result from an expansion (etc) are not
		# IFS chars (our test cases ensure that) we don't need to
		# treat those two differently, just skip forward until we
		# get to an IFS char, or we run out, appending the non-IFS
		# chars to the candidate and removing them from the input.

		# here we only care about the current first char in ${ARG}
		while case "${ARG}" in
			'')	break 2		# the end of the input, done
						# (leaves both loops at once)
				;;
			[\ ,]*)	false		# delimiter located, exit loop
				;;
			*)	TAIL=${ARG#?}	# something else
				C=${C}${ARG%"${TAIL}"}	# appended to candidate
				ARG=${TAIL}		# removed from input
				;;
		      esac
		do
		      :
		done

		# Now we are at the start of a delimiter in ARG, and the
		# candidate field is C

		# which kind of delimiter do we have?

		## c.  An IFS white space character:

		# assume the delim will be just IFS white space (case 'c')
		CD=' '
		# and then skip any of that we find (repeating 'c' over & over)
		while case "${ARG}" in
			' '*)	ARG=${ARG# };;
			*)	false;;
			esac
		do :; done

		## d.  Another IFS character, not IFS white space:

		# Next if we have a non white space IFS char,
		# then it is the other kind of delimiter (case 'd' in the algo)

		case "${ARG}" in
		,*)	CD=, ; ARG=${ARG#,}   # Remember we saw it, then remove
			# and skip any following IFS white space
			while case "${ARG}" in
				' '*)	ARG=${ARG# };;
				*)	false;;
				esac
			do :; done
			;;
		esac

		# now a field has been delimited so we are subject to:

		## At this point, if the candidate is not empty, or if a
		## non IFS white space character was seen at step d, then
		## the candidate becomes an output field.  
		## In either case, empty the candidate, and perform the
		## next iteration.

		if test -n "${C}" # candidate is not empty (or...) => output
		then
			## if the candidate is not empty
			## then the candidate becomes an output field.
			_fs_emit "${C}"

		# otherwise The candidate is empty, if it was delimited
		# by only IFS white space, then candidate is dropped

		elif test "${CD}" != ' '
		then
			## or if a non IFS white space character was seen
			## then the candidate becomes an output field.
			_fs_emit ''	# no need for $C, it is ""
		fi

		## In either case, empty the candidate, and perform
		## the next iteration.

		CD=
		C=
	done

	## When the input is empty, if the candidate is not empty, it
	## becomes an output field.

	if test -n "${C}"
	then
		# not an empty field after last delim, so it is included
		_fs_emit "${C}"
	fi

	# return the split field, as a list of quoted words (to become fields)
	printf %s "${OUT}"
)

# Report one result line on stdout:
#	NAME:<TAB>COUNT:<TAB><field1><field2>...<NEWLINE>
#   $1     label for the line (stored in the global variable "name")
#   $2...  the fields to show; COUNT is how many of them there are
args()
{
	name=$1
	shift

	printf '%s:\t%d:\t' "${name}" "$#"

	# With no fields at all, one empty pair of brackets is printed.
	if test "$#" -eq 0
	then
		printf '<>'
	fi

	# Otherwise one <...> per field, consuming the list as we go.
	while test "$#" -gt 0
	do
		printf '<%s>' "$1"
		shift
	done

	printf '\n'
}

# Run one test case: split a field with field_split and print the
# resulting fields with args.
#   $1  label printed at the start of the result line (kept in N)
#   $2  the field to be split
tst()
{
	N=$1

	# field_split prints its fields as quoted words; eval turns that
	# text back into positional parameters.  The command substitution
	# is double-quoted so the text reaches eval exactly as printed:
	# unquoted, it would first be field split and pathname expanded by
	# this shell, collapsing tabs or newlines inside a field to a space
	# and possibly expanding glob characters.
	eval "set -- $(field_split "$2")"

	args "$N" "$@"
}

# Test inputs.  Each name spells out the layout of its value:
#	W = a word, S = one space, C = one comma.

# a single word, with optional surrounding delimiters
W="abc"
SW=" abc"
WS="abc "
SWS=" abc "
CW=",abc"
WC="abc,"
CWC=",abc,"

# two words with various delimiters between (and after) them
WSW="abc def"
# NOTE(review): "abd" looks like a typo for "abc"; the value is kept
# as it is because it appears verbatim in the printed results.
WSSW="abd  def"
WCW="abc,def"
WCCW="abc,,def"
WSCW="abc ,def"
WCSW="abc, def"
WSCSW="abc , def"
WSCSCSW="abc , , def"
WSCSCSWS="abc , , def "
WSCSCSWC="abc , , def,"

# delimiters only
SS="  "
SSS="   "
CC=",,"
CCC=",,,"
SC=" ,"
CS=", "
SCCS=" ,, "
CSSC=",  ,"

# longer mixtures
SCWSCWCS=" ,abc ,def, "
SSCSSCSSCSS="  ,  ,  ,  "

# Run every test case.  Each line passes a label and the field to split;
# the label is the name of the data variable, with a leading 'p' and/or
# trailing 'q' when the field is given that literal prefix/suffix.
# The *WSCSW* cases pass the WSCSW data ('abc , def'), matching their
# label.

# the bare fields
tst W "$W"
tst SW "$SW"
tst WS "$WS"
tst SWS "$SWS"
tst CW "$CW"
tst WC "$WC"
tst CWC "$CWC"
tst WSW "$WSW"
tst WSSW "$WSSW"
tst WCW "$WCW"
tst WCCW "$WCCW"
tst WSCW "$WSCW"
tst WCSW "$WCSW"
tst WSCSW "$WSCSW"
tst WSCSCSW "$WSCSCSW"
tst WSCSCSWS "$WSCSCSWS"
tst WSCSCSWC "$WSCSCSWC"
tst S "$S"
tst C "$C"
tst SS "$SS"
tst SSS "$SSS"
tst CC "$CC"
tst CCC "$CCC"
tst SC "$SC"
tst CS "$CS"
tst SCCS "$SCCS"
tst CSSC "$CSSC"
tst SCWSCWCS "$SCWSCWCS"
tst SSCSSCSSCSS "$SSCSSCSSCSS"

# the same fields with a 'p' prefix
# NOTE(review): in this group, and in the two groups after it, the SSS
# case is run twice and there is no CC case (the bare group has one);
# presumably the second SSS line was meant to be CC.  Left unchanged
# because altering it would change the printed results.
tst pW "p${W}"
tst pSW "p${SW}"
tst pWS "p${WS}"
tst pSWS "p${SWS}"
tst pCW "p${CW}"
tst pWC "p${WC}"
tst pCWC "p${CWC}"
tst pWSW "p${WSW}"
tst pWSSW "p${WSSW}"
tst pWCW "p${WCW}"
tst pWCCW "p${WCCW}"
tst pWSCW "p${WSCW}"
tst pWCSW "p${WCSW}"
tst pWSCSW "p${WSCSW}"
tst pWSCSCSW "p${WSCSCSW}"
tst pWSCSCSWS "p${WSCSCSWS}"
tst pWSCSCSWC "p${WSCSCSWC}"
tst pS "p${S}"
tst pC "p${C}"
tst pSS "p${SS}"
tst pSSS "p${SSS}"
tst pSC "p${SC}"
tst pCS "p${CS}"
tst pCSSC "p${CSSC}"
tst pSSS "p${SSS}"
tst pCCC "p${CCC}"
tst pSCCS "p${SCCS}"
tst pSCWSCWCS "p${SCWSCWCS}"
tst pSSCSSCSSCSS "p${SSCSSCSSCSS}"

# the same fields with a 'q' suffix
tst Wq "${W}q"
tst SWq "${SW}q"
tst WSq "${WS}q"
tst SWSq "${SWS}q"
tst CWq "${CW}q"
tst WCq "${WC}q"
tst CWCq "${CWC}q"
tst WSWq "${WSW}q"
tst WSSWq "${WSSW}q"
tst WCWq "${WCW}q"
tst WCCWq "${WCCW}q"
tst WSCWq "${WSCW}q"
tst WCSWq "${WCSW}q"
tst WSCSWq "${WSCSW}q"
tst WSCSCSWq "${WSCSCSW}q"
tst WSCSCSWSq "${WSCSCSWS}q"
tst WSCSCSWCq "${WSCSCSWC}q"
tst Sq "${S}q"
tst Cq "${C}q"
tst SSq "${SS}q"
tst SSSq "${SSS}q"
tst SCq "${SC}q"
tst CSq "${CS}q"
tst CSSCq "${CSSC}q"
tst SSSq "${SSS}q"
tst CCCq "${CCC}q"
tst SCCSq "${SCCS}q"
tst SCWSCWCSq "${SCWSCWCS}q"
tst SSCSSCSSCSSq "${SSCSSCSSCSS}q"

# the same fields with both the 'p' prefix and the 'q' suffix
tst pWq "p${W}q"
tst pSWq "p${SW}q"
tst pWSq "p${WS}q"
tst pSWSq "p${SWS}q"
tst pCWq "p${CW}q"
tst pWCq "p${WC}q"
tst pCWCq "p${CWC}q"
tst pWSWq "p${WSW}q"
tst pWSSWq "p${WSSW}q"
tst pWCWq "p${WCW}q"
tst pWCCWq "p${WCCW}q"
tst pWSCWq "p${WSCW}q"
tst pWCSWq "p${WCSW}q"
tst pWSCSWq "p${WSCSW}q"
tst pWSCSCSWq "p${WSCSCSW}q"
tst pWSCSCSWSq "p${WSCSCSWS}q"
tst pWSCSCSWCq "p${WSCSCSWC}q"
tst pSq "p${S}q"
tst pCq "p${C}q"
tst pSSq "p${SS}q"
tst pSSSq "p${SSS}q"
tst pSCq "p${SC}q"
tst pCSq "p${CS}q"
tst pCSSCq "p${CSSC}q"
tst pSSSq "p${SSS}q"
tst pCCCq "p${CCC}q"
tst pSCCSq "p${SCCS}q"
tst pSCWSCWCSq "p${SCWSCWCS}q"
tst pSSCSSCSSCSSq "p${SSCSSCSSCSS}q"