#!/bin/sh
#
# Copyright (C) 2015 William Ahern
#
# This work is free software distributed under the terms of the MIT license
# as published at http://opensource.org/licenses/MIT.
# ----------------------------------------------------------------------------
# DESCRIPTION
#
# The principal function in this file is the glob function defined
# immediately below, which implements a routine substantially similar to the
# POSIX C function of the same name, but with optional recursion. It is
# intended to be copy+pasted with minimal changes. It may be used in tandem
# with the functions glob_print8, glob_echo8, and glob_decode8 in order to
# communicate file names across subshells in an 8-bit clean manner. However,
# the easiest mode is to handle each result directly by specifying the name
# of an application-specific user-defined function to be executed for each
# glob result.
#
# glob_main implements a function with command-line utility semantics. The
# entire file can be source-included and glob_main used from shell code, or
# the file can be executed as a command-line utility directly.
#

# glob PATTERN [MAXDEPTH] [EXEC-COMMAND] [INTERNAL:GLOB-COUNT]
#
# Expands PATTERN using the shell's pathname expansion and runs EXEC-COMMAND
# once per existing match, passing the file name as its only argument. When
# EXEC-COMMAND is omitted or empty, glob_echo8 is used, which prints one
# octal-encoded file name per line. Use glob_decode8 to decode each line.
#
# If MAXDEPTH is greater than 0 the search is repeated one directory level
# deeper (DIRNAME/*/BASENAME) with MAXDEPTH reduced by one. A PATTERN without
# a directory component is searched relative to the current directory, i.e.
# the next level is */PATTERN. An omitted or empty MAXDEPTH means 0.
#
# GLOB-COUNT is for internal use by the recursion: it seeds the running count
# of matches for which EXEC-COMMAND succeeded.
#
# Returns success if EXEC-COMMAND succeeded for at least one match.
#
# Side effects: overwrites the global variables glob_N and F, and on return
# leaves pathname expansion disabled (set -f) and IFS unset.
#
glob() {
	glob_N="${4:-0}"

	# Empty IFS keeps the unquoted pattern from being field-split, and
	# pathname expansion is enabled only for the duration of the loop.
	IFS=
	set +f
	for F in ${1}; do
		# An unmatched pattern is left as a literal word; skip it.
		[ -e "${F}" ] || continue
		if eval "${3:-glob_echo8} \"\${F}\""; then
			glob_N=$((${glob_N} + 1))
		fi
	done
	set -f
	unset IFS

	if [ "${2:-0}" -gt 0 ]; then
		# The status of the nested call is ignored on purpose: glob_N is
		# global, so the final test below already reflects its matches.
		case "${1}" in
		*/*)
			glob "${1%/*}/*/${1##*/}" "$((${2} - 1))" "${3:-glob_echo8}" "${glob_N}" || :
			;;
		*)
			# No directory component: descend from the current directory.
			glob "*/${1}" "$((${2} - 1))" "${3:-glob_echo8}" "${glob_N}" || :
			;;
		esac
	fi

	[ "${glob_N}" -gt 0 ]
} # glob

# glob_print8 STRING
#
# Writes STRING to standard output as a sequence of \0NNN octal escapes, one
# per byte, with no separators and no trailing newline. An empty STRING
# produces no output.
#
glob_print8() {
	(
		# Every stage of the pipeline runs in the C locale.
		LC_ALL=C
		export LC_ALL

		# od dumps each byte in octal, sed prefixes every number with
		# "\0", and tr drops everything else (blanks, newlines).
		printf "%s" "${1}" |
			od -An -to1 -v |
			sed -e 's/\([0123456789][0123456789]*\)/\\0\1/g' |
			tr -cd '\\0123456789'
	)
}

# glob_echo8 STRING
#
# Line-oriented variant of glob_print8: emits the octal encoding of STRING
# and then terminates the line with a newline.
#
glob_echo8() {
	glob_print8 "$1"; LC_ALL=C printf "\n"
}

# glob_decode8 VARIABLE
#
# Decodes, in place, the string held in the variable named VARIABLE, which
# is expected to have been encoded by glob_print8. The decoded text replaces
# the variable's previous value.
#
# VARIABLE is spliced into eval'd code, so it must be a plain variable name
# supplied by the script itself, never untrusted input.
#
# Side effect: IFS is unset on return.
#
glob_decode8() {
	IFS=
	# printf %b expands the \0NNN escapes. The trailing "x" protects any
	# trailing newlines from being stripped by command substitution; it is
	# removed again below. The value is expanded inside double quotes so
	# that it is never subject to pathname expansion or field splitting,
	# whatever the caller's noglob setting is.
	eval "${1}=\"\$(printf \"%bx\" \"\${${1}}\")\""
	eval "${1}=\${${1}%x}"
	unset IFS
}

#
# glob command-line utility
#
# glob_main [-efqs08h] [-r N] PATTERN [PATTERN ...]
#
# Runs glob on each PATTERN and prints every resulting file name. See
# glob_usage below for the meaning of the options.
#
# Returns success if at least one file name was found, failure otherwise.
# -h returns success after printing the usage text; an unknown option or an
# invalid -r argument prints the usage text to standard error and returns 1.
#
# Side effects on the calling shell: the GLOB_* variables and the glob_*
# helper functions below are (re)defined, OPTIND is reset, the shell options
# -e, -u, -f and -C are enabled and IFS is unset. The previous IFS and option
# state is captured, but glob_restore reinstates it only when GLOB_RESTORE is
# "yes", and it is set to "no" here.
#
glob_main() {
	GLOB_EVAL=no     # Use eval callback
	GLOB_FIFO=no     # Use named fifo strategy
	GLOB_QUIET=no    # Be quiet
	GLOB_RECURSE=0   # Maximum recursion depth
	GLOB_SUBSHELL=no # Run glob in subshell
	GLOB_EOL="\n"    # EOL marker
	GLOB_ENCODE=no   # Print octal-encoded file names
	GLOB_TOTAL=0     # Running count of file names printed

	# Snapshot of the caller's state. GLOB_SAVEDIFS is empty when IFS was
	# unset, otherwise "x" followed by the old IFS value.
	GLOB_SAVEDIFS="${IFS+x}${IFS-}"
	GLOB_SAVEDSET="$(set +o)"
	GLOB_RESTORE=no

	set -e # strict error
	set -u # don't expand unbound variable
	set -f # disable pathname expansion
	set -C # noclobber
	unset IFS

	# Reinstate the IFS and shell options saved above, if requested.
	glob_restore() {
		#
		# ksh88 and ksh93 choke on their own set +o output,
		# so only do this if requested.
		#
		[ "${GLOB_RESTORE}" = "yes" ] || return 0

		if [ "${GLOB_SAVEDIFS+isset}" = "isset" ]; then
			if [ -n "${GLOB_SAVEDIFS}" ]; then
				IFS="${GLOB_SAVEDIFS#x}"
			else
				unset IFS || :
			fi
		fi

		if [ "${GLOB_SAVEDSET+isset}" = "isset" ]; then
			eval "${GLOB_SAVEDSET}"
		fi
	}

	# glob_warn FORMAT [ARG ...]
	#
	# printf-style message on standard error, prefixed with the program
	# name. "$@" still contains FORMAT as its first element; the %.0s
	# conversion consumes it without printing anything.
	glob_warn() {
		printf "%s: %.0s${1}\n" "${0##*/}" "$@" >&2
	}

	# glob_info FORMAT [ARG ...]
	#
	# Same as glob_warn, but silent when -q was given.
	glob_info() {
		if [ "${GLOB_QUIET}" = "no" ]; then
			printf "%s: %.0s${1}\n" "${0##*/}" "$@" >&2
		fi
	}

	# glob_tempnam PREFIX
	#
	# Prints PREFIX, a dash, and the hex encoding of 8 bytes read from
	# /dev/urandom. Errors from od are discarded, so the suffix is empty
	# if /dev/urandom cannot be read.
	glob_tempnam() {
		printf "%s-%s\n" "${1}" "$(LC_ALL=C od -An -N8 -tx1 -v /dev/urandom 2>>/dev/null | LC_ALL=C tr -cd '0123456789abcdef')"
	}

	# glob_isnumber STRING
	#
	# Succeeds if STRING is non-empty and consists only of decimal digits.
	# Overwrites the global variable I.
	glob_isnumber() {
		I="${1}"

		[ "${#I}" -gt 0 ] || return 1

		# Strip one leading digit per iteration; fail as soon as the
		# first remaining character is not a digit.
		while [ "${#I}" -gt 0 ]; do
			[ "${I##[0123456789]}" != "${I}" ] || return 1
			I="${I##[0123456789]}"
		done
	}

	# glob_result NAME
	#
	# Prints one result followed by the EOL marker and counts it. Outside
	# eval mode NAME arrives octal-encoded and is decoded first; with -8
	# the name is printed octal-encoded. Fails on an empty NAME.
	glob_result() {
		tmp_F="${1}"

		[ "${#tmp_F}" -gt 0 ] || return 1

		if [ "${GLOB_EVAL}" != "yes" ]; then
			glob_decode8 tmp_F
		fi

		if [ "${GLOB_ENCODE}" = "yes" ]; then
			tmp_F="$(glob_print8 "${tmp_F}")"
		fi

		printf "%s%b" "${tmp_F}" "${GLOB_EOL}"

		GLOB_TOTAL="$((${GLOB_TOTAL} + 1))"
	}

	# glob_run
	#
	# Runs glob on the pattern in ${G} using the selected strategy and
	# feeds each result to glob_result.
	glob_run() {
		if [ "${GLOB_EVAL}" = "yes" ]; then
			# glob calls glob_result directly with the raw file name.
			glob "${G}" "${GLOB_RECURSE}" glob_result
		elif [ "${GLOB_FIFO}" = "yes" ]; then
			# glob runs in the background writing encoded names to an
			# unlinked fifo (fd 4) while this shell reads them from
			# fd 3. Presumably the read-write open comes first so
			# that the read-only open does not block -- confirm.
			tmp_FIFO="$(glob_tempnam "${TMPDIR:-/tmp}/${0##*/}")"
			mkfifo -m 0600 -- "${tmp_FIFO}"
			exec 4<>"${tmp_FIFO}" 3<"${tmp_FIFO}"
			rm -f -- "${tmp_FIFO}"
			glob "${G}" "${GLOB_RECURSE}" >&4 3<&- 4<&- &
			exec 4<&-

			while read -r F; do
				glob_result "${F}"
			done <&3 3<&-
		else
			# glob runs to completion inside a command substitution
			# and its encoded output is read back line by line.
			while read -r F; do
				glob_result "${F}"
			done <<-EOF
				$(glob "${G}" "${GLOB_RECURSE}")
			EOF
		fi
	}

	# Print the usage text on standard output.
	glob_usage() {
		cat <<-EOF
			Usage: ${0##*/} [-efqr:s08h] PATTERN [PATTERN ...]
			  -e    Use eval callback to avoid output buffering
			  -f    Use fifo to partially parallelize search
			  -q    Do not print information about results
			  -r N  Recursively search up to N directories from dirname of PATTERN
			  -s    Run in subshell mode
			  -0    \0-terminate rather than \n-terminate file names
			  -8    octal-encode file names
			  -h    print this usage information

			Report bugs to <william@25thandClement.com>
		EOF
	}

	# POSIX specifies 1 as the value that restarts getopts parsing; the
	# result of assigning any other value is unspecified.
	OPTIND=1
	while getopts "efqr:s08h" OPTC; do
		case "${OPTC}" in
		e)
			GLOB_EVAL=yes
			;;
		f)
			GLOB_FIFO=yes
			;;
		q)
			GLOB_QUIET=yes
			;;
		r)
			if glob_isnumber "${OPTARG}"; then
				GLOB_RECURSE="${OPTARG}"
			else
				glob_warn "%s: not a number" "${OPTARG}"
				glob_usage >&2
				# Return rather than exit so that a shell which
				# sourced this file is not terminated.
				glob_restore
				return 1
			fi
			;;
		s)
			GLOB_SUBSHELL=yes
			;;
		0)
			GLOB_EOL="\0"
			;;
		8)
			GLOB_ENCODE=yes
			;;
		h)
			glob_usage
			glob_restore
			return 0
			;;
		*)
			glob_usage >&2
			glob_restore
			return 1
			;;
		esac
	done

	shift $((${OPTIND} - 1))

	for G; do
		if [ "${GLOB_SUBSHELL}" = "yes" ]; then
			# Increments made by glob_result inside the subshell are
			# lost, so a successful run is recorded by decrementing
			# the total instead. A negative total therefore means
			# "something was found, but the count is unknown".
			if (glob_run); then
				GLOB_TOTAL="$((${GLOB_TOTAL} - 1))"
			fi
		else
			glob_run || :
		fi
	done

	# Report the count only when it is known (see subshell mode above).
	if [ "${GLOB_TOTAL}" -ge 0 ]; then
		glob_info "found %d files" "${GLOB_TOTAL}"
	fi

	glob_restore

	[ "${GLOB_TOTAL}" -ne 0 ]
}

# Act as a command-line utility only when this file is invoked under the
# name glob.sh; under any other name only the functions are defined.
case "${0##*/}" in
glob.sh)
	glob_main "$@"
	exit $?
	;;
esac
