Merge pull request #2135 from bimbashrestha/grep
Revert to old zstdgrep because of potential license issue
This commit is contained in:
commit
0208eeccd6
@ -272,19 +272,6 @@ It's used the same way as normal `grep`, for example :
|
|||||||
`zstdgrep pattern file.zst`
|
`zstdgrep pattern file.zst`
|
||||||
|
|
||||||
`zstdgrep` is _not_ compatible with dictionary compression.
|
`zstdgrep` is _not_ compatible with dictionary compression.
|
||||||
`zstdgrep` does not support the following grep options:
|
|
||||||
|
|
||||||
```
|
|
||||||
--dereference-recursive (-R)
|
|
||||||
--directories (-d)
|
|
||||||
--exclude
|
|
||||||
--exclude-from
|
|
||||||
--exclude-dir
|
|
||||||
--include
|
|
||||||
--null (-Z)
|
|
||||||
--null-data (-z)
|
|
||||||
--recursive (-r)
|
|
||||||
```
|
|
||||||
|
|
||||||
To search in a file compressed with a dictionary,
|
To search in a file compressed with a dictionary,
|
||||||
it's necessary to decompress it using `zstd` or `zstdcat`,
|
it's necessary to decompress it using `zstd` or `zstdcat`,
|
||||||
|
@ -28,6 +28,8 @@ zcat=${ZCAT:-zstdcat}
|
|||||||
endofopts=0
|
endofopts=0
|
||||||
pattern_found=0
|
pattern_found=0
|
||||||
grep_args=""
|
grep_args=""
|
||||||
|
hyphen=0
|
||||||
|
silent=0
|
||||||
|
|
||||||
prog=${0##*/}
|
prog=${0##*/}
|
||||||
|
|
||||||
@ -41,158 +43,92 @@ esac
|
|||||||
# skip all options and pass them on to grep taking care of options
|
# skip all options and pass them on to grep taking care of options
|
||||||
# with arguments, and if -e was supplied
|
# with arguments, and if -e was supplied
|
||||||
|
|
||||||
escape='
|
|
||||||
s/'\''/'\''\\'\'''\''/g
|
|
||||||
$s/$/'\''/
|
|
||||||
'
|
|
||||||
|
|
||||||
# We might want to create a C program in the future
|
|
||||||
# and replace this file with that if these
|
|
||||||
# unsupported options become necessary
|
|
||||||
usage="Usage: $0 [OPTION]... [-e] PATTERN [FILE]...
|
|
||||||
OPTIONs are the same as grep with the exception of
|
|
||||||
the following unsupported options:
|
|
||||||
--dereference-recursive (-R)
|
|
||||||
--directories (-d)
|
|
||||||
--exclude
|
|
||||||
--exclude-from
|
|
||||||
--exclude-dir
|
|
||||||
--include
|
|
||||||
--null (-Z),
|
|
||||||
--null-data (-z)
|
|
||||||
--recursive (-r)
|
|
||||||
grep --help below:
|
|
||||||
"
|
|
||||||
|
|
||||||
operands=
|
|
||||||
files_with_matches=0
|
|
||||||
files_without_matches=0
|
|
||||||
no_filename=0
|
|
||||||
with_filename=0
|
|
||||||
|
|
||||||
while [ "$#" -gt 0 ] && [ "${endofopts}" -eq 0 ]; do
|
while [ "$#" -gt 0 ] && [ "${endofopts}" -eq 0 ]; do
|
||||||
option=$1
|
case "$1" in
|
||||||
shift
|
# from GNU grep-2.5.1 -- keep in sync!
|
||||||
optarg=
|
-[ABCDXdefm])
|
||||||
|
if [ "$#" -lt 2 ]; then
|
||||||
case $option in
|
printf '%s: missing argument for %s flag\n' "${prog}" "$1" >&2
|
||||||
(-[0123456789EFGHIKLPRTUVZabchilnoqrsuvwxyz]?*)
|
exit 1
|
||||||
arg2=-\'$(expr "X$option" : 'X-.[0-9]*\(.*\)' | sed "$escape")
|
fi
|
||||||
eval "set -- $arg2 "'${1+"$@"}'
|
case "$1" in
|
||||||
option=$(expr "X$option" : 'X\(-.[0-9]*\)');;
|
-e)
|
||||||
(--binary-*=* | --[lm]a*=* | --reg*=*) ;;
|
pattern="$2"
|
||||||
(-[ABCDXdefm] | binary-* | --file | --[lm]a* | --reg*)
|
pattern_found=1
|
||||||
case ${1?"$option option requires an argument"} in
|
shift 2
|
||||||
(*\'*) optarg=" '"$(printf '%s\n' "$1" | sed "$escape");;
|
break
|
||||||
(*) optarg=" '$1'";;
|
;;
|
||||||
|
-f)
|
||||||
|
pattern_found=2
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
;;
|
||||||
esac
|
esac
|
||||||
shift;;
|
grep_args="${grep_args} $1 $2"
|
||||||
(-f?*\'*) optarg=" '"$(expr "X$option" : 'X-f\(.*\)' | sed "$escape"); option=-f;;
|
shift 2
|
||||||
(-f?*) optarg=" '"$(expr "X$option" : 'X-f\(.*\)')\'; option=-f;;
|
;;
|
||||||
(--file=*\'*) optarg=" '"$(expr "X$option" : 'X--file=\(.*\)' | sed "$escape"); option=--file;;
|
--)
|
||||||
(--file=*) optarg=" '"$(expr "X$option" : 'X--file=\(.*\)')\'; option=--file;;
|
shift
|
||||||
(--) endofopts=1; break;;
|
|
||||||
(-?*) ;;
|
|
||||||
(*)
|
|
||||||
case $option in
|
|
||||||
(*\'*) operands="$operands '"$(printf '%s\n' "$option" | sed "$escape");;
|
|
||||||
(*) operands="$operands '$option'";;
|
|
||||||
esac
|
|
||||||
${POSIXLY_CORRECT+break}
|
|
||||||
endofopts=1
|
endofopts=1
|
||||||
continue;;
|
;;
|
||||||
esac
|
-)
|
||||||
|
hyphen=1
|
||||||
case $option in
|
shift
|
||||||
(-[drRzZ] | --di* | --exc* | --inc* | --rec* | --nu*)
|
;;
|
||||||
printf >&2 '%s: %s: option not supported\n' "$0" "$option"
|
-h)
|
||||||
exit 2;;
|
silent=1
|
||||||
(-e* | --reg*) pattern_found=1;;
|
shift
|
||||||
(-f | --file)
|
;;
|
||||||
case $optarg in
|
-*)
|
||||||
(" '-'" | " '/dev/stdin'" | " '/dev/fd/0'")
|
grep_args="${grep_args} $1"
|
||||||
option=-e
|
shift
|
||||||
optarg=" '"$(sed "$escape") || exit 2;;
|
;;
|
||||||
esac
|
*)
|
||||||
pattern_found=1;;
|
# pattern to grep for
|
||||||
(--h | --he | --hel | --help) echo "$usage"; eval "$grep --help" || exit 2; exit;;
|
endofopts=1
|
||||||
(-H | --wi | --wit | --with | --with- | --with-f | --with-fi \
|
;;
|
||||||
| --with-fil | --with-file | --with-filen | --with-filena | --with-filenam \
|
|
||||||
| --with-filename)
|
|
||||||
with_filename=1
|
|
||||||
continue;;
|
|
||||||
(-l | --files-with-*) files_with_matches=1;;
|
|
||||||
(-L | --files-witho*) files_without_matches=1;;
|
|
||||||
(-h | --no-f*) no_filename=1;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
case $option in
|
|
||||||
(*\'?*) option=\'$(printf '%s\n' "$option" | sed "$escape");;
|
|
||||||
(*) option="'$option'";;
|
|
||||||
esac
|
|
||||||
|
|
||||||
grep_args="$option$optarg"
|
|
||||||
grep="$grep $grep_args"
|
|
||||||
done
|
|
||||||
|
|
||||||
eval "set -- $operands "'${1+"$@"}'
|
|
||||||
|
|
||||||
if test $pattern_found -eq 0; then
|
|
||||||
case ${1?"missing pattern; try \`$0 --help' for help"} in
|
|
||||||
(*\'*) grep="$grep -- '"$(printf '%s\n' "$1" | sed "$escape");;
|
|
||||||
(*) grep="$grep -- '$1'";;
|
|
||||||
esac
|
esac
|
||||||
shift
|
|
||||||
fi
|
|
||||||
|
|
||||||
if test $# -eq 0; then
|
|
||||||
set -- -
|
|
||||||
fi
|
|
||||||
|
|
||||||
exec 3>&1
|
|
||||||
res=0
|
|
||||||
|
|
||||||
for i do
|
|
||||||
zcat_status=$(
|
|
||||||
exec 5>&1
|
|
||||||
($zcat -- "$i" 5>&-; echo $? >&5) 3>&- |
|
|
||||||
if test $files_with_matches -eq 1; then
|
|
||||||
eval "$grep" >/dev/null && { printf '%s\n' "$i" || exit 2; }
|
|
||||||
elif test $files_without_matches -eq 1; then
|
|
||||||
eval "$grep" >/dev/null || {
|
|
||||||
r=$?
|
|
||||||
if test $r -eq 1; then
|
|
||||||
printf '%s\n' "$i" || r=2
|
|
||||||
fi
|
|
||||||
exit $r
|
|
||||||
}
|
|
||||||
elif test $with_filename -eq 0 && { test $# -eq 1 || test $no_filename -eq 1; }; then
|
|
||||||
eval "$grep"
|
|
||||||
else
|
|
||||||
case $i in
|
|
||||||
(*'
|
|
||||||
'* | *'&'* | *'\'* | *'|'*)
|
|
||||||
i=$(printf '%s\n' "$i" |
|
|
||||||
sed '
|
|
||||||
$!N
|
|
||||||
$s/[&\|]/\\&/g
|
|
||||||
$s/\n/\\n/g
|
|
||||||
');;
|
|
||||||
esac
|
|
||||||
sed_script="s|^|$i:|"
|
|
||||||
|
|
||||||
# Fail if grep or sed fails.
|
|
||||||
r=$(
|
|
||||||
exec 4>&1
|
|
||||||
(eval "$grep" 4>&-; echo $? >&4) 3>&- | sed "$sed_script" >&3 4>&-
|
|
||||||
) && exit $r
|
|
||||||
r=$?
|
|
||||||
test 1 -lt $r && exit $r || exit 2
|
|
||||||
fi >&3 5>&-
|
|
||||||
)
|
|
||||||
r=$?
|
|
||||||
test 128 -lt $r && exit $r
|
|
||||||
test "$zcat_status" -eq 0 || test "$zcat_status" -eq 2 || r=2
|
|
||||||
test $res -lt $r && res=$r
|
|
||||||
done
|
done
|
||||||
exit $res
|
|
||||||
|
# if no -e option was found, take next argument as grep-pattern
|
||||||
|
if [ "${pattern_found}" -lt 1 ]; then
|
||||||
|
if [ "$#" -ge 1 ]; then
|
||||||
|
pattern="$1"
|
||||||
|
shift
|
||||||
|
elif [ "${hyphen}" -gt 0 ]; then
|
||||||
|
pattern="-"
|
||||||
|
else
|
||||||
|
printf '%s: missing pattern\n' "${prog}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
EXIT_CODE=0
|
||||||
|
# call grep ...
|
||||||
|
if [ "$#" -lt 1 ]; then
|
||||||
|
# ... on stdin
|
||||||
|
set -f # Disable file name generation (globbing).
|
||||||
|
# shellcheck disable=SC2086
|
||||||
|
"${zcat}" - | "${grep}" ${grep_args} -- "${pattern}" -
|
||||||
|
EXIT_CODE=$?
|
||||||
|
set +f
|
||||||
|
else
|
||||||
|
# ... on all files given on the command line
|
||||||
|
if [ "${silent}" -lt 1 ] && [ "$#" -gt 1 ]; then
|
||||||
|
grep_args="-H ${grep_args}"
|
||||||
|
fi
|
||||||
|
set -f
|
||||||
|
while [ "$#" -gt 0 ]; do
|
||||||
|
# shellcheck disable=SC2086
|
||||||
|
if [ $pattern_found -eq 2 ]; then
|
||||||
|
"${zcat}" -- "$1" | "${grep}" --label="${1}" ${grep_args} -- -
|
||||||
|
else
|
||||||
|
"${zcat}" -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" -
|
||||||
|
fi
|
||||||
|
[ "$?" -ne 0 ] && EXIT_CODE=1
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
set +f
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit "${EXIT_CODE}"
|
||||||
|
@ -254,30 +254,6 @@ ZCAT=./zstdcat $ZSTDGREP 2>&1 "1234" tmp_grep_bad.zst && die "Should have failed
|
|||||||
ZCAT=./zstdcat $ZSTDGREP 2>&1 "1234" tmp_grep_bad.zst | grep "No such file or directory" || true
|
ZCAT=./zstdcat $ZSTDGREP 2>&1 "1234" tmp_grep_bad.zst | grep "No such file or directory" || true
|
||||||
rm -f tmp_grep*
|
rm -f tmp_grep*
|
||||||
|
|
||||||
println "\n===> zstdgrep --regexp= multiple"
|
|
||||||
echo "start" > tmp_grep
|
|
||||||
echo "stop" >> tmp_grep
|
|
||||||
ZCAT=./zstdcat $ZSTDGREP --regexp=start --regexp=stop tmp_grep > tmp_grep_out1
|
|
||||||
grep -e start -e stop tmp_grep > tmp_grep_out2
|
|
||||||
$DIFF tmp_grep_out1 tmp_grep_out2
|
|
||||||
rm -f tmp_grep*
|
|
||||||
|
|
||||||
println "\n===> zstdgrep multiple -e"
|
|
||||||
echo "start" > tmp_grep
|
|
||||||
echo "stop" >> tmp_grep
|
|
||||||
ZCAT=./zstdcat $ZSTDGREP -e start -e stop tmp_grep > tmp_grep_out1
|
|
||||||
grep -e start -e stop tmp_grep > tmp_grep_out2
|
|
||||||
$DIFF tmp_grep_out1 tmp_grep_out2
|
|
||||||
rm -f tmp_grep*
|
|
||||||
|
|
||||||
println "\n===> zstdgrep multiple --regexp"
|
|
||||||
echo "start" > tmp_grep
|
|
||||||
echo "stop" >> tmp_grep
|
|
||||||
ZCAT=./zstdcat $ZSTDGREP --regexp start --regexp stop tmp_grep > tmp_grep_out1
|
|
||||||
grep -e start -e stop tmp_grep > tmp_grep_out2
|
|
||||||
$DIFF tmp_grep_out1 tmp_grep_out2
|
|
||||||
rm -f tmp_grep*
|
|
||||||
|
|
||||||
println "\n===> --exclude-compressed flag"
|
println "\n===> --exclude-compressed flag"
|
||||||
rm -rf precompressedFilterTestDir
|
rm -rf precompressedFilterTestDir
|
||||||
mkdir -p precompressedFilterTestDir
|
mkdir -p precompressedFilterTestDir
|
||||||
|
Loading…
Reference in New Issue
Block a user