Merge pull request #2135 from bimbashrestha/grep

Revert to old zstdgrep because of potential license issue
This commit is contained in:
Bimba Shrestha 2020-05-13 16:08:59 -05:00 committed by GitHub
commit 0208eeccd6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 86 additions and 187 deletions

View File

@ -272,19 +272,6 @@ It's used the same way as normal `grep`, for example :
`zstdgrep pattern file.zst` `zstdgrep pattern file.zst`
`zstdgrep` is _not_ compatible with dictionary compression. `zstdgrep` is _not_ compatible with dictionary compression.
`zstdgrep` does not support the following grep options
```
--dereference-recursive (-R)
--directories (-d)
--exclude
--exclude-from
--exclude-dir
--include
--null (-Z)
--null-data (-z)
--recursive (-r)
```
To search into a file compressed with a dictionary, To search into a file compressed with a dictionary,
it's necessary to decompress it using `zstd` or `zstdcat`, it's necessary to decompress it using `zstd` or `zstdcat`,

View File

@ -28,6 +28,8 @@ zcat=${ZCAT:-zstdcat}
endofopts=0 endofopts=0
pattern_found=0 pattern_found=0
grep_args="" grep_args=""
hyphen=0
silent=0
prog=${0##*/} prog=${0##*/}
@ -41,158 +43,92 @@ esac
# skip all options and pass them on to grep taking care of options # skip all options and pass them on to grep taking care of options
# with arguments, and if -e was supplied # with arguments, and if -e was supplied
escape='
s/'\''/'\''\\'\'''\''/g
$s/$/'\''/
'
# We might want to create a c program in the future
# and replace this file with that if these
# unsupported options become necessary
usage="Usage: $0 [OPTION]... [-e] PATTERN [FILE]...
OPTIONs are the same as grep with the exception of
the following unsupported options:
--dereference-recursive (-R)
--directories (-d)
--exclude
--exclude-from
--exclude-dir
--include
--null (-Z),
--null-data (-z)
--recursive (-r)
grep --help below:
"
operands=
files_with_matches=0
files_without_matches=0
no_filename=0
with_filename=0
while [ "$#" -gt 0 ] && [ "${endofopts}" -eq 0 ]; do while [ "$#" -gt 0 ] && [ "${endofopts}" -eq 0 ]; do
option=$1 case "$1" in
shift # from GNU grep-2.5.1 -- keep in sync!
optarg= -[ABCDXdefm])
if [ "$#" -lt 2 ]; then
case $option in printf '%s: missing argument for %s flag\n' "${prog}" "$1" >&2
(-[0123456789EFGHIKLPRTUVZabchilnoqrsuvwxyz]?*) exit 1
arg2=-\'$(expr "X$option" : 'X-.[0-9]*\(.*\)' | sed "$escape") fi
eval "set -- $arg2 "'${1+"$@"}' case "$1" in
option=$(expr "X$option" : 'X\(-.[0-9]*\)');; -e)
(--binary-*=* | --[lm]a*=* | --reg*=*) ;; pattern="$2"
(-[ABCDXdefm] | binary-* | --file | --[lm]a* | --reg*) pattern_found=1
case ${1?"$option option requires an argument"} in shift 2
(*\'*) optarg=" '"$(printf '%s\n' "$1" | sed "$escape");; break
(*) optarg=" '$1'";; ;;
-f)
pattern_found=2
;;
*)
;;
esac esac
shift;; grep_args="${grep_args} $1 $2"
(-f?*\'*) optarg=" '"$(expr "X$option" : 'X-f\(.*\)' | sed "$escape"); option=-f;; shift 2
(-f?*) optarg=" '"$(expr "X$option" : 'X-f\(.*\)')\'; option=-f;; ;;
(--file=*\'*) optarg=" '"$(expr "X$option" : 'X--file=\(.*\)' | sed "$escape"); option=--file;; --)
(--file=*) optarg=" '"$(expr "X$option" : 'X--file=\(.*\)')\'; option=--file;; shift
(--) endofopts=1; break;;
(-?*) ;;
(*)
case $option in
(*\'*) operands="$operands '"$(printf '%s\n' "$option" | sed "$escape");;
(*) operands="$operands '$option'";;
esac
${POSIXLY_CORRECT+break}
endofopts=1 endofopts=1
continue;; ;;
esac -)
hyphen=1
case $option in shift
(-[drRzZ] | --di* | --exc* | --inc* | --rec* | --nu*) ;;
printf >&2 '%s: %s: option not supported\n' "$0" "$option" -h)
exit 2;; silent=1
(-e* | --reg*) pattern_found=1;; shift
(-f | --file) ;;
case $optarg in -*)
(" '-'" | " '/dev/stdin'" | " '/dev/fd/0'") grep_args="${grep_args} $1"
option=-e shift
optarg=" '"$(sed "$escape") || exit 2;; ;;
esac *)
pattern_found=1;; # pattern to grep for
(--h | --he | --hel | --help) echo "$usage"; eval "$grep --help" || exit 2; exit;; endofopts=1
(-H | --wi | --wit | --with | --with- | --with-f | --with-fi \ ;;
| --with-fil | --with-file | --with-filen | --with-filena | --with-filenam \
| --with-filename)
with_filename=1
continue;;
(-l | --files-with-*) files_with_matches=1;;
(-L | --files-witho*) files_without_matches=1;;
(-h | --no-f*) no_filename=1;;
esac
case $option in
(*\'?*) option=\'$(printf '%s\n' "$option" | sed "$escape");;
(*) option="'$option'";;
esac
grep_args="$option$optarg"
grep="$grep $grep_args"
done
eval "set -- $operands "'${1+"$@"}'
if test $pattern_found -eq 0; then
case ${1?"missing pattern; try \`$0 --help' for help"} in
(*\'*) grep="$grep -- '"$(printf '%s\n' "$1" | sed "$escape");;
(*) grep="$grep -- '$1'";;
esac esac
shift
fi
if test $# -eq 0; then
set -- -
fi
exec 3>&1
res=0
for i do
zcat_status=$(
exec 5>&1
($zcat -- "$i" 5>&-; echo $? >&5) 3>&- |
if test $files_with_matches -eq 1; then
eval "$grep" >/dev/null && { printf '%s\n' "$i" || exit 2; }
elif test $files_without_matches -eq 1; then
eval "$grep" >/dev/null || {
r=$?
if test $r -eq 1; then
printf '%s\n' "$i" || r=2
fi
exit $r
}
elif test $with_filename -eq 0 && { test $# -eq 1 || test $no_filename -eq 1; }; then
eval "$grep"
else
case $i in
(*'
'* | *'&'* | *'\'* | *'|'*)
i=$(printf '%s\n' "$i" |
sed '
$!N
$s/[&\|]/\\&/g
$s/\n/\\n/g
');;
esac
sed_script="s|^|$i:|"
# Fail if grep or sed fails.
r=$(
exec 4>&1
(eval "$grep" 4>&-; echo $? >&4) 3>&- | sed "$sed_script" >&3 4>&-
) && exit $r
r=$?
test 1 -lt $r && exit $r || exit 2
fi >&3 5>&-
)
r=$?
test 128 -lt $r && exit $r
test "$zcat_status" -eq 0 || test "$zcat_status" -eq 2 || r=2
test $res -lt $r && res=$r
done done
exit $res
# Pattern selection and grep dispatch.
#
# Reads (set earlier in this script): pattern_found (0 = none yet, 1 = -e,
# 2 = -f), pattern, hyphen, silent, grep_args, grep, zcat, prog, and the
# remaining positional parameters.
# Exits with grep's status in stdin mode; in file mode exits 1 as soon as the
# pipeline for any file returned non-zero, otherwise 0.

# If no -e/-f option was found, take the next argument as the grep pattern.
# A lone "-" swallowed by option parsing (hyphen=1) serves as the pattern
# when nothing else is left.
if [ "${pattern_found}" -lt 1 ]; then
    if [ "$#" -ge 1 ]; then
        pattern="$1"
        shift
    elif [ "${hyphen}" -gt 0 ]; then
        pattern="-"
    else
        printf '%s: missing pattern\n' "${prog}" >&2
        exit 1
    fi
fi

EXIT_CODE=0
# call grep ...
if [ "$#" -lt 1 ]; then
    # ... on stdin
    set -f # Disable file name generation (globbing).
    # grep_args is deliberately left unquoted so it splits into words.
    # shellcheck disable=SC2086
    if [ "${pattern_found}" -eq 2 ]; then
        # Patterns come from -f (already inside grep_args). Passing the unset
        # pattern here would hand grep an empty string as a file operand, so
        # only "-" (stdin) is given, as in the per-file branch below.
        "${zcat}" - | "${grep}" ${grep_args} -- -
    else
        "${zcat}" - | "${grep}" ${grep_args} -- "${pattern}" -
    fi
    EXIT_CODE=$?
    set +f
else
    # ... on all files given on the command line
    # With several files, prefix matches with the file name unless -h was given.
    if [ "${silent}" -lt 1 ] && [ "$#" -gt 1 ]; then
        grep_args="-H ${grep_args}"
    fi
    set -f
    while [ "$#" -gt 0 ]; do
        # grep reads the decompressed stream from stdin; --label makes it
        # report the original file name instead of "(standard input)".
        # shellcheck disable=SC2086
        if [ "${pattern_found}" -eq 2 ]; then
            "${zcat}" -- "$1" | "${grep}" --label="${1}" ${grep_args} -- -
        else
            "${zcat}" -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" -
        fi
        # $? is the status of the pipeline just run (i.e. of grep).
        if [ "$?" -ne 0 ]; then
            EXIT_CODE=1
        fi
        shift
    done
    set +f
fi

exit "${EXIT_CODE}"

View File

@ -254,30 +254,6 @@ ZCAT=./zstdcat $ZSTDGREP 2>&1 "1234" tmp_grep_bad.zst && die "Should have failed
ZCAT=./zstdcat $ZSTDGREP 2>&1 "1234" tmp_grep_bad.zst | grep "No such file or directory" || true ZCAT=./zstdcat $ZSTDGREP 2>&1 "1234" tmp_grep_bad.zst | grep "No such file or directory" || true
rm -f tmp_grep* rm -f tmp_grep*
# Each case below writes a two-line fixture, runs zstdgrep and plain grep with
# the same two patterns, and requires identical output via $DIFF.
# NOTE(review): tmp_grep is written as plain text, not compressed; presumably
# zstdcat passes non-zstd input through unchanged -- confirm.

# Case 1: patterns given in the --regexp=PATTERN form.
println "\n===> zstdgrep --regexp= multiple"
echo "start" > tmp_grep
echo "stop" >> tmp_grep
ZCAT=./zstdcat $ZSTDGREP --regexp=start --regexp=stop tmp_grep > tmp_grep_out1
grep -e start -e stop tmp_grep > tmp_grep_out2
$DIFF tmp_grep_out1 tmp_grep_out2
rm -f tmp_grep*
# Case 2: patterns given with repeated short -e options.
println "\n===> zstdgrep multiple -e"
echo "start" > tmp_grep
echo "stop" >> tmp_grep
ZCAT=./zstdcat $ZSTDGREP -e start -e stop tmp_grep > tmp_grep_out1
grep -e start -e stop tmp_grep > tmp_grep_out2
$DIFF tmp_grep_out1 tmp_grep_out2
rm -f tmp_grep*
# Case 3: patterns given in the "--regexp PATTERN" form (separate argument).
println "\n===> zstdgrep multiple --regexp"
echo "start" > tmp_grep
echo "stop" >> tmp_grep
ZCAT=./zstdcat $ZSTDGREP --regexp start --regexp stop tmp_grep > tmp_grep_out1
grep -e start -e stop tmp_grep > tmp_grep_out2
$DIFF tmp_grep_out1 tmp_grep_out2
rm -f tmp_grep*
println "\n===> --exclude-compressed flag" println "\n===> --exclude-compressed flag"
rm -rf precompressedFilterTestDir rm -rf precompressedFilterTestDir
mkdir -p precompressedFilterTestDir mkdir -p precompressedFilterTestDir