2001-06-12 22:05:57 +00:00
|
|
|
#!/bin/sh
|
2003-12-04 23:51:16 +00:00
|
|
|
# Copyright (C) 2001 - 2003, International Business Machines Corporation.
|
2002-12-06 01:40:42 +00:00
|
|
|
# All Rights Reserved.
|
2001-06-12 22:05:57 +00:00
|
|
|
#
|
|
|
|
# Authors:
|
|
|
|
# Ami Fixler
|
|
|
|
# Steven R. Loomis <srl@jtcsv.com>
|
|
|
|
# George Rhoten
|
|
|
|
#
|
|
|
|
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
|
|
|
|
# After extracting to EBCDIC, binary files are re-extracted without the
|
|
|
|
# EBCDIC conversion, thus restoring them to original codepage.
|
|
|
|
#
|
|
|
|
# Set the following variable to the list of binary file suffixes (extensions)
|
|
|
|
|
2001-08-10 16:36:50 +00:00
|
|
|
#binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
|
|
|
|
#ICU specific binary files
|
2003-12-05 00:18:26 +00:00
|
|
|
# Suffixes (lower- and upper-case spellings) of the ICU files that must stay
# binary; archive entries ending in one of them are re-extracted without the
# EBCDIC conversion.
binary_suffixes="brk BRK bin BIN res RES cnv CNV \
dat DAT icu ICU spp SPP"
|
2001-06-12 22:05:57 +00:00
|
|
|
|
|
|
|
# Print a two-line usage summary to standard output.
# Takes no arguments; $0 is shown as the command name.
usage()
{
    echo "Enter archive filename as a parameter: $0 icu-archive.tar [strip]"
    echo "(strip is an option to remove hex '0D' carriage returns)"
}
|
|
|
|
# First make sure we have at least one argument and that it names a file we
# can read.  Both failure paths exit with a non-zero status so that callers
# can tell the script did not run.
if [ $# -eq 0 ]; then
    usage
    exit 1
fi
# "$1" is quoted so an archive path containing blanks is tested as one word.
if [ ! -r "$1" ]; then
    echo "$1 does not exist or cannot be read."
    usage
    exit 1
fi
|
|
|
|
echo ""
echo "Extracting from $1 ..."
echo ""

# Extract every archive member while converting it from ISO8859-1 to IBM-1047
# (EBCDIC).  The archive name is quoted so a path containing blanks reaches
# pax as a single argument.
pax -rvf "$1" -o to=IBM-1047,from=ISO8859-1
|
|
|
|
|
|
|
|
# Remove every carriage return (hex 0D, octal 015) from the file named by $1,
# rewriting the file in place.
# The filtered copy is first written to ./@@@icu@tmp and only replaces the
# original when tr succeeded, so a failed filter cannot leave a truncated file.
# Returns 0 on success, 1 when the file could not be filtered.
strip_carriage_returns()
{
    if tr -d '\015' <"$1" >@@@icu@tmp
    then
        # make the extracted file writable first so removing it cannot prompt
        chmod +w "$1"
        rm -f "$1"
        mv @@@icu@tmp "$1"
    else
        echo "Could not strip carriage returns from $1" >&2
        rm -f @@@icu@tmp
        return 1
    fi
}

# The optional second argument "strip" requests carriage-return removal.
# It is compared as a string; an integer comparison would always fail here.
if [ $# -gt 1 ] && [ "$2" = strip ]; then
    echo ""
    echo "Stripping hex 0d characters ..."
    # pax -f lists the archive members, one per line
    pax -f "$1" 2>/dev/null | while IFS= read -r entry
    do
        case "$entry" in
            */)
                # then this entry is a directory
                ;;
            *)
                # then this entry is NOT a directory
                strip_carriage_returns "$entry"
                ;;
        esac
    done
fi
|
|
|
|
|
|
|
|
echo ""
echo "Determining binary files ..."
echo ""

#for dir in `find ./icu -type d \( -name CVS -o -print \)`; do
#    if [ -f $dir/CVS/Entries ]; then
#        binary_files="$binary_files`cat $dir/CVS/Entries | fgrep -- -kb \
#                      | cut -d / -f2 | sed -e "s%^%$dir/%" \
#                      | sed -e "s%^\./%%" | tr '\n' ' '`"
#    fi
#done

# Print the first three bytes of file $1 as lowercase hex pairs separated by
# single blanks, e.g. "57 8b ab".
# The sed step squeezes runs of blanks in the od output into one blank so that
# cut can select fields 2-4 (field 1 is the od offset column).
first_three_bytes_hex()
{
    head -c 3 "$1" |
        od -t x1 |
        head -n 1 |
        sed 's/  */ /g' |
        cut -f2-4 -d ' ' |
        tr 'A-Z' 'a-z'
}

# Start from an empty list so a value inherited from the environment cannot
# end up in the list of files that are removed and re-extracted.
binary_files=""

#echo "Detecting Unicode files"
for file in $(find icu \( -name \*.txt -print \)); do
    # Find a converted UTF-8 BOM: "57 8b ab" is the byte pattern this script
    # treats as a BOM that went through the EBCDIC conversion.
    if test "$(first_three_bytes_hex "$file")" = "57 8b ab"
    then
        binary_files="$binary_files $file"
    fi
done
|
|
|
|
|
|
|
|
# Succeed (status 0) when the file name at the end of path $1 contains a dot
# and the text after its LAST dot equals one of the blank-separated entries of
# $binary_suffixes; otherwise return 1.
# Only the final path component is examined, so a dot in a directory name or
# an earlier dot in the file name does not hide the real extension.
has_binary_suffix()
{
    case ${1##*/} in
        *.*)
            # then this entry has a dot in the filename
            for known_suffix in $binary_suffixes
            do
                if [ "${1##*.}" = "$known_suffix" ]
                then
                    return 0
                fi
            done
            ;;
    esac
    return 1
}

# Walk the archive listing and remember every non-directory entry whose
# suffix marks it as binary.  The listing is word-split on purpose: the
# result is kept in the blank-separated list $binary_files.
for i in $(pax -f "$1" 2>/dev/null)
do
    case $i in
        */)
            # then this entry is a directory
            ;;
        *)
            if has_binary_suffix "$i"
            then
                binary_files="$binary_files $i"
            fi
            ;;
    esac
done
|
|
|
|
|
|
|
|
# Now see if a re-extract of binary files is necessary.
if [ -z "$binary_files" ]; then
    echo ""
    echo "There are no binary files to restore."
else
    echo "Restoring binary files ..."
    echo ""
    # $binary_files is a blank-separated list and is expanded unquoted on
    # purpose so that each name becomes its own argument.  The converted
    # copies are removed, then the same members are extracted again without
    # the -o conversion option, which keeps their original bytes.
    rm $binary_files
    pax -rvf "$1" $binary_files
fi
echo ""
echo "$0 has completed extracting ICU from $1."
|