29d7ab227d
X-SVN-Rev: 39385
130 lines
4.0 KiB
Bash
Executable File
130 lines
4.0 KiB
Bash
Executable File
#!/bin/sh
|
|
# Copyright (C) 2016 and later: Unicode, Inc. and others.
|
|
# License & terms of use: http://www.unicode.org/copyright.html
|
|
# Copyright (C) 2001-2010, International Business Machines
|
|
# Corporation and others. All Rights Reserved.
|
|
#
|
|
# Authors:
|
|
# Ami Fixler
|
|
# Steven R. Loomis
|
|
# George Rhoten
|
|
#
|
|
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
|
|
# After extracting to EBCDIC, binary files are re-extracted without the
|
|
# EBCDIC conversion, thus restoring them to original codepage.
|
|
#
|
|
# Set the following variable to the list of binary file suffixes (extensions)
|
|
|
|
#ICU specific binary files
|
|
#****************************************************************************
|
|
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML nrm NRM utf16be UTF16BE'
|
|
data_files='icu/source/data/brkitr/* icu/source/data/locales/* icu/source/data/coll/* icu/source/data/rbnf/* icu/source/data/mappings/* icu/source/data/misc/* icu/source/data/translit/* icu/source/data/unidata/* icu/source/test/testdata/*'
|
|
|
|
#****************************************************************************
|
|
# Function: usage
|
|
# Description: Prints out text that describes how to call this script
|
|
# Input: None
|
|
# Output: None
|
|
#****************************************************************************
|
|
usage()
{
    # Single reminder line on stdout; $0 is shown so the hint matches the
    # name the script was actually invoked with.
    echo "Enter archive filename as a parameter: $0 icu-archive.tar"
}
|
|
|
|
#****************************************************************************
|
|
# first make sure we have at least one arg and it's a file we can read
|
|
#****************************************************************************
|
|
|
|
# check for no arguments
|
|
# Nothing to do without an archive name: show the usage text and report
# failure (a bare `exit` would hand back the status of `usage`, i.e. 0).
if [ $# -eq 0 ]; then
    usage
    exit 1
fi

# The first argument is the archive to unpack; later steps read $tar_file.
tar_file=$1

# Quoted so that an empty argument or a path containing blanks is tested as
# a single operand instead of collapsing the test expression.
if [ ! -r "$tar_file" ]; then
    echo "$tar_file does not exist or cannot be read."
    usage
    exit 1
fi
|
|
|
|
echo ""
echo "Extracting from $tar_file ..."
echo ""

# Extract every archive member, asking pax to convert from ISO8859-1 to
# IBM-1047 (EBCDIC) and passing -o setfiletag. The archive name is quoted so
# a path with blanks reaches pax as one argument.
pax -rvf "$tar_file" -o to=IBM-1047,from=ISO8859-1 -o setfiletag
|
|
|
|
#****************************************************************************
|
|
# For files we have restored as CCSID 37, check the BOM to see if they
|
|
# should be processed as 819. Also handle files with special paths. Files
|
|
# that match will be added to binary files lists. The lists will in turn
|
|
# be processed to restore files as 819.
|
|
#****************************************************************************
|
|
echo ""
echo "Determining binary files by BOM ..."
echo ""

# Print the first three bytes of the file named by $1 as lowercase hex pairs
# separated by single blanks (for example "57 8b ab").
first3_hex()
{
    # od prints an offset column followed by the bytes; runs of blanks are
    # squeezed to one so that cut can pick fields 2-4 reliably.
    head -c 3 "$1" |
        od -t x1 |
        head -n 1 |
        tr -s ' ' |
        cut -d ' ' -f 2-4 |
        tr 'A-Z' 'a-z'
}

# When building in ASCII mode, text files are converted as ASCII.
# An unset or empty ICU_ENABLE_ASCII_STRINGS is treated as 0 so the numeric
# test does not fail with "integer expression expected".
if [ "${ICU_ENABLE_ASCII_STRINGS:-0}" -eq 1 ]; then
    binary_suffixes="$binary_suffixes txt TXT ucm UCM"
elif [ -f icu/as_is/bomlist.txt ]; then
    # A precomputed list shipped in the archive takes precedence over scanning.
    echo 'Using icu/as_is/bomlist.txt'
    binary_files=$(cat icu/as_is/bomlist.txt)
else
    echo "Analyzing files .."
    # The list is built by word splitting, so file names are assumed to be
    # free of blanks; the leading "./" is removed from each find result.
    for file in $(find ./icu -name '*.txt' -print | sed -e 's|^\./||'); do
        # Find a converted UTF-8 BOM
        if [ "$(first3_hex "$file")" = "57 8b ab" ]; then
            binary_files="$binary_files $file"
        fi
    done
fi
|
|
|
|
echo "Looking for binary suffixes.."

# Succeed (status 0) when the text after the LAST dot of $1 equals one of the
# blank-separated entries in $binary_suffixes; fail (status 1) otherwise.
# Taking everything after the last dot handles names with any number of dots,
# such as NormalizationTest-3.2.0.txt under a versioned directory.
has_binary_suffix()
{
    suf=${1##*.}
    for j in $binary_suffixes; do
        if [ "$suf" = "$j" ]; then
            return 0
        fi
    done
    return 1
}

# Walk the archive's table of contents (pax without -r only lists members);
# listing errors are discarded, which leaves the loop with nothing to do.
for i in $(pax -f "$tar_file" 2>/dev/null); do
    case $i in
        */) ;;      # then this entry is a directory
        *.*)        # then this entry has a dot in the filename
            if has_binary_suffix "$i"; then
                binary_files="$binary_files $i"
            fi
            ;;
        *) ;;       # then this entry does not have a dot in it
    esac
done
|
|
|
|
# now see if a re-extract of binary files is necessary
|
|
if [ -z "$binary_files" ]; then
    echo ""
    echo "There are no binary files to restore."
else
    echo "Restoring binary files ..."
    echo ""
    # $binary_files is a blank-separated list and is left unquoted on purpose
    # so that each name becomes its own argument; only the archive name is
    # quoted. The converted copies are removed first, then the same members
    # are extracted again without any codepage conversion.
    rm $binary_files
    pax -rvf "$tar_file" $binary_files
    # Tag the files as binary for proper interaction with the _BPXK_AUTOCVT
    # environment setting
    chtag -b $binary_files
fi
echo ""
echo "$0 has completed extracting ICU from $tar_file."
|