diff --git a/icu4c/source/configure.in b/icu4c/source/configure.in index 4d2a7d3bed..d5a3a04995 100644 --- a/icu4c/source/configure.in +++ b/icu4c/source/configure.in @@ -1035,7 +1035,7 @@ AC_OUTPUT([icudefs.mk \ test/letest/Makefile \ test/threadtest/Makefile \ samples/Makefile samples/date/Makefile \ - samples/cal/Makefile samples/layout/Makefile \ + samples/cal/Makefile samples/csdet/Makefile samples/layout/Makefile \ common/unicode/platform.h]) if test $ICU_USE_THREADS = 0; then diff --git a/icu4c/source/samples/Makefile.in b/icu4c/source/samples/Makefile.in index 83d3340a1b..03923fec8b 100644 --- a/icu4c/source/samples/Makefile.in +++ b/icu4c/source/samples/Makefile.in @@ -1,5 +1,5 @@ ## Makefile.in for ICU samples -## Copyright (c) 1999-2005, International Business Machines Corporation and +## Copyright (c) 1999-2006, International Business Machines Corporation and ## others. All Rights Reserved. ## Install directory information @@ -20,7 +20,7 @@ include @platform_make_fragment@ CLEANFILES = *~ SUBDIRS = date cal -ALLSUBDIRS = break case datefmt msgfmt numfmt props translit ucnv udata ufortune uresb ustring citer uciter8 +ALLSUBDIRS = break case csdet datefmt msgfmt numfmt props translit ucnv udata ufortune uresb ustring citer uciter8 ## List of phony targets .PHONY : all all-local all-recursive install install-local \ diff --git a/icu4c/source/samples/csdet/Makefile.in b/icu4c/source/samples/csdet/Makefile.in new file mode 100644 index 0000000000..43f1ed4c1a --- /dev/null +++ b/icu4c/source/samples/csdet/Makefile.in @@ -0,0 +1,93 @@ +## Makefile.in for ICU - samples/csdet +## Copyright (c) 2006, International Business Machines Corporation and +## others. All Rights Reserved. + +## Source directory information +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ + +top_builddir = ../.. 
+
+include $(top_builddir)/icudefs.mk
+
+## Platform-specific setup
+include @platform_make_fragment@
+
+## Build directory information
+subdir = samples/csdet
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS)
+
+## Target information
+TARGET = csdet
+
+CPPFLAGS += -I$(top_builddir)/common -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)
+
+LIBS = $(LIBICUUC) $(LIBICUI18N) @LIBS@ @LIB_M@
+
+
+OBJECTS=csdet.o
+
+DEPS = $(OBJECTS:.o=.d)
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local \
+distclean distclean-local dist dist-local check check-local
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET)
+
+install-local:
+
+dist-local:
+
+clean-local:
+	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+	$(RMV) $(OBJECTS) $(TARGET)
+
+distclean-local: clean-local
+	$(RMV) Makefile
+## Run on the sample's own source: with no file argument csdet only prints its usage and exits with -1.
+check-local: all-local
+	$(INVOKE) ./$(TARGET) $(srcdir)/csdet.c
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	cd $(top_builddir) \
+	&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+# NOTE(review): the scrptrun.d and scrptrun.o rules that follow build
+# $(top_srcdir)/extra/scrptrun/scrptrun.cpp, but OBJECTS above is only
+# csdet.o. They were copied from the default rules in mh-linux and look
+# like leftovers from another sample - confirm before removing them.
+scrptrun.d: $(top_srcdir)/extra/scrptrun/scrptrun.cpp
+	$(SHELL) -ec '$(GEN_DEPS.cc) $< \
+		| sed '\''s%\($*\)\.o[ :]*%\1.o $@ : %g'\'' > $@; \
+		[ -s $@ ] || rm -f $@'
+
+scrptrun.o: $(top_srcdir)/extra/scrptrun/scrptrun.cpp
+	$(COMPILE.cc) $(DYNAMICCPPFLAGS) $(DYNAMICCXXFLAGS) -o $@ $<
+
+$(TARGET) : $(OBJECTS)
+	$(LINK.cc) -o $@ $^ $(LIBS)
+
+invoke:
+	ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)
+
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
diff --git a/icu4c/source/samples/csdet/csdet.c b/icu4c/source/samples/csdet/csdet.c
new file mode 100644
index 0000000000..c16a74c8cf
--- /dev/null
+++ b/icu4c/source/samples/csdet/csdet.c
@@ -0,0 +1,113 @@
+/*
+ ********************************************************************************
+ * Copyright (C) 2005-2006, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ ********************************************************************************
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/ucsdet.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#define BUFFER_SIZE 8192
+
+/*
+ * Charset detection sample.
+ *
+ * For each file named on the command line, reads up to BUFFER_SIZE bytes and
+ * prints one "name (language) confidence" line per match returned by
+ * ucsdet_detectAll(); "**" is printed when a match carries no language.
+ *
+ * A file that cannot be opened, read or analysed is reported and skipped so
+ * that the remaining files are still processed.
+ *
+ * Returns -1 when no file name is given, 0 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+    static char buffer[BUFFER_SIZE];
+    int32_t arg;
+
+    if( argc <= 1 ) {
+        printf("Usage: %s [filename]...\n", argv[0]);
+        return -1;
+    }
+
+    for(arg = 1; arg < argc; arg += 1) {
+        FILE *file;
+        char *filename = argv[arg];
+        int32_t inputLength, match, matchCount = 0;
+        UCharsetDetector* csd;
+        const UCharsetMatch **csm;
+        UErrorCode status = U_ZERO_ERROR;
+
+        if (arg > 1) {
+            printf("\n");
+        }
+
+        file = fopen(filename, "rb");
+
+        if (file == NULL) {
+            printf("Cannot open file \"%s\"\n\n", filename);
+            continue;
+        }
+
+        printf("%s:\n", filename);
+
+        inputLength = (int32_t) fread(buffer, 1, BUFFER_SIZE, file);
+
+        /* A short count from fread() is normal at end of file; only a stream
+         * error means the buffer contents cannot be trusted. */
+        if (ferror(file)) {
+            printf("Cannot read file \"%s\"\n", filename);
+            fclose(file);
+            continue;
+        }
+
+        fclose(file);
+
+        csd = ucsdet_open(&status);
+
+        /* ICU reports failure through status; do not use the detector then. */
+        if (U_FAILURE(status)) {
+            printf("Cannot open charset detector: %s\n", u_errorName(status));
+            continue;
+        }
+
+        ucsdet_setText(csd, buffer, inputLength, &status);
+
+        csm = ucsdet_detectAll(csd, &matchCount, &status);
+
+        if (U_FAILURE(status)) {
+            printf("Charset detection failed: %s\n", u_errorName(status));
+            ucsdet_close(csd);
+            continue;
+        }
+
+        for(match = 0; match < matchCount; match += 1) {
+            const char *name = ucsdet_getName(csm[match], &status);
+            const char *lang = ucsdet_getLanguage(csm[match], &status);
+            int32_t confidence = ucsdet_getConfidence(csm[match], &status);
+
+            /* Never hand a NULL name to printf("%s"), and stop on an ICU error. */
+            if (U_FAILURE(status) || name == NULL) {
+                printf("Cannot read match %d: %s\n", (int) match, u_errorName(status));
+                break;
+            }
+
+            if (lang == NULL || strlen(lang) == 0) {
+                lang = "**";
+            }
+
+            /* int32_t need not be int, so convert explicitly for %d. */
+            printf("%s (%s) %d\n", name, lang, (int) confidence);
+        }
+
+        ucsdet_close(csd);
+    }
+
+    return 0;
+}
+
diff --git a/icu4c/source/samples/csdet/csdet.sln b/icu4c/source/samples/csdet/csdet.sln
new file mode 100644
index 0000000000..981a69fde8
--- /dev/null
+++ b/icu4c/source/samples/csdet/csdet.sln
@@ -0,0 +1,21 @@
+Microsoft Visual Studio Solution File, Format Version 8.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "csdet", "csdet.vcproj", "{683745AD-3BC2-4B89-898B-93490D7F2757}"
+	ProjectSection(ProjectDependencies) = postProject
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfiguration) = preSolution
+		Debug = Debug
+		Release = Release
+	EndGlobalSection
+	GlobalSection(ProjectConfiguration) = postSolution
+		{683745AD-3BC2-4B89-898B-93490D7F2757}.Debug.ActiveCfg = Debug|Win32
+		{683745AD-3BC2-4B89-898B-93490D7F2757}.Debug.Build.0 = Debug|Win32
+		{683745AD-3BC2-4B89-898B-93490D7F2757}.Release.ActiveCfg = Release|Win32
+		{683745AD-3BC2-4B89-898B-93490D7F2757}.Release.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+	EndGlobalSection
+	GlobalSection(ExtensibilityAddIns) = postSolution
+	EndGlobalSection
+EndGlobal
diff --git a/icu4c/source/samples/csdet/csdet.vcproj b/icu4c/source/samples/csdet/csdet.vcproj
new file mode 100644
index 0000000000..86f40c2f6c
--- /dev/null
+++ b/icu4c/source/samples/csdet/csdet.vcproj
@@ -0,0 +1,143 @@
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +