ICU-20693 Make alt-path processing per-locale and remove source values.

remove sources
This commit is contained in:
David Beaumont 2019-11-06 15:41:22 +01:00 committed by David Beaumont
parent 16ab588517
commit 4c74b3456a
3 changed files with 93 additions and 20 deletions

View File

@ -305,8 +305,9 @@
This feature is typically used to select alternate translations (e.g. short forms)
for certain paths. -->
<!-- <altPath target="//path/to/value[@attr='foo']" source="//path/to/value[@attr='bar']"/> -->
<!-- <altPath target="//path/to/value[@attr='foo']"
source="//path/to/value[@attr='bar']"
locales="xx,yy_ZZ"/> -->
</convert>
</target>
</project>

View File

@ -4,8 +4,10 @@ package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static org.unicode.cldr.api.CldrDataType.LDML;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
@ -17,6 +19,9 @@ import org.unicode.cldr.api.CldrPath;
import org.unicode.cldr.api.CldrValue;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Table;
/**
* A factory for data suppliers which can filter CLDR values by substituting values from one path
@ -46,32 +51,46 @@ public final class AlternateLocaleData {
* values are obtained. For each map entry, the target and source paths must be in the same
* namespace (i.e. have the same path element names).
*/
public static CldrDataSupplier transform(CldrDataSupplier src, Map<CldrPath, CldrPath> altPaths) {
return new CldrDataFilter(src, altPaths);
public static CldrDataSupplier transform(
CldrDataSupplier src,
Map<CldrPath, CldrPath> globalAltPaths,
Table<String, CldrPath, CldrPath> localeAltPaths) {
return new CldrDataFilter(src, globalAltPaths, localeAltPaths);
}
private static final class CldrDataFilter extends CldrDataSupplier {
private final CldrDataSupplier src;
// Mapping from target (destination) to source path. This is necessary since two targets
// could come from the same source).
private final ImmutableMap<CldrPath, CldrPath> altPaths;
private final ImmutableMap<CldrPath, CldrPath> globalAltPaths;
private final ImmutableTable<String, CldrPath, CldrPath> localeAltPaths;
CldrDataFilter(
CldrDataSupplier src, Map<CldrPath, CldrPath> altPaths) {
CldrDataSupplier src,
Map<CldrPath, CldrPath> globalAltPaths,
Table<String, CldrPath, CldrPath> localeAltPaths) {
this.src = checkNotNull(src);
this.altPaths = ImmutableMap.copyOf(altPaths);
altPaths.forEach((t, s) -> checkArgument(hasSameNamespace(checkLdml(t), checkLdml(s)),
this.globalAltPaths = ImmutableMap.copyOf(globalAltPaths);
this.localeAltPaths = ImmutableTable.copyOf(localeAltPaths);
this.globalAltPaths
.forEach((t, s) -> checkArgument(hasSameNamespace(checkLdml(t), checkLdml(s)),
"alternate paths must have the same namespace: target=%s, source=%s", t, s));
this.localeAltPaths.cellSet()
.forEach(c -> checkArgument(
hasSameNamespace(checkLdml(c.getColumnKey()), checkLdml(c.getValue())),
"alternate paths must have the same namespace: locale=%s, target=%s, source=%s",
c.getRowKey(), c.getColumnKey(), c.getValue()));
}
@Override
public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) {
return new CldrDataFilter(src.withDraftStatusAtLeast(draftStatus), altPaths);
return new CldrDataFilter(
src.withDraftStatusAtLeast(draftStatus), globalAltPaths, localeAltPaths);
}
@Override
public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
return new AltData(src.getDataForLocale(localeId, resolution));
return new AltData(src.getDataForLocale(localeId, resolution), localeId);
}
@Override
@ -85,8 +104,28 @@ public final class AlternateLocaleData {
}
private final class AltData extends FilteredData {
AltData(CldrData srcData) {
// Calculated per locale/data instance to make lokup as fast as possible.
private final ImmutableMap<CldrPath, CldrPath> altPaths;
// Any source paths which are not also target paths are removed. This is legacy
// behaviour inherited from the original build tools, the reason for which is not
// known. If it becomes desirable to retain the source values in their original
// locations, this can just be removed.
private final ImmutableSet<CldrPath> toRemove;
AltData(CldrData srcData, String localeId) {
super(srcData);
ImmutableMap<CldrPath, CldrPath> altPaths = globalAltPaths;
if (!localeAltPaths.row(localeId).isEmpty()) {
Map<CldrPath, CldrPath> combinedPaths = new HashMap<>();
// Locale specific path mappings overwrite global ones.
combinedPaths.putAll(globalAltPaths);
combinedPaths.putAll(localeAltPaths.row(localeId));
altPaths = ImmutableMap.copyOf(combinedPaths);
}
this.altPaths = altPaths;
this.toRemove = altPaths.values().stream()
.filter(p -> !this.altPaths.containsKey(p))
.collect(toImmutableSet());
}
@Override
@ -98,7 +137,7 @@ public final class AlternateLocaleData {
return altValue.replacePath(value.getPath());
}
}
return value;
return toRemove.contains(value.getPath()) ? null : value;
}
}
}

View File

@ -8,12 +8,17 @@ import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableTable.toImmutableTable;
import static com.google.common.collect.Tables.immutableCell;
import static java.util.stream.Collectors.joining;
import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Predicate;
@ -38,9 +43,14 @@ import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Iterables;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.Table;
import com.google.common.collect.Table.Cell;
import com.google.common.collect.Tables;
// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed.
public final class ConvertIcuDataTask extends Task {
@ -60,7 +70,7 @@ public final class ConvertIcuDataTask extends Task {
private final SetMultimap<IcuLocaleDir, String> perDirectoryIds = HashMultimap.create();
private final IcuConverterConfig.Builder config = IcuConverterConfig.builder();
// Don't try and resolve actual paths until inside the execute method.
private final Map<String, String> altPathMap = new HashMap<>();
private final List<AltPath> altPaths = new ArrayList<>();
// TODO(CLDR-13381): Move into CLDR API; e.g. withPseudoLocales()
private boolean includePseudoLocales = false;
private Predicate<String> idFilter = id -> true;
@ -178,6 +188,7 @@ public final class ConvertIcuDataTask extends Task {
public static final class AltPath extends Task {
private String source = "";
private String target = "";
private ImmutableSet<String> localeIds = ImmutableSet.of();
@SuppressWarnings("unused")
public void setTarget(String target) {
@ -189,6 +200,11 @@ public final class ConvertIcuDataTask extends Task {
this.source = source.replace('\'', '"');
}
@SuppressWarnings("unused")
public void setLocales(String localeIds) {
this.localeIds = parseLocaleIds(localeIds);
}
@Override
public void init() throws BuildException {
checkBuild(!source.isEmpty(), "Source path not be empty");
@ -223,8 +239,7 @@ public final class ConvertIcuDataTask extends Task {
// Don't convert to CldrPath here (it triggers a bunch of CLDR data loading for the DTDs).
// Wait until the "execute()" method since in future we expect to use the configured CLDR
// directory explicitly there.
checkBuild(this.altPathMap.put(altPath.target, altPath.source) == null,
"Duplicate <altPath> elements (same target): %s", altPath.target);
altPaths.add(altPath);
}
@SuppressWarnings("unused")
@ -236,11 +251,8 @@ public final class ConvertIcuDataTask extends Task {
// We must do this wrapping of the data supplier _before_ creating the supplemental data
// instance since adding pseudo locales affects the set of available locales.
// TODO: Move some/all of this into the base converter and control it via the config.
if (!altPathMap.isEmpty()) {
Map<CldrPath, CldrPath> pathMap = new HashMap<>();
altPathMap.forEach(
(t, s) -> pathMap.put(parseDistinguishingPath(t), parseDistinguishingPath(s)));
src = AlternateLocaleData.transform(src, pathMap);
if (!altPaths.isEmpty()) {
src = AlternateLocaleData.transform(src, getGlobalAltPaths(), getLocaleAltPaths());
}
if (includePseudoLocales) {
src = PseudoLocales.addPseudoLocalesTo(src);
@ -257,6 +269,27 @@ public final class ConvertIcuDataTask extends Task {
LdmlConverter.convert(src, supplementalData, config.build());
}
private ImmutableMap<CldrPath, CldrPath> getGlobalAltPaths() {
// This fails if the same key appears more than once.
return altPaths.stream()
.filter(a -> a.localeIds.isEmpty())
.collect(toImmutableMap(
a -> parseDistinguishingPath(a.target),
a -> parseDistinguishingPath(a.source)));
}
private ImmutableTable<String, CldrPath, CldrPath> getLocaleAltPaths() {
return altPaths.stream()
.flatMap(
a -> a.localeIds.stream().map(
id -> immutableCell(
id,
parseDistinguishingPath(a.target),
parseDistinguishingPath(a.source))))
// Weirdly there's no collector method to just collect cells.
.collect(toImmutableTable(Cell::getRowKey, Cell::getColumnKey, Cell::getValue));
}
private static void checkBuild(boolean condition, String message, Object... args) {
if (!condition) {
throw new BuildException(String.format(message, args));