2022-05-10 10:06:48 +00:00
|
|
|
# Copyright (C) 2021 The Qt Company Ltd.
|
|
|
|
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
|
2021-11-22 14:56:53 +00:00
|
|
|
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from typing import Dict, Optional
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
|
class LanguageCodeEntry:
|
|
|
|
part3Code: str
|
|
|
|
part2BCode: Optional[str]
|
|
|
|
part2TCode: Optional[str]
|
|
|
|
part1Code: Optional[str]
|
|
|
|
|
|
|
|
def id(self) -> str:
|
|
|
|
if self.part1Code:
|
|
|
|
return self.part1Code
|
|
|
|
if self.part2BCode:
|
|
|
|
return self.part2BCode
|
|
|
|
return self.part3Code
|
|
|
|
|
|
|
|
def __repr__(self) -> str:
|
|
|
|
parts = [f'{self.__class__.__name__}({self.id()!r}, part3Code={self.part3Code!r}']
|
|
|
|
if self.part2BCode is not None and self.part2BCode != self.part3Code:
|
|
|
|
parts.append(f', part2BCode={self.part2BCode!r}')
|
|
|
|
if self.part2TCode != self.part2BCode:
|
|
|
|
parts.append(f', part2TCode={self.part2TCode!r}')
|
|
|
|
if self.part1Code is not None:
|
|
|
|
parts.append(f', part1Code={self.part1Code!r}')
|
|
|
|
parts.append(')')
|
|
|
|
return ''.join(parts)
|
|
|
|
|
|
|
|
|
|
|
|
class LanguageCodeData:
|
|
|
|
"""
|
|
|
|
Representation of ISO639-2 language code data.
|
|
|
|
"""
|
|
|
|
def __init__(self, fileName: str):
|
|
|
|
"""
|
|
|
|
Construct the object populating the data from the given file.
|
|
|
|
"""
|
|
|
|
self.__codeMap: Dict[str, LanguageCodeEntry] = {}
|
|
|
|
|
|
|
|
with open(fileName, 'r', encoding='utf-8') as stream:
|
|
|
|
stream.readline() # skip the header
|
|
|
|
for line in stream.readlines():
|
|
|
|
part3Code, part2BCode, part2TCode, part1Code, _ = line.split('\t', 4)
|
|
|
|
|
|
|
|
# sanity checks
|
|
|
|
assert all(p.isascii() for p in (part3Code, part2BCode, part2TCode, part1Code)), \
|
|
|
|
f'Non-ascii characters in code names: {part3Code!r} {part2BCode!r} '\
|
|
|
|
f'{part2TCode!r} {part1Code!r}'
|
|
|
|
|
|
|
|
assert len(part3Code) == 3, f'Invalid Part 3 code length for {part3Code!r}'
|
|
|
|
assert not part1Code or len(part1Code) == 2, \
|
|
|
|
f'Invalid Part 1 code length for {part3Code!r}: {part1Code!r}'
|
|
|
|
assert not part2BCode or len(part2BCode) == 3, \
|
|
|
|
f'Invalid Part 2B code length for {part3Code!r}: {part2BCode!r}'
|
|
|
|
assert not part2TCode or len(part2TCode) == 3, \
|
|
|
|
f'Invalid Part 2T code length for {part3Code!r}: {part2TCode!r}'
|
|
|
|
|
|
|
|
assert (part2BCode == '') == (part2TCode == ''), \
|
|
|
|
f'Only one Part 2 code is specified for {part3Code!r}: ' \
|
|
|
|
f'{part2BCode!r} vs {part2TCode!r}'
|
|
|
|
assert not part2TCode or part2TCode == part3Code, \
|
|
|
|
f'Part 3 code {part3Code!r} does not match Part 2T code {part2TCode!r}'
|
|
|
|
|
|
|
|
entry = LanguageCodeEntry(part3Code, part2BCode or None,
|
|
|
|
part2TCode or None, part1Code or None)
|
|
|
|
|
|
|
|
self.__codeMap[entry.id()] = entry
|
|
|
|
|
|
|
|
def query(self, code: str) -> Optional[LanguageCodeEntry]:
|
|
|
|
"""
|
|
|
|
Lookup the entry with the given code and return it.
|
|
|
|
|
|
|
|
The entries can be looked up by using either the Alpha2 code or the bibliographical
|
|
|
|
Alpha3 code.
|
|
|
|
"""
|
|
|
|
return self.__codeMap.get(code)
|