Skip to content

Commit 7c416ee

Browse files
committed
[Localization] Implement .def to .strings converter
Format is as follows: comments are written as /* ... */, and each diagnostic is written as "<id>" = "<translation>";
1 parent dc38375 commit 7c416ee

File tree

2 files changed

+204
-4
lines changed

2 files changed

+204
-4
lines changed

include/swift/Localization/LocalizationFormat.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,20 @@ class DefToYAMLConverter {
6464
void convert(llvm::raw_ostream &out);
6565
};
6666

67+
class DefToStringsConverter {
68+
llvm::ArrayRef<const char *> IDs;
69+
llvm::ArrayRef<const char *> Messages;
70+
71+
public:
72+
DefToStringsConverter(llvm::ArrayRef<const char *> ids,
73+
llvm::ArrayRef<const char *> messages)
74+
: IDs(ids), Messages(messages) {
75+
assert(IDs.size() == Messages.size());
76+
}
77+
78+
void convert(llvm::raw_ostream &out);
79+
};
80+
6781
class LocalizationWriterInfo {
6882
public:
6983
using key_type = uint32_t;
@@ -225,6 +239,31 @@ class YAMLLocalizationProducer final : public LocalizationProducer {
225239
llvm::StringRef getMessage(swift::DiagID id) const override;
226240
};
227241

242+
class StringsLocalizationProducer final : public LocalizationProducer {
243+
std::string filePath;
244+
245+
std::vector<std::string> diagnostics;
246+
247+
public:
248+
explicit StringsLocalizationProducer(llvm::StringRef filePath,
249+
bool printDiagnosticNames = false)
250+
: LocalizationProducer(printDiagnosticNames), filePath(filePath) {}
251+
252+
/// Iterate over all of the available (non-empty) translations
253+
/// maintained by this producer, callback gets each translation
254+
/// with its unique identifier.
255+
void forEachAvailable(
256+
llvm::function_ref<void(swift::DiagID, llvm::StringRef)> callback);
257+
258+
protected:
259+
bool initializeImpl() override;
260+
llvm::StringRef getMessage(swift::DiagID id) const override;
261+
262+
private:
263+
static void readStringsFile(llvm::MemoryBuffer *in,
264+
std::vector<std::string> &diagnostics);
265+
};
266+
228267
class SerializedLocalizationProducer final : public LocalizationProducer {
229268
using SerializedLocalizationTable =
230269
llvm::OnDiskIterableChainedHashTable<LocalizationReaderInfo>;

lib/Localization/LocalizationFormat.cpp

Lines changed: 165 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,6 @@ void YAMLLocalizationProducer::forEachAvailable(
184184
std::unique_ptr<LocalizationProducer>
185185
LocalizationProducer::producerFor(llvm::StringRef locale, llvm::StringRef path,
186186
bool printDiagnosticNames) {
187-
std::unique_ptr<LocalizationProducer> producer;
188187
llvm::SmallString<128> filePath(path);
189188
llvm::sys::path::append(filePath, locale);
190189
llvm::sys::path::replace_extension(filePath, ".db");
@@ -193,20 +192,26 @@ LocalizationProducer::producerFor(llvm::StringRef locale, llvm::StringRef path,
193192
// fallback to the `YAML` file.
194193
if (llvm::sys::fs::exists(filePath)) {
195194
if (auto file = llvm::MemoryBuffer::getFile(filePath)) {
196-
producer = std::make_unique<diag::SerializedLocalizationProducer>(
195+
return std::make_unique<diag::SerializedLocalizationProducer>(
197196
std::move(file.get()), printDiagnosticNames);
198197
}
199198
} else {
200199
llvm::sys::path::replace_extension(filePath, ".yaml");
201200
// In case of missing localization files, we should fallback to messages
202201
// from `.def` files.
203202
if (llvm::sys::fs::exists(filePath)) {
204-
producer = std::make_unique<diag::YAMLLocalizationProducer>(
203+
return std::make_unique<diag::YAMLLocalizationProducer>(
204+
filePath.str(), printDiagnosticNames);
205+
}
206+
207+
llvm::sys::path::replace_extension(filePath, ".strings");
208+
if (llvm::sys::fs::exists(filePath)) {
209+
return std::make_unique<diag::StringsLocalizationProducer>(
205210
filePath.str(), printDiagnosticNames);
206211
}
207212
}
208213

209-
return producer;
214+
return std::unique_ptr<LocalizationProducer>();
210215
}
211216

212217
llvm::Optional<uint32_t> LocalizationInput::readID(llvm::yaml::IO &io) {
@@ -289,5 +294,161 @@ void DefToYAMLConverter::convert(llvm::raw_ostream &out) {
289294
}
290295
}
291296

297+
/// Write every diagnostic to \p out as a `.strings` entry of the form
/// `"<id>" = "<msg>";`, each terminated by `\r\n`.
///
/// A `"` occurring inside a message is prefixed with a backslash so that
/// it is not mistaken for the end of the message. No other character is
/// escaped.
///
/// \param out The stream that receives the generated entries.
void DefToStringsConverter::convert(llvm::raw_ostream &out) {
  for (auto i : swift::indices(IDs)) {
    out << "\"" << IDs[i] << "\"";
    out << " = ";

    // View the message in place rather than copying it into a temporary
    // `std::string` just to iterate over its characters.
    const llvm::StringRef msg(Messages[i]);

    out << "\"";
    for (const char c : msg) {
      // Escape '"' found in the message.
      if (c == '"')
        out << '\\';

      out << c;
    }

    out << "\";\r\n";
  }
}
317+
318+
bool StringsLocalizationProducer::initializeImpl() {
319+
auto FileBufOrErr = llvm::MemoryBuffer::getFileOrSTDIN(filePath);
320+
llvm::MemoryBuffer *document = FileBufOrErr->get();
321+
readStringsFile(document, diagnostics);
322+
return true;
323+
}
324+
325+
/// Look up the translation stored for \p id.
///
/// The numeric value of \p id is used directly as an index into
/// `diagnostics`; no bounds check is performed. The result is empty when
/// the slot holds an empty string.
llvm::StringRef
StringsLocalizationProducer::getMessage(swift::DiagID id) const {
  const auto slot = static_cast<unsigned>(id);
  return diagnostics[slot];
}
329+
330+
/// Invoke \p callback once for every diagnostic that has a non-empty
/// translation, passing the diagnostic's ID and its translated message.
///
/// Initialization is triggered first if it has not happened yet; when it
/// has failed, the function returns without invoking the callback.
void StringsLocalizationProducer::forEachAvailable(
    llvm::function_ref<void(swift::DiagID, llvm::StringRef)> callback) {
  initializeIfNeeded();
  if (getState() == FailedInitialization) {
    return;
  }

  for (uint32_t i = 0, n = diagnostics.size(); i != n; ++i) {
    // Bind by reference: the callback only needs a view of the message,
    // so copying each string would be a wasted allocation.
    const std::string &translation = diagnostics[i];
    if (!translation.empty())
      callback(static_cast<swift::DiagID>(i), translation);
  }
}
343+
344+
/// Parse the `.strings` localization data in \p in and store each
/// translation into the slot of \p diagnostics that belongs to its
/// diagnostic.
///
/// The format is as follows:
///
/// - comment: /* ... */
/// - translation: "<id>" = "<message>";
///
/// Inside a message, `\"` stands for a literal `"`. Entries whose id does
/// not name a known diagnostic are reported on `llvm::errs()` and skipped.
/// Malformed input is diagnosed through `assert` / `llvm_unreachable`
/// only.
///
/// NOTE(review): the escape check looks only at the character directly
/// before a `"`, so a message ending in a backslash (which
/// `DefToStringsConverter::convert` writes as `\";`) is read back as an
/// escaped quote rather than as the end of the message.
///
/// \param in The buffer holding the contents of the `.strings` file.
/// \param diagnostics Output vector; resized to `LocalDiagID::NumDiags`
///        and filled in at the index of each diagnostic that is found.
void StringsLocalizationProducer::readStringsFile(
    llvm::MemoryBuffer *in, std::vector<std::string> &diagnostics) {
  // Map every known diagnostic name to its numeric identifier.
  std::map<std::string, unsigned> diagLocs;
#define DIAG(KIND, ID, Options, Text, Signature)                               \
  diagLocs[#ID] = static_cast<unsigned>(LocalDiagID::ID);
#include "swift/AST/DiagnosticsAll.def"
#undef DIAG

  // Allocate enough slots to fit all the possible diagnostics
  // this helps to identify which diagnostics are missing.
  diagnostics.resize(LocalDiagID::NumDiags);

  // Whitespace after a comment or an entry is skipped below; skip it at
  // the very beginning too, so that leading blank lines (or a file that
  // contains nothing but whitespace) are not treated as malformed.
  auto buffer = in->getBuffer().ltrim();
  while (!buffer.empty()) {
    // Consume comment.
    if (buffer.startswith("/*")) {
      auto endOfComment = buffer.find("*/");
      assert(endOfComment != std::string::npos);
      // Consume the comment and trailing `*/`
      buffer = buffer.drop_front(endOfComment + 2).ltrim();
      continue;
    }

    assert(buffer.startswith("\"") && "malformed diagnostics file");

    // Consume leading `"`
    buffer = buffer.drop_front();

    // Valid diagnostic id cannot have any `"` in it.
    auto idSize = buffer.find_first_of('\"');
    assert(idSize != std::string::npos);

    std::string id(buffer.data(), idSize);

    // Consume id and `" = "`. There could be a variable number of
    // spaces on each side of `=`.
    {
      // Consume id, trailing `"`, and all spaces before `=`
      buffer = buffer.drop_front(idSize + 1).ltrim(' ');

      // Consume `=` and all trailing spaces until `"`
      {
        assert(!buffer.empty() && buffer.front() == '=');
        buffer = buffer.drop_front().ltrim(' ');
      }

      // Consume `"` at the beginning of the diagnostic message.
      {
        assert(!buffer.empty() && buffer.front() == '\"');
        buffer = buffer.drop_front();
      }
    }

    llvm::SmallString<64> msg;
    {
      bool isValid = false;
      // Look for `";` which denotes the end of message
      for (size_t i = 0, n = buffer.size(); i != n; ++i) {
        if (buffer[i] != '\"') {
          msg.push_back(buffer[i]);
          continue;
        }

        // Check whether this `"` is escaped, and if so - continue because
        // `"` is part of the message. The leading `"` has already been
        // consumed, so a `"` at index 0 has no preceding message character
        // and cannot be escaped: it terminates an empty message.
        if (i > 0 && buffer[i - 1] == '\\') {
          // Drop `\` added for escaping.
          msg.pop_back();
          msg.push_back(buffer[i]);
          continue;
        }

        // If current `"` was not escaped and it's followed by `;` -
        // we have reached the end of the message, otherwise
        // the input is malformed.
        if (i + 1 < n && buffer[i + 1] == ';') {
          // Consume the message and its trailing info.
          buffer = buffer.drop_front(i + 2).ltrim();
          // Mark message as valid.
          isValid = true;
          break;
        }

        llvm_unreachable("malformed diagnostics file");
      }

      assert(isValid && "malformed diagnostic message");
    }

    // Check whether extracted diagnostic still exists in the
    // system and if not - record as unknown.
    {
      auto existing = diagLocs.find(id);
      if (existing != diagLocs.end()) {
        diagnostics[existing->second] = std::string(msg);
      } else {
        llvm::errs() << "[!] Unknown diagnostic: " << id << '\n';
      }
    }
  }
}
452+
292453
} // namespace diag
293454
} // namespace swift

0 commit comments

Comments
 (0)