12 #include "../stdafx.h"
13 #include "../core/endian_func.hpp"
14 #include "../string_func.h"
15 #include "../table/control_codes.h"
20 #include "../table/strgen_tables.h"
22 #include "../safeguards.h"
/* Global file-name string; it starts out as the placeholder "(unknown file)". */
28 const char *
_file =
"(unknown file)";
/*
 * Global diagnostics state. _warnings and _errors are both reset to 0 elsewhere
 * in this file. _show_todo is tested as a bit mask: bit 0x2 gates the
 * "untranslated" warnings and bit 0x1 guards the code that sets up a "<TODO> "
 * string.
 */
30 int _errors, _warnings, _show_todo;
/*
 * Forward declaration of the command parser that is defined further down in
 * this file. It is needed here because a hashing loop placed before the
 * definition already calls it, and keeps calling it until it returns NULL.
 */
34 static const CmdStruct *ParseCommandString(
const char **str,
char *param,
int *argno,
int *casei);
43 caseidx(caseidx), string(
stredup(string)), next(next)
63 hash_next(0), index(index), line(line), translated_case(NULL)
122 for (; *s !=
'\0'; s++) hash =
ROL(hash, 3) ^ *s;
152 if (strcmp(ls->
name, s) == 0)
return ls;
166 for (; *s !=
'\0'; s++) {
167 hash =
ROL(hash, 3) ^ *s;
168 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
192 hash ^= i * 0x717239;
193 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
197 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
200 hash ^= (cs - _cmd_structs) * 0x1234567;
201 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
/* Name of the string being processed; assigned from ls->name and used as the "%s:" prefix of diagnostics. */
220 static const char *_cur_ident;
/* Index into _cur_pcs.cmd of the next argument slot; post-incremented when a consuming command is written. */
235 static int _cur_argidx;
256 }
else if (value < 0x800) {
257 *this->
Append() = 0xC0 +
GB(value, 6, 5);
258 *this->
Append() = 0x80 +
GB(value, 0, 6);
259 }
else if (value < 0x10000) {
260 *this->
Append() = 0xE0 +
GB(value, 12, 4);
261 *this->
Append() = 0x80 +
GB(value, 6, 6);
262 *this->
Append() = 0x80 +
GB(value, 0, 6);
263 }
else if (value < 0x110000) {
264 *this->
Append() = 0xF0 +
GB(value, 18, 3);
265 *this->
Append() = 0x80 +
GB(value, 12, 6);
266 *this->
Append() = 0x80 +
GB(value, 6, 6);
267 *this->
Append() = 0x80 +
GB(value, 0, 6);
269 strgen_warning(
"Invalid unicode value U+0x%X", value);
/*
 * Check the UTF-8 sequence that starts at s.
 *
 * The branches visible here handle 2-, 3- and 4-byte sequences: the lead byte
 * pattern and every continuation byte (IsUtf8Part) must match, and the decoded
 * value must be large enough to need that many bytes. On success the sequence
 * length (2, 3 or 4) is returned. A caller in this file treats a return value
 * of 0 as "invalid sequence".
 *
 * NOTE(review): GB(x, start, count) appears to extract `count` bits of x
 * beginning at bit `start` -- confirm against its definition. The single-byte
 * branch and the final return are not visible in this excerpt.
 */
274 size_t Utf8Validate(
const char *s)
281 }
else if (
GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
283 c =
GB(s[0], 0, 5) << 6 |
GB(s[1], 0, 6);
/* Values below 0x80 fit in one byte, so a two-byte form of them is overlong. */
284 if (c >= 0x80)
return 2;
285 }
else if (
GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
287 c =
GB(s[0], 0, 4) << 12 |
GB(s[1], 0, 6) << 6 |
GB(s[2], 0, 6);
/* Values below 0x800 fit in two bytes, so a three-byte form of them is overlong. */
288 if (c >= 0x800)
return 3;
289 }
else if (
GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
291 c =
GB(s[0], 0, 3) << 18 |
GB(s[1], 0, 6) << 12 |
GB(s[2], 0, 6) << 6 |
GB(s[3], 0, 6);
/* Four bytes are only accepted for values in 0x10000..0x10FFFF. */
292 if (c >= 0x10000 && c <= 0x10FFFF)
return 4;
/*
 * Command handler with the (buffer, text, value) signature.
 *
 * The only statement visible here is a sanity check: if buf is not empty, a
 * warning is issued that the trailing letters of the command are ignored.
 * NOTE(review): the code that writes `value` to `buffer` is not visible in
 * this excerpt; presumably this is invoked through cs->proc like the other
 * handlers -- confirm.
 */
299 void EmitSingleChar(
Buffer *buffer,
char *buf,
int value)
301 if (*buf !=
'\0') strgen_warning(
"Ignoring trailing letters in command");
/*
 * Parse a number, optionally followed by ":<number>", from the text at *buf.
 *
 * Leading spaces and tabs are skipped. Both numbers are read with strtol and
 * base 0, so the usual C prefixes select the radix. The second number is only
 * looked for when `offset` is non-NULL and the first number is followed by
 * ':'; it is stored in *offset.
 *
 * Returns false when either strtol call consumed no characters.
 * NOTE(review): how the first number is combined with *value, and how *buf is
 * advanced, is not visible in this excerpt.
 */
312 bool ParseRelNum(
char **buf,
int *value,
int *offset)
314 const char *s = *buf;
318 while (*s ==
' ' || *s ==
'\t') s++;
323 int v = strtol(s, &end, 0);
/* strtol leaves end == s when it could not convert anything. */
324 if (end == s)
return false;
330 if (offset != NULL && *end ==
':') {
333 *offset = strtol(s, &end, 0);
334 if (end == s)
return false;
/*
 * Extract the next word from the text at *buf.
 *
 * Leading spaces and tabs are skipped; NULL is returned when only the string
 * terminator remains. The scanning code visible below stops at the end of the
 * string in two places, and the second place also tests for a space or tab.
 * NOTE(review): the surrounding loops (and any quoting rules) are not visible
 * in this excerpt.
 */
341 char *ParseWord(
char **buf)
345 while (*s ==
' ' || *s ==
'\t') s++;
346 if (*s ==
'\0')
return NULL;
352 if (*s ==
'\0')
break;
363 if (*s ==
'\0')
break;
364 if (*s ==
' ' || *s ==
'\t') {
/*
 * Forward declaration; the definition follows later in this file. The default
 * of 0 for `offset` is declared here, which is what allows a call with only
 * an argument index.
 */
376 static int TranslateArgumentIdx(
int arg,
int offset = 0);
/*
 * Write a list of `nw` words to `buffer`.
 *
 * First one byte per word is appended holding strlen(word) + 1 (the length is
 * truncated to a byte before the 1 is added), then the characters of every
 * word are appended in order.
 * NOTE(review): the "+ 1" suggests a terminator byte is written after each
 * word, and a count byte may precede the lengths -- neither is visible in this
 * excerpt.
 */
378 static void EmitWordList(
Buffer *buffer,
const char *
const *words, uint nw)
381 for (uint i = 0; i < nw; i++) buffer->
AppendByte((byte)strlen(words[i]) + 1);
382 for (uint i = 0; i < nw; i++) {
383 for (uint j = 0; words[i][j] !=
'\0'; j++) buffer->
AppendByte(words[i][j]);
/*
 * Command handler that emits a plural word list.
 *
 * buf may start with an argument reference, parsed by ParseRelNum starting
 * from _cur_argidx; when that parse fails the index is decremented instead.
 * The command in that slot of _cur_pcs.cmd supplies default_plural_offset.
 * The words are then read one by one with ParseWord until it returns NULL.
 *
 * Fatal errors visible here: the referenced slot is empty or has a negative
 * default_plural_offset, there are no plural words, or the word count differs
 * from the expected one. A second path for a word-count mismatch only warns
 * (when _show_todo bit 0x2 is set) and repeats the last word until `expected`
 * words exist.
 * NOTE(review): the conditions choosing between the fatal and padding paths,
 * and when the default offset is applied, are not visible in this excerpt.
 *
 * Finally the translated argument index is appended as a byte, followed by
 * the word list.
 */
388 void EmitPlural(
Buffer *buffer,
char *buf,
int value)
390 int argidx = _cur_argidx;
397 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
399 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
402 if (cmd == NULL || cmd->default_plural_offset < 0) {
403 strgen_fatal(
"Command '%s' has no (default) plural position", cmd == NULL ?
"<empty>" : cmd->cmd);
405 offset = cmd->default_plural_offset;
410 words[nw] = ParseWord(&buf);
411 if (words[nw] == NULL)
break;
415 strgen_fatal(
"%s: No plural words", _cur_ident);
418 if (expected != nw) {
420 strgen_fatal(
"%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
423 if ((_show_todo & 2) != 0) strgen_warning(
"'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
/* Pad the list by duplicating the last word that was supplied. */
427 for (; nw < expected; nw++) {
428 words[nw] = words[nw - 1];
436 buffer->
AppendByte(TranslateArgumentIdx(argidx, offset));
437 EmitWordList(buffer, words, nw);
/*
 * Command handler for gender commands.
 *
 * Two paths are partly visible. One rejects a gender argument whose index
 * `nw` is at or beyond MAX_NUM_GENDERS. The other parses an optional argument
 * reference with ParseRelNum (a failed parse is deliberately ignored), looks
 * up the command in that slot of _cur_pcs.cmd and requires it to carry the
 * C_GENDER flag. It then reads words with ParseWord until NULL and requires
 * exactly _lang.num_genders of them. The translated argument index is
 * appended as a byte, followed by the word list.
 * NOTE(review): the condition that selects between the two paths is not
 * visible in this excerpt.
 */
441 void EmitGender(
Buffer *buffer,
char *buf,
int value)
443 int argidx = _cur_argidx;
452 if (nw >=
MAX_NUM_GENDERS) strgen_fatal(
"G argument '%s' invalid", buf);
462 if (!ParseRelNum(&buf, &argidx, &offset)) {}
464 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
465 if (cmd == NULL || (cmd->flags &
C_GENDER) == 0) {
466 strgen_fatal(
"Command '%s' can't have a gender", cmd == NULL ?
"<empty>" : cmd->cmd);
470 words[nw] = ParseWord(&buf);
471 if (words[nw] == NULL)
break;
473 if (nw != _lang.
num_genders) strgen_fatal(
"Bad # of arguments for gender command");
475 assert(
IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
477 buffer->
AppendByte(TranslateArgumentIdx(argidx, offset));
478 EmitWordList(buffer, words, nw);
/*
 * Look up a command by name in _cmd_structs.
 *
 * s need not be NUL-terminated; only its first `len` characters are compared.
 * An entry matches when those characters are equal and the entry's name ends
 * exactly there, so a prefix of a longer command name does not match.
 * NOTE(review): the result when nothing matches is not visible in this
 * excerpt (a caller reports "Undefined command" afterwards).
 */
482 static const CmdStruct *FindCmd(
const char *s,
int len)
484 for (
const CmdStruct *cs = _cmd_structs; cs !=
endof(_cmd_structs); cs++) {
485 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] ==
'\0')
return cs;
/*
 * Resolve a case name given as the first `len` characters of str.
 *
 * The name is copied into the local case_str and NUL-terminated so it can be
 * used as a C string. A resulting case index at or beyond MAX_NUM_CASES is a
 * fatal error.
 * NOTE(review): the size of case_str and the lookup that produces case_idx
 * are not visible in this excerpt; confirm that `len` is bounded before the
 * memcpy.
 */
490 static uint ResolveCaseName(
const char *str,
size_t len)
495 memcpy(case_str, str, len);
496 case_str[len] =
'\0';
499 if (case_idx >=
MAX_NUM_CASES) strgen_fatal(
"Invalid case-name '%s'", case_str);
/*
 * Parse the next command from the text at *str.
 *
 * The text is scanned forward to the next '{'; NULL is returned when the
 * string ends first. If the command starts with a digit, a number is read
 * with strtoul (base 0) into *argno and must be followed by ':'. The command
 * name runs up to '}', ' ', '=', '.' or the end of the string and is looked
 * up with FindCmd; an unknown name is reported as an error.
 *
 * When a case suffix is present the command must carry the C_CASE flag, and
 * the suffix (up to '}', ' ' or the end of the string) is resolved with
 * ResolveCaseName into *casei. A missing closing '}' is reported in two
 * places.
 * NOTE(review): the test that detects the case suffix, the copying of the
 * parameter text into `param` and the update of *str are not visible in this
 * excerpt.
 */
506 static const CmdStruct *ParseCommandString(
const char **str,
char *param,
int *argno,
int *casei)
508 const char *s = *str, *start;
515 for (; *s !=
'{'; s++) {
516 if (*s ==
'\0')
return NULL;
520 if (*s >=
'0' && *s <=
'9') {
523 *argno = strtoul(s, &end, 0);
524 if (*end !=
':') strgen_fatal(
"missing arg #");
532 }
while (c !=
'}' && c !=
' ' && c !=
'=' && c !=
'.' && c != 0);
/* s is one past the terminating character here, hence the "- 1" in the length. */
534 const CmdStruct *cmd = FindCmd(start, s - start - 1);
536 strgen_error(
"Undefined command '%.*s'", (
int)(s - start - 1), start);
541 const char *casep = s;
543 if (!(cmd->flags &
C_CASE)) {
544 strgen_fatal(
"Command '%s' can't have a case", cmd->cmd);
549 }
while (c !=
'}' && c !=
' ' && c !=
'\0');
550 *casei = ResolveCaseName(casep, s - casep - 1);
554 strgen_error(
"Missing } from command '%s'", start);
567 strgen_error(
"Missing } from command '%s'", start);
589 data(data), file(
stredup(file)), master(master), translation(translation)
606 memset(p, 0,
sizeof(*p));
610 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
612 if (ar == NULL)
break;
615 if (argno != -1 && ar->consumes == 0) strgen_fatal(
"Non consumer param can't have a paramindex");
618 if (argno != -1) argidx = argno;
619 if (argidx < 0 || (uint)argidx >=
lengthof(p->cmd)) strgen_fatal(
"invalid param idx %d", argidx);
620 if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) strgen_fatal(
"duplicate param idx %d", argidx);
622 p->cmd[argidx++] = ar;
624 if (p->np >=
lengthof(p->pairs)) strgen_fatal(
"too many commands in string, max " PRINTF_SIZE,
lengthof(p->pairs));
625 p->pairs[p->np].a = ar;
626 p->pairs[p->np].v = param[0] !=
'\0' ?
stredup(param) :
"";
635 if (a == NULL)
return NULL;
637 if (strcmp(a->cmd,
"STRING1") == 0 ||
638 strcmp(a->cmd,
"STRING2") == 0 ||
639 strcmp(a->cmd,
"STRING3") == 0 ||
640 strcmp(a->cmd,
"STRING4") == 0 ||
641 strcmp(a->cmd,
"STRING5") == 0 ||
642 strcmp(a->cmd,
"STRING6") == 0 ||
643 strcmp(a->cmd,
"STRING7") == 0 ||
644 strcmp(a->cmd,
"RAW_STRING") == 0) {
645 return FindCmd(
"STRING", 6);
/*
 * Compare the commands used in a language string with those of its template.
 *
 * Note the argument order: `b` is parsed as the template and `a` as the
 * language string, both with ExtractCommandString. `name` only labels the
 * warnings.
 *
 * Warnings visible here:
 *  - the two strings contain a different number of commands;
 *  - a template command has no language command with the same command and
 *    the same parameter text (each language command is matched at most once,
 *    since a matched entry is cleared);
 *  - for some parameter index the template command, after
 *    TranslateCmdForCompare, differs from the language command.
 * NOTE(review): the return statements are not visible in this excerpt; a
 * caller returns early when the result is false.
 */
652 static bool CheckCommandsMatch(
char *a,
char *b,
const char *name)
664 ExtractCommandString(&templ, b,
true);
665 ExtractCommandString(&lang, a,
true);
668 if (templ.np != lang.np) {
669 strgen_warning(
"%s: template string and language string have a different # of commands", name);
673 for (uint i = 0; i < templ.np; i++) {
676 for (uint j = 0; j < lang.np; j++) {
677 if (templ.pairs[i].a == lang.pairs[j].a &&
678 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
/* Clear the matched entry so it cannot satisfy another template command. */
680 lang.pairs[j].a = NULL;
687 strgen_warning(
"%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
694 for (uint i = 0; i <
lengthof(templ.cmd); i++) {
695 if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
696 strgen_warning(
"%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
697 lang.cmd[i] == NULL ?
"<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
698 templ.cmd[i] == NULL ?
"<empty>" : templ.cmd[i]->cmd);
/*
 * Process one input line.
 *
 * Steps visible in this excerpt:
 *  - a line whose second character is '#' and third is not '#' is passed,
 *    from its third character on, to HandlePragma;
 *  - a line starting with ';' or ' ', or an empty line, is ignored;
 *  - the line must contain ':'; otherwise an error is reported;
 *  - spaces and tabs directly before the ':' are skipped backwards;
 *  - the text is validated with Utf8Validate; an invalid sequence is fatal,
 *    and so is a code point in 0xE000..0xF8FF or 0xFFF0..0xFFFF;
 *  - a '.' in the name part splits off a case name;
 *  - errors are reported for cases in the base translation, names used more
 *    than once, and a string ID that is already taken; a name missing from
 *    the master file only yields a warning;
 *  - CheckCommandsMatch compares the text with the stored english text and
 *    processing stops when it returns false.
 * NOTE(review): the branching between master-file and translation handling,
 * and the test of the first character before the pragma check, are not
 * visible in this excerpt.
 */
706 void StringReader::HandleString(
char *str)
709 if (str[1] ==
'#' && str[2] !=
'#') this->
HandlePragma(str + 2);
714 if (*str ==
';' || *str ==
' ' || *str ==
'\0')
return;
716 char *s = strchr(str,
':');
718 strgen_error(
"Line has no ':' delimiter");
725 for (t = s; t > str && (t[-1] ==
' ' || t[-1] ==
'\t'); t--) {}
731 for (tmp = s; *tmp !=
'\0';) {
732 size_t len = Utf8Validate(tmp);
733 if (len == 0) strgen_fatal(
"Invalid UTF-8 sequence in '%s'", s);
739 (c >= 0xE000 && c <= 0xF8FF) ||
740 (c >= 0xFFF0 && c <= 0xFFFF)) {
741 strgen_fatal(
"Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
749 char *casep = strchr(str,
'.');
750 if (casep != NULL) *casep++ =
'\0';
757 strgen_error(
"Cases in the base translation are not supported.");
762 strgen_error(
"String name '%s' is used multiple times", str);
766 if (this->
data.
strings[this->data.next_string_id] != NULL) {
767 strgen_error(
"String ID 0x%X for '%s' already in use by '%s'", this->
data.
next_string_id, str, this->data.strings[this->data.next_string_id]->name);
775 strgen_warning(
"String name '%s' does not exist in master file", str);
780 strgen_error(
"String name '%s' is used multiple times", str);
785 if (!CheckCommandsMatch(s, ent->
english, str))
return;
801 if (!memcmp(str,
"plural ", 7)) {
804 strgen_fatal(
"Invalid pluralform %d", _lang.
plural_form);
807 strgen_fatal(
"unknown pragma '%s'", str);
/*
 * Strip trailing carriage returns, line feeds and spaces from buf.
 *
 * The loop walks back from the end of the string while the preceding
 * character is '\r', '\n' or ' ', leaving i at the new length. Tabs are not
 * treated as trailing whitespace here.
 * NOTE(review): the write of the new terminator is not visible in this
 * excerpt.
 */
811 static void rstrip(
char *buf)
813 size_t i = strlen(buf);
814 while (i > 0 && (buf[i - 1] ==
'\r' || buf[i - 1] ==
'\n' || buf[i - 1] ==
' ')) i--;
821 _warnings = _errors = 0;
835 this->HandleString(buf);
/*
 * Translate a slot index in _cur_pcs.cmd (plus an offset inside that slot's
 * command) into a position counted in consumed arguments.
 *
 * Fatal errors: argidx outside _cur_pcs.cmd, an offset that is not smaller
 * than the number of arguments the slot's command consumes, or an empty slot.
 * The loop then adds up, for every slot before argidx, the number of
 * arguments its command consumes; an empty slot counts as 1.
 * NOTE(review): the assignment of cs inside the loop and the return
 * expression are not visible in this excerpt.
 */
857 static int TranslateArgumentIdx(
int argidx,
int offset)
861 if (argidx < 0 || (uint)argidx >=
lengthof(_cur_pcs.cmd)) {
862 strgen_fatal(
"invalid argidx %d", argidx);
864 const CmdStruct *cs = _cur_pcs.cmd[argidx];
865 if (cs != NULL && cs->consumes <= offset) {
866 strgen_fatal(
"invalid argidx offset %d:%d", argidx, offset);
869 if (_cur_pcs.cmd[argidx] == NULL) {
870 strgen_fatal(
"no command for this argidx %d", argidx);
873 for (
int i = sum = 0; i < argidx; i++) {
876 sum += (cs != NULL) ? cs->consumes : 1;
/*
 * Append the translated position of the current argument (_cur_argidx, with
 * the default offset of 0) to buffer as a single byte.
 * NOTE(review): whatever is written before this byte is not visible in this
 * excerpt.
 */
882 static void PutArgidxCommand(
Buffer *buffer)
885 buffer->
AppendByte(TranslateArgumentIdx(_cur_argidx));
/*
 * Write str to buffer, expanding the commands it contains.
 *
 * Commands are pulled out one at a time with ParseCommandString until it
 * returns NULL or the string ends. For a command that consumes arguments, an
 * explicit argument number differing from _cur_argidx triggers
 * PutArgidxCommand. The command is then replaced by the one stored in
 * _cur_pcs.cmd at _cur_argidx, which is advanced; a missing entry there is
 * fatal. Finally the command's own handler writes its output.
 * NOTE(review): the copying of literal text between commands and the update
 * of _cur_argidx from argno are not visible in this excerpt.
 */
889 static void PutCommandString(
Buffer *buffer,
const char *str)
893 while (*str !=
'\0') {
903 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
904 if (cs == NULL)
break;
912 if (cs->consumes > 0) {
914 if (argno != -1 && argno != _cur_argidx) {
916 PutArgidxCommand(buffer);
920 cs = _cur_pcs.cmd[_cur_argidx++];
922 strgen_fatal(
"%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
926 cs->proc(buffer, param, cs->value);
938 if (length >= 0x4000) {
939 strgen_fatal(
"string too long");
942 if (length >= 0xC0) {
943 buffer[offs++] = (length >> 8) | 0xC0;
945 buffer[offs++] = length & 0xFF;
946 this->
Write((byte*)buffer, offs);
956 for (
size_t tab = 0; tab < data.
tabs; tab++) {
960 _lang.
offsets[tab] = TO_LE16(n);
962 for (uint j = 0; j != in_use[tab]; j++) {
976 for (
size_t tab = 0; tab < data.
tabs; tab++) {
977 for (uint j = 0; j != in_use[tab]; j++) {
988 _cur_ident = ls->
name;
992 if (_show_todo > 0 && ls->
translated == NULL) {
993 if ((_show_todo & 2) != 0) {
994 strgen_warning(
"'%s' is untranslated", ls->
name);
996 if ((_show_todo & 1) != 0) {
997 const char *s =
"<TODO> ";
1003 ExtractCommandString(&_cur_pcs, ls->
english,
false);
1015 if (casep != NULL) {
1025 for (num = 0, c = casep; c; c = c->
next) num++;
1029 for (c = casep; c != NULL; c = c->
next) {
1032 uint pos = buffer.
Length();
1036 PutCommandString(&buffer, c->
string);
1039 uint size = buffer.
Length() - (pos + 2);
1040 buffer[pos + 0] =
GB(size, 8, 8);
1041 buffer[pos + 1] =
GB(size, 0, 8);
1045 if (cmdp != NULL) PutCommandString(&buffer, cmdp);