mod_translation_updater.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Script to generate Minetest translation template files and update
  5. # translation files.
  6. #
  7. # Copyright (C) 2019 Joachim Stolberg, 2020 FaceDeer, 2020 Louis Royer,
  8. # 2023 Wuzzy.
  9. # License: LGPLv2.1 or later (see LICENSE file for details)
  10. import os, fnmatch, re, shutil, errno
  11. from sys import argv as _argv
  12. from sys import stderr as _stderr
  13. # Running params
  14. params = {"recursive": False,
  15. "help": False,
  16. "verbose": False,
  17. "folders": [],
  18. "old-file": False,
  19. "break-long-lines": False,
  20. "print-source": False,
  21. "truncate-unused": False,
  22. }
  23. # Available CLI options
  24. options = {"recursive": ['--recursive', '-r'],
  25. "help": ['--help', '-h'],
  26. "verbose": ['--verbose', '-v'],
  27. "old-file": ['--old-file', '-o'],
  28. "break-long-lines": ['--break-long-lines', '-b'],
  29. "print-source": ['--print-source', '-p'],
  30. "truncate-unused": ['--truncate-unused', '-t'],
  31. }
  32. # Strings longer than this will have extra space added between
  33. # them in the translation files to make it easier to distinguish their
  34. # beginnings and endings at a glance
  35. doublespace_threshold = 80
  36. # These symbols mark comment lines showing the source file name.
  37. # A comment may look like "##[ init.lua ]##".
  38. symbol_source_prefix = "##["
  39. symbol_source_suffix = "]##"
  40. # comment to mark the section of old/unused strings
  41. comment_unused = "##### not used anymore #####"
  42. def set_params_folders(tab: list):
  43. '''Initialize params["folders"] from CLI arguments.'''
  44. # Discarding argument 0 (tool name)
  45. for param in tab[1:]:
  46. stop_param = False
  47. for option in options:
  48. if param in options[option]:
  49. stop_param = True
  50. break
  51. if not stop_param:
  52. params["folders"].append(os.path.abspath(param))
  53. def set_params(tab: list):
  54. '''Initialize params from CLI arguments.'''
  55. for option in options:
  56. for option_name in options[option]:
  57. if option_name in tab:
  58. params[option] = True
  59. break
def print_help(name):
    '''Print the usage text to standard output.

    name is shown as the command name in the synopsis line. The option
    spellings listed in the text are taken from the module-level
    options table.
    '''
    print(f'''SYNOPSIS
{name} [OPTIONS] [PATHS...]
DESCRIPTION
{', '.join(options["help"])}
prints this help message
{', '.join(options["recursive"])}
run on all subfolders of paths given
{', '.join(options["old-file"])}
create *.old files
{', '.join(options["break-long-lines"])}
add extra line breaks before and after long strings
{', '.join(options["print-source"])}
add comments denoting the source file
{', '.join(options["verbose"])}
add output information
{', '.join(options["truncate-unused"])}
delete unused strings from files
''')
  80. def main():
  81. '''Main function'''
  82. set_params(_argv)
  83. set_params_folders(_argv)
  84. if params["help"]:
  85. print_help(_argv[0])
  86. else:
  87. # Add recursivity message
  88. print("Running ", end='')
  89. if params["recursive"]:
  90. print("recursively ", end='')
  91. # Running
  92. if len(params["folders"]) >= 2:
  93. print("on folder list:", params["folders"])
  94. for f in params["folders"]:
  95. if params["recursive"]:
  96. run_all_subfolders(f)
  97. else:
  98. update_folder(f)
  99. elif len(params["folders"]) == 1:
  100. print("on folder", params["folders"][0])
  101. if params["recursive"]:
  102. run_all_subfolders(params["folders"][0])
  103. else:
  104. update_folder(params["folders"][0])
  105. else:
  106. print("on folder", os.path.abspath("./"))
  107. if params["recursive"]:
  108. run_all_subfolders(os.path.abspath("./"))
  109. else:
  110. update_folder(os.path.abspath("./"))
  111. # Compile pattern for matching lua function call
  112. def compile_func_call_pattern(argument_pattern):
  113. return re.compile(
  114. # Look for beginning of file or anything that isn't a function identifier
  115. r'(?:^|[\.=,{\(\s])' +
  116. # Matches S, FS, NS, or NFS function call
  117. r'N?F?S\s*' +
  118. # The pattern to match argument
  119. argument_pattern,
  120. re.DOTALL)
  121. # Add parentheses around a pattern
  122. def parenthesize_pattern(pattern):
  123. return (
  124. # Start of argument: open parentheses and space (optional)
  125. r'\(\s*' +
  126. # The pattern to be parenthesized
  127. pattern +
  128. # End of argument or function call: space, comma, or close parentheses
  129. r'[\s,\)]')
# Quoted string
# Group 2 is the string content; group 1 and group 3 are the delimiters (" or ')
# See https://stackoverflow.com/questions/46967465/regex-match-text-in-either-single-or-double-quote
pattern_lua_quoted_string = r'(["\'])((?:\\\1|(?:(?!\1)).)*)(\1)'
# Double square bracket string (multiline)
# Group 1 is the string content
pattern_lua_square_bracket_string = r'\[\[(.*?)\]\]'
# Handles the " ... " or ' ... ' string delimiters
pattern_lua_quoted = compile_func_call_pattern(parenthesize_pattern(pattern_lua_quoted_string))
# Handles the [[ ... ]] string delimiters
pattern_lua_bracketed = compile_func_call_pattern(parenthesize_pattern(pattern_lua_square_bracket_string))
# Handles like pattern_lua_quoted, but for single parameter (without parentheses)
# See https://www.lua.org/pil/5.html for information about single argument calls
pattern_lua_quoted_single = compile_func_call_pattern(pattern_lua_quoted_string)
# Same as pattern_lua_quoted_single, but for [[ ... ]] string delimiters
pattern_lua_bracketed_single = compile_func_call_pattern(pattern_lua_square_bracket_string)
# Handles "concatenation" .. " of strings": matches a closing quote,
# the .. operator (with optional whitespace) and the next opening quote
pattern_concat = re.compile(r'["\'][\s]*\.\.[\s]*["\']', re.DOTALL)
# Handles a translation line in *.tr file.
# Group 1 is the source string left of the equals sign.
# Group 2 is the translated string, right of the equals sign.
pattern_tr = re.compile(
    r'(.*)' # Source string
    # the separating equals sign, if NOT preceded by @, unless
    # that @ is preceded by another @
    r'(?:(?<!(?<!@)@)=)'
    r'(.*)' # Translation string
    )
# Matches a line starting with "name = ..."; group 1 is the value
# (everything up to the next space or newline)
pattern_name = re.compile(r'^name[ ]*=[ ]*([^ \n]*)')
# Matches file names ending in ".tr"
pattern_tr_filename = re.compile(r'\.tr$')
# Matches bad use of @ signs in Lua string
pattern_bad_luastring = re.compile(
    r'^@$|' # single @, OR
    r'[^@]@$|' # trailing unescaped @, OR
    r'(?<!@)@(?=[^@1-9n])' # an @ that is not escaped or part of a placeholder
    )
  165. # Attempt to read the mod's name from the mod.conf file or folder name. Returns None on failure
  166. def get_modname(folder):
  167. try:
  168. with open(os.path.join(folder, "mod.conf"), "r", encoding='utf-8') as mod_conf:
  169. for line in mod_conf:
  170. match = pattern_name.match(line)
  171. if match:
  172. return match.group(1)
  173. except FileNotFoundError:
  174. folder_name = os.path.basename(folder)
  175. # Special case when run in Minetest's builtin directory
  176. return "__builtin" if folder_name == "builtin" else folder_name
  177. # If there are already .tr files in /locale, returns a list of their names
  178. def get_existing_tr_files(folder):
  179. out = []
  180. for root, dirs, files in os.walk(os.path.join(folder, 'locale/')):
  181. for name in files:
  182. if pattern_tr_filename.search(name):
  183. out.append(name)
  184. return out
  185. # from https://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python/600612#600612
  186. # Creates a directory if it doesn't exist, silently does
  187. # nothing if it already exists
  188. def mkdir_p(path):
  189. try:
  190. os.makedirs(path)
  191. except OSError as exc: # Python >2.5
  192. if exc.errno == errno.EEXIST and os.path.isdir(path):
  193. pass
  194. else: raise
  195. # Converts the template dictionary to a text to be written as a file
  196. # dKeyStrings is a dictionary of localized string to source file sets
  197. # dOld is a dictionary of existing translations and comments from
  198. # the previous version of this text
  199. def strings_to_text(dkeyStrings, dOld, mod_name, header_comments, textdomain, templ = None):
  200. # if textdomain is specified, insert it at the top
  201. if textdomain != None:
  202. lOut = [textdomain] # argument is full textdomain line
  203. # otherwise, use mod name as textdomain automatically
  204. else:
  205. lOut = [f"# textdomain: {mod_name}"]
  206. if templ is not None and templ[2] and (header_comments is None or not header_comments.startswith(templ[2])):
  207. # header comments in the template file
  208. lOut.append(templ[2])
  209. if header_comments is not None:
  210. lOut.append(header_comments)
  211. dGroupedBySource = {}
  212. for key in dkeyStrings:
  213. sourceList = list(dkeyStrings[key])
  214. sourceString = "\n".join(sourceList)
  215. listForSource = dGroupedBySource.get(sourceString, [])
  216. listForSource.append(key)
  217. dGroupedBySource[sourceString] = listForSource
  218. lSourceKeys = list(dGroupedBySource.keys())
  219. lSourceKeys.sort()
  220. for source in lSourceKeys:
  221. localizedStrings = dGroupedBySource[source]
  222. if params["print-source"]:
  223. if lOut[-1] != "":
  224. lOut.append("")
  225. lOut.append(source)
  226. for localizedString in localizedStrings:
  227. val = dOld.get(localizedString, {})
  228. translation = val.get("translation", "")
  229. comment = val.get("comment")
  230. templ_comment = None
  231. if templ:
  232. templ_val = templ[0].get(localizedString, {})
  233. templ_comment = templ_val.get("comment")
  234. if params["break-long-lines"] and len(localizedString) > doublespace_threshold and not lOut[-1] == "":
  235. lOut.append("")
  236. if templ_comment != None and templ_comment != "" and (comment is None or comment == "" or not comment.startswith(templ_comment)):
  237. lOut.append(templ_comment)
  238. if comment != None and comment != "" and not comment.startswith("# textdomain:"):
  239. lOut.append(comment)
  240. lOut.append(f"{localizedString}={translation}")
  241. if params["break-long-lines"] and len(localizedString) > doublespace_threshold:
  242. lOut.append("")
  243. unusedExist = False
  244. if not params["truncate-unused"]:
  245. for key in dOld:
  246. if key not in dkeyStrings:
  247. val = dOld[key]
  248. translation = val.get("translation")
  249. comment = val.get("comment")
  250. # only keep an unused translation if there was translated
  251. # text or a comment associated with it
  252. if translation != None and (translation != "" or comment):
  253. if not unusedExist:
  254. unusedExist = True
  255. lOut.append("\n\n" + comment_unused + "\n")
  256. if params["break-long-lines"] and len(key) > doublespace_threshold and not lOut[-1] == "":
  257. lOut.append("")
  258. if comment != None:
  259. lOut.append(comment)
  260. lOut.append(f"{key}={translation}")
  261. if params["break-long-lines"] and len(key) > doublespace_threshold:
  262. lOut.append("")
  263. return "\n".join(lOut) + '\n'
  264. # Writes a template.txt file
  265. # dkeyStrings is the dictionary returned by generate_template
  266. def write_template(templ_file, dkeyStrings, mod_name):
  267. # read existing template file to preserve comments
  268. existing_template = import_tr_file(templ_file)
  269. text = strings_to_text(dkeyStrings, existing_template[0], mod_name, existing_template[2], existing_template[3])
  270. mkdir_p(os.path.dirname(templ_file))
  271. with open(templ_file, "wt", encoding='utf-8') as template_file:
  272. template_file.write(text)
  273. # Gets all translatable strings from a lua file
  274. def read_lua_file_strings(lua_file):
  275. lOut = []
  276. with open(lua_file, encoding='utf-8') as text_file:
  277. text = text_file.read()
  278. strings = []
  279. for s in pattern_lua_quoted_single.findall(text):
  280. strings.append(s[1])
  281. for s in pattern_lua_bracketed_single.findall(text):
  282. strings.append(s)
  283. # Only concatenate strings after matching
  284. # single parameter call (without parantheses)
  285. text = re.sub(pattern_concat, "", text)
  286. for s in pattern_lua_quoted.findall(text):
  287. strings.append(s[1])
  288. for s in pattern_lua_bracketed.findall(text):
  289. strings.append(s)
  290. for s in strings:
  291. found_bad = pattern_bad_luastring.search(s)
  292. if found_bad:
  293. print("SYNTAX ERROR: Unescaped '@' in Lua string: " + s)
  294. continue
  295. s = s.replace('\\"', '"')
  296. s = s.replace("\\'", "'")
  297. s = s.replace("\n", "@n")
  298. s = s.replace("\\n", "@n")
  299. s = s.replace("=", "@=")
  300. lOut.append(s)
  301. return lOut
# Gets strings from an existing translation file.
# Returns a tuple (dOut, text, header_comments, textdomain):
# * dOut: dictionary mapping each source string to a dictionary with the
#   key "translation" and, if comment lines preceded the entry, "comment"
# * text: the full original file text, so that the new text
#   can be compared to it for changes (None if the file does not exist)
# * header_comments: the comment lines collected while still in the
#   file header, joined by newlines (None if there were none)
# * textdomain: the first "# textdomain:" line (None if there was none)
def import_tr_file(tr_file):
    dOut = {}
    text = None
    in_header = True
    header_comments = None
    textdomain = None
    if os.path.exists(tr_file):
        with open(tr_file, "r", encoding='utf-8') as existing_file :
            # save the full text to allow for comparison
            # of the old version with the new output
            text = existing_file.read()
            # rewind to go through the same content line by line
            existing_file.seek(0)
            # a running record of the current comment block
            # we're inside, to allow preceding multi-line comments
            # to be retained for a translation line
            latest_comment_block = None
            for line in existing_file.readlines():
                line = line.rstrip('\n')
                # "##### not used anymore #####" comment
                if line == comment_unused:
                    # Always delete the 'not used anymore' comment.
                    # It will be re-added to the file if necessary.
                    latest_comment_block = None
                    # If header comments were collected, the header ends here
                    if header_comments != None:
                        in_header = False
                    continue
                # Comment lines
                elif line.startswith("#"):
                    # Source file comments: ##[ file.lua ]##
                    if line.startswith(symbol_source_prefix) and line.endswith(symbol_source_suffix):
                        # This line marks the end of header comments.
                        if params["print-source"]:
                            in_header = False
                        # Remove those comments; they may be added back automatically.
                        continue
                    # Store first occurrence of textdomain
                    # discard all subsequent textdomain lines
                    if line.startswith("# textdomain:"):
                        if textdomain == None:
                            textdomain = line
                        continue
                    elif in_header:
                        # Save header comments (normal comments at top of file)
                        if not header_comments:
                            header_comments = line
                        else:
                            header_comments = header_comments + "\n" + line
                    else:
                        # Save normal comments
                        # NOTE(review): textdomain lines are already consumed by
                        # the branch above, so this check looks unreachable.
                        if line.startswith("# textdomain:") and textdomain == None:
                            textdomain = line
                        elif not latest_comment_block:
                            latest_comment_block = line
                        else:
                            latest_comment_block = latest_comment_block + "\n" + line
                    continue
                match = pattern_tr.match(line)
                if match:
                    # this line is a translated line
                    outval = {}
                    outval["translation"] = match.group(2)
                    if latest_comment_block:
                        # if there was a comment, record that.
                        outval["comment"] = latest_comment_block
                    # the comment block has been used up, and a translation
                    # line means that the header is over
                    latest_comment_block = None
                    in_header = False
                    dOut[match.group(1)] = outval
    return (dOut, text, header_comments, textdomain)
  375. # like os.walk but returns sorted filenames
  376. def sorted_os_walk(folder):
  377. tuples = []
  378. t = 0
  379. for root, dirs, files in os.walk(folder):
  380. tuples.append( (root, dirs, files) )
  381. t = t + 1
  382. tuples = sorted(tuples)
  383. paths_and_files = []
  384. f = 0
  385. for tu in tuples:
  386. root = tu[0]
  387. dirs = tu[1]
  388. files = tu[2]
  389. files = sorted(files, key=str.lower)
  390. for filename in files:
  391. paths_and_files.append( (os.path.join(root, filename), filename) )
  392. f = f + 1
  393. return paths_and_files
  394. # Walks all lua files in the mod folder, collects translatable strings,
  395. # and writes it to a template.txt file
  396. # Returns a dictionary of localized strings to source file lists
  397. # that can be used with the strings_to_text function.
  398. def generate_template(folder, mod_name):
  399. dOut = {}
  400. paths_and_files = sorted_os_walk(folder)
  401. for paf in paths_and_files:
  402. fullpath_filename = paf[0]
  403. filename = paf[1]
  404. if fnmatch.fnmatch(filename, "*.lua"):
  405. found = read_lua_file_strings(fullpath_filename)
  406. if params["verbose"]:
  407. print(f"{fullpath_filename}: {str(len(found))} translatable strings")
  408. for s in found:
  409. sources = dOut.get(s, set())
  410. sources.add(os.path.relpath(fullpath_filename, start=folder))
  411. dOut[s] = sources
  412. if len(dOut) == 0:
  413. return None
  414. # Convert source file set to list, sort it and add comment symbols.
  415. # Needed because a set is unsorted and might result in unpredictable.
  416. # output orders if any source string appears in multiple files.
  417. for d in dOut:
  418. sources = dOut.get(d, set())
  419. sources = sorted(list(sources), key=str.lower)
  420. newSources = []
  421. for i in sources:
  422. i = i.replace("\\", "/")
  423. newSources.append(f"{symbol_source_prefix} {i} {symbol_source_suffix}")
  424. dOut[d] = newSources
  425. templ_file = os.path.join(folder, "locale/template.txt")
  426. write_template(templ_file, dOut, mod_name)
  427. new_template = import_tr_file(templ_file) # re-import to get all new data
  428. return (dOut, new_template)
# Updates an existing .tr file, copying the old one to a ".old" file
# if any changes have happened and the "old-file" parameter is set.
# dNew is the data used to generate the template, it has all the
# currently-existing localized strings
# templ is the imported template (as returned by import_tr_file); it is
# passed on to strings_to_text
# mod_name is passed on to strings_to_text as well
# tr_file is the path of the .tr file to update
def update_tr_file(dNew, templ, mod_name, tr_file):
    if params["verbose"]:
        print(f"updating {tr_file}")
    tr_import = import_tr_file(tr_file)
    dOld = tr_import[0]
    textOld = tr_import[1]
    textNew = strings_to_text(dNew, dOld, mod_name, tr_import[2], tr_import[3], templ)
    # Only report a change if there was old content to compare with
    if textOld and textOld != textNew:
        print(f"{tr_file} has changed.")
        if params["old-file"]:
            shutil.copyfile(tr_file, f"{tr_file}.old")
    with open(tr_file, "w", encoding='utf-8') as new_tr_file:
        new_tr_file.write(textNew)
  446. # Updates translation files for the mod in the given folder
  447. def update_mod(folder):
  448. if not os.path.exists(os.path.join(folder, "init.lua")):
  449. print(f"Mod folder {folder} is missing init.lua, aborting.")
  450. exit(1)
  451. assert not is_modpack(folder)
  452. modname = get_modname(folder)
  453. print(f"Updating translations for {modname}")
  454. (data, templ) = generate_template(folder, modname)
  455. if data == None:
  456. print(f"No translatable strings found in {modname}")
  457. else:
  458. for tr_file in get_existing_tr_files(folder):
  459. update_tr_file(data, templ, modname, os.path.join(folder, "locale/", tr_file))
  460. def is_modpack(folder):
  461. return os.path.exists(os.path.join(folder, "modpack.txt")) or os.path.exists(os.path.join(folder, "modpack.conf"))
  462. def is_game(folder):
  463. return os.path.exists(os.path.join(folder, "game.conf")) and os.path.exists(os.path.join(folder, "mods"))
# Determines if the folder being pointed to is a game, mod or a mod pack
# and then runs update_mod accordingly:
# * game: all subfolders of its "mods" folder are processed
# * mod pack: all of its subfolders are processed
# * anything else: the folder is treated as a single mod
# Prints "Done." afterwards.
def update_folder(folder):
    if is_game(folder):
        run_all_subfolders(os.path.join(folder, "mods"))
    elif is_modpack(folder):
        run_all_subfolders(folder)
    else:
        update_mod(folder)
    print("Done.")
  474. def run_all_subfolders(folder):
  475. for modfolder in [f.path for f in os.scandir(folder) if f.is_dir() and not f.name.startswith('.')]:
  476. update_folder(modfolder)
  477. main()