#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Script to generate Minetest translation template files and update
# translation files.
#
# Copyright (C) 2019 Joachim Stolberg, 2020 FaceDeer, 2020 Louis Royer,
# 2023 Wuzzy.
# License: LGPLv2.1 or later (see LICENSE file for details)
import errno
import fnmatch
import os
import re
import shutil
import sys
from sys import argv as _argv
from sys import stderr as _stderr
  13. # Running params
  14. params = {"recursive": False,
  15. "help": False,
  16. "verbose": False,
  17. "folders": [],
  18. "old-file": False,
  19. "break-long-lines": False,
  20. "print-source": False,
  21. "truncate-unused": False,
  22. }
  23. # Available CLI options
  24. options = {"recursive": ['--recursive', '-r'],
  25. "help": ['--help', '-h'],
  26. "verbose": ['--verbose', '-v'],
  27. "old-file": ['--old-file', '-o'],
  28. "break-long-lines": ['--break-long-lines', '-b'],
  29. "print-source": ['--print-source', '-p'],
  30. "truncate-unused": ['--truncate-unused', '-t'],
  31. }
  32. # Strings longer than this will have extra space added between
  33. # them in the translation files to make it easier to distinguish their
  34. # beginnings and endings at a glance
  35. doublespace_threshold = 80
  36. # These symbols mark comment lines showing the source file name.
  37. # A comment may look like "##[ init.lua ]##".
  38. symbol_source_prefix = "##["
  39. symbol_source_suffix = "]##"
  40. # comment to mark the section of old/unused strings
  41. comment_unused = "##### not used anymore #####"
  42. def set_params_folders(tab: list):
  43. '''Initialize params["folders"] from CLI arguments.'''
  44. # Discarding argument 0 (tool name)
  45. for param in tab[1:]:
  46. stop_param = False
  47. for option in options:
  48. if param in options[option]:
  49. stop_param = True
  50. break
  51. if not stop_param:
  52. params["folders"].append(os.path.abspath(param))
  53. def set_params(tab: list):
  54. '''Initialize params from CLI arguments.'''
  55. for option in options:
  56. for option_name in options[option]:
  57. if option_name in tab:
  58. params[option] = True
  59. break
  60. def print_help(name):
  61. '''Prints some help message.'''
  62. print(f'''SYNOPSIS
  63. {name} [OPTIONS] [PATHS...]
  64. DESCRIPTION
  65. {', '.join(options["help"])}
  66. prints this help message
  67. {', '.join(options["recursive"])}
  68. run on all subfolders of paths given
  69. {', '.join(options["old-file"])}
  70. create *.old files
  71. {', '.join(options["break-long-lines"])}
  72. add extra line breaks before and after long strings
  73. {', '.join(options["print-source"])}
  74. add comments denoting the source file
  75. {', '.join(options["verbose"])}
  76. add output information
  77. {', '.join(options["truncate-unused"])}
  78. delete unused strings from files
  79. ''')
  80. def main():
  81. '''Main function'''
  82. set_params(_argv)
  83. set_params_folders(_argv)
  84. if params["help"]:
  85. print_help(_argv[0])
  86. else:
  87. # Add recursivity message
  88. print("Running ", end='')
  89. if params["recursive"]:
  90. print("recursively ", end='')
  91. # Running
  92. if len(params["folders"]) >= 2:
  93. print("on folder list:", params["folders"])
  94. for f in params["folders"]:
  95. if params["recursive"]:
  96. run_all_subfolders(f)
  97. else:
  98. update_folder(f)
  99. elif len(params["folders"]) == 1:
  100. print("on folder", params["folders"][0])
  101. if params["recursive"]:
  102. run_all_subfolders(params["folders"][0])
  103. else:
  104. update_folder(params["folders"][0])
  105. else:
  106. print("on folder", os.path.abspath("./"))
  107. if params["recursive"]:
  108. run_all_subfolders(os.path.abspath("./"))
  109. else:
  110. update_folder(os.path.abspath("./"))
  111. # Group 2 will be the string, groups 1 and 3 will be the delimiters (" or ')
  112. # See https://stackoverflow.com/questions/46967465/regex-match-text-in-either-single-or-double-quote
  113. pattern_lua_quoted = re.compile(
  114. r'(?:^|[\.=,{\(\s])' # Look for beginning of file or anything that isn't a function identifier
  115. r'N?F?S\s*\(\s*' # Matches S, FS, NS or NFS function call
  116. r'(["\'])((?:\\\1|(?:(?!\1)).)*)(\1)' # Quoted string
  117. r'[\s,\)]', # End of call or argument
  118. re.DOTALL)
  119. # Handles the [[ ... ]] string delimiters
  120. pattern_lua_bracketed = re.compile(
  121. r'(?:^|[\.=,{\(\s])' # Same as for pattern_lua_quoted
  122. r'N?F?S\s*\(\s*' # Same as for pattern_lua_quoted
  123. r'\[\[(.*?)\]\]' # [[ ... ]] string delimiters
  124. r'[\s,\)]', # Same as for pattern_lua_quoted
  125. re.DOTALL)
  126. # Handles "concatenation" .. " of strings"
  127. pattern_concat = re.compile(r'["\'][\s]*\.\.[\s]*["\']', re.DOTALL)
  128. # Handles a translation line in *.tr file.
  129. # Group 1 is the source string left of the equals sign.
  130. # Group 2 is the translated string, right of the equals sign.
  131. pattern_tr = re.compile(
  132. r'(.*)' # Source string
  133. # the separating equals sign, if NOT preceded by @, unless
  134. # that @ is preceded by another @
  135. r'(?:(?<!(?<!@)@)=)'
  136. r'(.*)' # Translation string
  137. )
  138. pattern_name = re.compile(r'^name[ ]*=[ ]*([^ \n]*)')
  139. pattern_tr_filename = re.compile(r'\.tr$')
  140. # Matches bad use of @ signs in Lua string
  141. pattern_bad_luastring = re.compile(
  142. r'^@$|' # single @, OR
  143. r'[^@]@$|' # trailing unescaped @, OR
  144. r'(?<!@)@(?=[^@1-9n])' # an @ that is not escaped or part of a placeholder
  145. )
  146. # Attempt to read the mod's name from the mod.conf file or folder name. Returns None on failure
  147. def get_modname(folder):
  148. try:
  149. with open(os.path.join(folder, "mod.conf"), "r", encoding='utf-8') as mod_conf:
  150. for line in mod_conf:
  151. match = pattern_name.match(line)
  152. if match:
  153. return match.group(1)
  154. except FileNotFoundError:
  155. folder_name = os.path.basename(folder)
  156. # Special case when run in Minetest's builtin directory
  157. return "__builtin" if folder_name == "builtin" else folder_name
  158. # If there are already .tr files in /locale, returns a list of their names
  159. def get_existing_tr_files(folder):
  160. out = []
  161. for root, dirs, files in os.walk(os.path.join(folder, 'locale/')):
  162. for name in files:
  163. if pattern_tr_filename.search(name):
  164. out.append(name)
  165. return out
  166. # from https://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python/600612#600612
  167. # Creates a directory if it doesn't exist, silently does
  168. # nothing if it already exists
  169. def mkdir_p(path):
  170. try:
  171. os.makedirs(path)
  172. except OSError as exc: # Python >2.5
  173. if exc.errno == errno.EEXIST and os.path.isdir(path):
  174. pass
  175. else: raise
  176. # Converts the template dictionary to a text to be written as a file
  177. # dKeyStrings is a dictionary of localized string to source file sets
  178. # dOld is a dictionary of existing translations and comments from
  179. # the previous version of this text
  180. def strings_to_text(dkeyStrings, dOld, mod_name, header_comments, textdomain, templ = None):
  181. # if textdomain is specified, insert it at the top
  182. if textdomain != None:
  183. lOut = [textdomain] # argument is full textdomain line
  184. # otherwise, use mod name as textdomain automatically
  185. else:
  186. lOut = [f"# textdomain: {mod_name}"]
  187. if templ is not None and templ[2] and (header_comments is None or not header_comments.startswith(templ[2])):
  188. # header comments in the template file
  189. lOut.append(templ[2])
  190. if header_comments is not None:
  191. lOut.append(header_comments)
  192. dGroupedBySource = {}
  193. for key in dkeyStrings:
  194. sourceList = list(dkeyStrings[key])
  195. sourceString = "\n".join(sourceList)
  196. listForSource = dGroupedBySource.get(sourceString, [])
  197. listForSource.append(key)
  198. dGroupedBySource[sourceString] = listForSource
  199. lSourceKeys = list(dGroupedBySource.keys())
  200. lSourceKeys.sort()
  201. for source in lSourceKeys:
  202. localizedStrings = dGroupedBySource[source]
  203. if params["print-source"]:
  204. if lOut[-1] != "":
  205. lOut.append("")
  206. lOut.append(source)
  207. for localizedString in localizedStrings:
  208. val = dOld.get(localizedString, {})
  209. translation = val.get("translation", "")
  210. comment = val.get("comment")
  211. templ_comment = None
  212. if templ:
  213. templ_val = templ[0].get(localizedString, {})
  214. templ_comment = templ_val.get("comment")
  215. if params["break-long-lines"] and len(localizedString) > doublespace_threshold and not lOut[-1] == "":
  216. lOut.append("")
  217. if templ_comment != None and templ_comment != "" and (comment is None or comment == "" or not comment.startswith(templ_comment)):
  218. lOut.append(templ_comment)
  219. if comment != None and comment != "" and not comment.startswith("# textdomain:"):
  220. lOut.append(comment)
  221. lOut.append(f"{localizedString}={translation}")
  222. if params["break-long-lines"] and len(localizedString) > doublespace_threshold:
  223. lOut.append("")
  224. unusedExist = False
  225. if not params["truncate-unused"]:
  226. for key in dOld:
  227. if key not in dkeyStrings:
  228. val = dOld[key]
  229. translation = val.get("translation")
  230. comment = val.get("comment")
  231. # only keep an unused translation if there was translated
  232. # text or a comment associated with it
  233. if translation != None and (translation != "" or comment):
  234. if not unusedExist:
  235. unusedExist = True
  236. lOut.append("\n\n" + comment_unused + "\n")
  237. if params["break-long-lines"] and len(key) > doublespace_threshold and not lOut[-1] == "":
  238. lOut.append("")
  239. if comment != None:
  240. lOut.append(comment)
  241. lOut.append(f"{key}={translation}")
  242. if params["break-long-lines"] and len(key) > doublespace_threshold:
  243. lOut.append("")
  244. return "\n".join(lOut) + '\n'
  245. # Writes a template.txt file
  246. # dkeyStrings is the dictionary returned by generate_template
  247. def write_template(templ_file, dkeyStrings, mod_name):
  248. # read existing template file to preserve comments
  249. existing_template = import_tr_file(templ_file)
  250. text = strings_to_text(dkeyStrings, existing_template[0], mod_name, existing_template[2], existing_template[3])
  251. mkdir_p(os.path.dirname(templ_file))
  252. with open(templ_file, "wt", encoding='utf-8') as template_file:
  253. template_file.write(text)
  254. # Gets all translatable strings from a lua file
  255. def read_lua_file_strings(lua_file):
  256. lOut = []
  257. with open(lua_file, encoding='utf-8') as text_file:
  258. text = text_file.read()
  259. text = re.sub(pattern_concat, "", text)
  260. strings = []
  261. for s in pattern_lua_quoted.findall(text):
  262. strings.append(s[1])
  263. for s in pattern_lua_bracketed.findall(text):
  264. strings.append(s)
  265. for s in strings:
  266. found_bad = pattern_bad_luastring.search(s)
  267. if found_bad:
  268. print("SYNTAX ERROR: Unescaped '@' in Lua string: " + s)
  269. continue
  270. s = s.replace('\\"', '"')
  271. s = s.replace("\\'", "'")
  272. s = s.replace("\n", "@n")
  273. s = s.replace("\\n", "@n")
  274. s = s.replace("=", "@=")
  275. lOut.append(s)
  276. return lOut
  277. # Gets strings from an existing translation file
  278. # returns both a dictionary of translations
  279. # and the full original source text so that the new text
  280. # can be compared to it for changes.
  281. # Returns also header comments in the third return value.
  282. def import_tr_file(tr_file):
  283. dOut = {}
  284. text = None
  285. in_header = True
  286. header_comments = None
  287. textdomain = None
  288. if os.path.exists(tr_file):
  289. with open(tr_file, "r", encoding='utf-8') as existing_file :
  290. # save the full text to allow for comparison
  291. # of the old version with the new output
  292. text = existing_file.read()
  293. existing_file.seek(0)
  294. # a running record of the current comment block
  295. # we're inside, to allow preceeding multi-line comments
  296. # to be retained for a translation line
  297. latest_comment_block = None
  298. for line in existing_file.readlines():
  299. line = line.rstrip('\n')
  300. # "##### not used anymore #####" comment
  301. if line == comment_unused:
  302. # Always delete the 'not used anymore' comment.
  303. # It will be re-added to the file if neccessary.
  304. latest_comment_block = None
  305. if header_comments != None:
  306. in_header = False
  307. continue
  308. # Comment lines
  309. elif line.startswith("#"):
  310. # Source file comments: ##[ file.lua ]##
  311. if line.startswith(symbol_source_prefix) and line.endswith(symbol_source_suffix):
  312. # This line marks the end of header comments.
  313. if params["print-source"]:
  314. in_header = False
  315. # Remove those comments; they may be added back automatically.
  316. continue
  317. # Store first occurance of textdomain
  318. # discard all subsequent textdomain lines
  319. if line.startswith("# textdomain:"):
  320. if textdomain == None:
  321. textdomain = line
  322. continue
  323. elif in_header:
  324. # Save header comments (normal comments at top of file)
  325. if not header_comments:
  326. header_comments = line
  327. else:
  328. header_comments = header_comments + "\n" + line
  329. else:
  330. # Save normal comments
  331. if line.startswith("# textdomain:") and textdomain == None:
  332. textdomain = line
  333. elif not latest_comment_block:
  334. latest_comment_block = line
  335. else:
  336. latest_comment_block = latest_comment_block + "\n" + line
  337. continue
  338. match = pattern_tr.match(line)
  339. if match:
  340. # this line is a translated line
  341. outval = {}
  342. outval["translation"] = match.group(2)
  343. if latest_comment_block:
  344. # if there was a comment, record that.
  345. outval["comment"] = latest_comment_block
  346. latest_comment_block = None
  347. in_header = False
  348. dOut[match.group(1)] = outval
  349. return (dOut, text, header_comments, textdomain)
  350. # like os.walk but returns sorted filenames
  351. def sorted_os_walk(folder):
  352. tuples = []
  353. t = 0
  354. for root, dirs, files in os.walk(folder):
  355. tuples.append( (root, dirs, files) )
  356. t = t + 1
  357. tuples = sorted(tuples)
  358. paths_and_files = []
  359. f = 0
  360. for tu in tuples:
  361. root = tu[0]
  362. dirs = tu[1]
  363. files = tu[2]
  364. files = sorted(files, key=str.lower)
  365. for filename in files:
  366. paths_and_files.append( (os.path.join(root, filename), filename) )
  367. f = f + 1
  368. return paths_and_files
  369. # Walks all lua files in the mod folder, collects translatable strings,
  370. # and writes it to a template.txt file
  371. # Returns a dictionary of localized strings to source file lists
  372. # that can be used with the strings_to_text function.
  373. def generate_template(folder, mod_name):
  374. dOut = {}
  375. paths_and_files = sorted_os_walk(folder)
  376. for paf in paths_and_files:
  377. fullpath_filename = paf[0]
  378. filename = paf[1]
  379. if fnmatch.fnmatch(filename, "*.lua"):
  380. found = read_lua_file_strings(fullpath_filename)
  381. if params["verbose"]:
  382. print(f"{fullpath_filename}: {str(len(found))} translatable strings")
  383. for s in found:
  384. sources = dOut.get(s, set())
  385. sources.add(os.path.relpath(fullpath_filename, start=folder))
  386. dOut[s] = sources
  387. if len(dOut) == 0:
  388. return None
  389. # Convert source file set to list, sort it and add comment symbols.
  390. # Needed because a set is unsorted and might result in unpredictable.
  391. # output orders if any source string appears in multiple files.
  392. for d in dOut:
  393. sources = dOut.get(d, set())
  394. sources = sorted(list(sources), key=str.lower)
  395. newSources = []
  396. for i in sources:
  397. i = i.replace("\\", "/")
  398. newSources.append(f"{symbol_source_prefix} {i} {symbol_source_suffix}")
  399. dOut[d] = newSources
  400. templ_file = os.path.join(folder, "locale/template.txt")
  401. write_template(templ_file, dOut, mod_name)
  402. new_template = import_tr_file(templ_file) # re-import to get all new data
  403. return (dOut, new_template)
  404. # Updates an existing .tr file, copying the old one to a ".old" file
  405. # if any changes have happened
  406. # dNew is the data used to generate the template, it has all the
  407. # currently-existing localized strings
  408. def update_tr_file(dNew, templ, mod_name, tr_file):
  409. if params["verbose"]:
  410. print(f"updating {tr_file}")
  411. tr_import = import_tr_file(tr_file)
  412. dOld = tr_import[0]
  413. textOld = tr_import[1]
  414. textNew = strings_to_text(dNew, dOld, mod_name, tr_import[2], tr_import[3], templ)
  415. if textOld and textOld != textNew:
  416. print(f"{tr_file} has changed.")
  417. if params["old-file"]:
  418. shutil.copyfile(tr_file, f"{tr_file}.old")
  419. with open(tr_file, "w", encoding='utf-8') as new_tr_file:
  420. new_tr_file.write(textNew)
  421. # Updates translation files for the mod in the given folder
  422. def update_mod(folder):
  423. if not os.path.exists(os.path.join(folder, "init.lua")):
  424. print(f"Mod folder {folder} is missing init.lua, aborting.")
  425. exit(1)
  426. assert not is_modpack(folder)
  427. modname = get_modname(folder)
  428. print(f"Updating translations for {modname}")
  429. (data, templ) = generate_template(folder, modname)
  430. if data == None:
  431. print(f"No translatable strings found in {modname}")
  432. else:
  433. for tr_file in get_existing_tr_files(folder):
  434. update_tr_file(data, templ, modname, os.path.join(folder, "locale/", tr_file))
  435. def is_modpack(folder):
  436. return os.path.exists(os.path.join(folder, "modpack.txt")) or os.path.exists(os.path.join(folder, "modpack.conf"))
  437. def is_game(folder):
  438. return os.path.exists(os.path.join(folder, "game.conf")) and os.path.exists(os.path.join(folder, "mods"))
  439. # Determines if the folder being pointed to is a game, mod or a mod pack
  440. # and then runs update_mod accordingly
  441. def update_folder(folder):
  442. if is_game(folder):
  443. run_all_subfolders(os.path.join(folder, "mods"))
  444. elif is_modpack(folder):
  445. run_all_subfolders(folder)
  446. else:
  447. update_mod(folder)
  448. print("Done.")
  449. def run_all_subfolders(folder):
  450. for modfolder in [f.path for f in os.scandir(folder) if f.is_dir() and not f.name.startswith('.')]:
  451. update_folder(modfolder)
  452. main()