mod_translation_updater.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Script to generate Minetest translation template files and update
  5. # translation files.
  6. #
  7. # Copyright (C) 2019 Joachim Stolberg, 2020 FaceDeer, 2020 Louis Royer,
  8. # 2023 Wuzzy.
  9. # License: LGPLv2.1 or later (see LICENSE file for details)
  10. import os, fnmatch, re, shutil, errno
  11. from sys import argv as _argv
  12. from sys import stderr as _stderr
  13. # Running params
  14. params = {"recursive": False,
  15. "help": False,
  16. "verbose": False,
  17. "folders": [],
  18. "old-file": False,
  19. "break-long-lines": False,
  20. "print-source": False,
  21. "truncate-unused": False,
  22. }
  23. # Available CLI options
  24. options = {"recursive": ['--recursive', '-r'],
  25. "help": ['--help', '-h'],
  26. "verbose": ['--verbose', '-v'],
  27. "old-file": ['--old-file', '-o'],
  28. "break-long-lines": ['--break-long-lines', '-b'],
  29. "print-source": ['--print-source', '-p'],
  30. "truncate-unused": ['--truncate-unused', '-t'],
  31. }
  32. # Strings longer than this will have extra space added between
  33. # them in the translation files to make it easier to distinguish their
  34. # beginnings and endings at a glance
  35. doublespace_threshold = 80
  36. # These symbols mark comment lines showing the source file name.
  37. # A comment may look like "##[ init.lua ]##".
  38. symbol_source_prefix = "##["
  39. symbol_source_suffix = "]##"
  40. # comment to mark the section of old/unused strings
  41. comment_unused = "##### not used anymore #####"
  42. def set_params_folders(tab: list):
  43. '''Initialize params["folders"] from CLI arguments.'''
  44. # Discarding argument 0 (tool name)
  45. for param in tab[1:]:
  46. stop_param = False
  47. for option in options:
  48. if param in options[option]:
  49. stop_param = True
  50. break
  51. if not stop_param:
  52. params["folders"].append(os.path.abspath(param))
  53. def set_params(tab: list):
  54. '''Initialize params from CLI arguments.'''
  55. for option in options:
  56. for option_name in options[option]:
  57. if option_name in tab:
  58. params[option] = True
  59. break
def print_help(name):
    '''Prints some help message.

    name is the program name shown in the SYNOPSIS line. The option
    spellings listed in the text are taken from the options table.
    '''
    # NOTE(review): the leading whitespace inside this help text is not
    # visible in this copy of the file; confirm the intended indentation
    # of the text against the upstream script.
    print(f'''SYNOPSIS
{name} [OPTIONS] [PATHS...]
DESCRIPTION
{', '.join(options["help"])}
prints this help message
{', '.join(options["recursive"])}
run on all subfolders of paths given
{', '.join(options["old-file"])}
create *.old files
{', '.join(options["break-long-lines"])}
add extra line breaks before and after long strings
{', '.join(options["print-source"])}
add comments denoting the source file
{', '.join(options["verbose"])}
add output information
{', '.join(options["truncate-unused"])}
delete unused strings from files
''')
  80. def main():
  81. '''Main function'''
  82. set_params(_argv)
  83. set_params_folders(_argv)
  84. if params["help"]:
  85. print_help(_argv[0])
  86. else:
  87. # Add recursivity message
  88. print("Running ", end='')
  89. if params["recursive"]:
  90. print("recursively ", end='')
  91. # Running
  92. if len(params["folders"]) >= 2:
  93. print("on folder list:", params["folders"])
  94. for f in params["folders"]:
  95. if params["recursive"]:
  96. run_all_subfolders(f)
  97. else:
  98. update_folder(f)
  99. elif len(params["folders"]) == 1:
  100. print("on folder", params["folders"][0])
  101. if params["recursive"]:
  102. run_all_subfolders(params["folders"][0])
  103. else:
  104. update_folder(params["folders"][0])
  105. else:
  106. print("on folder", os.path.abspath("./"))
  107. if params["recursive"]:
  108. run_all_subfolders(os.path.abspath("./"))
  109. else:
  110. update_folder(os.path.abspath("./"))
# Matches a call to S(), FS(), NS() or NFS() whose first argument is a
# string literal in single or double quotes.
# Group 2 will be the string, groups 1 and 3 will be the delimiters (" or ')
# See https://stackoverflow.com/questions/46967465/regex-match-text-in-either-single-or-double-quote
pattern_lua_quoted = re.compile(
    r'(?:^|[\.=,{\(\s])' # Look for beginning of file or anything that isn't a function identifier
    r'N?F?S\s*\(\s*' # Matches S, FS, NS or NFS function call
    r'(["\'])((?:\\\1|(?:(?!\1)).)*)(\1)' # Quoted string
    r'[\s,\)]', # End of call or argument
    re.DOTALL)

# Handles the [[ ... ]] string delimiters
# Group 1 is the text between the double brackets.
pattern_lua_bracketed = re.compile(
    r'(?:^|[\.=,{\(\s])' # Same as for pattern_lua_quoted
    r'N?F?S\s*\(\s*' # Same as for pattern_lua_quoted
    r'\[\[(.*?)\]\]' # [[ ... ]] string delimiters
    r'[\s,\)]', # Same as for pattern_lua_quoted
    re.DOTALL)

# Handles "concatenation" .. " of strings"
# Matches a closing quote, the ".." operator and the next opening quote,
# including any whitespace in between.
pattern_concat = re.compile(r'["\'][\s]*\.\.[\s]*["\']', re.DOTALL)

# Handles a translation line in *.tr file.
# Group 1 is the source string left of the equals sign.
# Group 2 is the translated string, right of the equals sign.
pattern_tr = re.compile(
    r'(.*)' # Source string
    # the separating equals sign, if NOT preceded by @, unless
    # that @ is preceded by another @
    r'(?:(?<!(?<!@)@)=)'
    r'(.*)' # Translation string
    )

# Matches a "name = value" line; group 1 is the value up to the first
# space or newline. Used by get_modname() on the lines of mod.conf.
pattern_name = re.compile(r'^name[ ]*=[ ]*([^ \n]*)')

# Matches file names that end in ".tr"
pattern_tr_filename = re.compile(r'\.tr$')

# Matches bad use of @ signs in Lua string
pattern_bad_luastring = re.compile(
    r'^@$|' # single @, OR
    r'[^@]@$|' # trailing unescaped @, OR
    r'(?<!@)@(?=[^@1-9])' # an @ that is not escaped or part of a placeholder
)
  146. # Attempt to read the mod's name from the mod.conf file or folder name. Returns None on failure
  147. def get_modname(folder):
  148. try:
  149. with open(os.path.join(folder, "mod.conf"), "r", encoding='utf-8') as mod_conf:
  150. for line in mod_conf:
  151. match = pattern_name.match(line)
  152. if match:
  153. return match.group(1)
  154. except FileNotFoundError:
  155. folder_name = os.path.basename(folder)
  156. # Special case when run in Minetest's builtin directory
  157. return "__builtin" if folder_name == "builtin" else folder_name
  158. # If there are already .tr files in /locale, returns a list of their names
  159. def get_existing_tr_files(folder):
  160. out = []
  161. for root, dirs, files in os.walk(os.path.join(folder, 'locale/')):
  162. for name in files:
  163. if pattern_tr_filename.search(name):
  164. out.append(name)
  165. return out
  166. # from https://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python/600612#600612
  167. # Creates a directory if it doesn't exist, silently does
  168. # nothing if it already exists
  169. def mkdir_p(path):
  170. try:
  171. os.makedirs(path)
  172. except OSError as exc: # Python >2.5
  173. if exc.errno == errno.EEXIST and os.path.isdir(path):
  174. pass
  175. else: raise
  176. # Converts the template dictionary to a text to be written as a file
  177. # dKeyStrings is a dictionary of localized string to source file sets
  178. # dOld is a dictionary of existing translations and comments from
  179. # the previous version of this text
  180. def strings_to_text(dkeyStrings, dOld, mod_name, header_comments, textdomain):
  181. # if textdomain is specified, insert it at the top
  182. if textdomain != None:
  183. lOut = [textdomain] # argument is full textdomain line
  184. # otherwise, use mod name as textdomain automatically
  185. else:
  186. lOut = [f"# textdomain: {mod_name}"]
  187. if header_comments is not None:
  188. lOut.append(header_comments)
  189. dGroupedBySource = {}
  190. for key in dkeyStrings:
  191. sourceList = list(dkeyStrings[key])
  192. sourceString = "\n".join(sourceList)
  193. listForSource = dGroupedBySource.get(sourceString, [])
  194. listForSource.append(key)
  195. dGroupedBySource[sourceString] = listForSource
  196. lSourceKeys = list(dGroupedBySource.keys())
  197. lSourceKeys.sort()
  198. for source in lSourceKeys:
  199. localizedStrings = dGroupedBySource[source]
  200. if params["print-source"]:
  201. if lOut[-1] != "":
  202. lOut.append("")
  203. lOut.append(source)
  204. for localizedString in localizedStrings:
  205. val = dOld.get(localizedString, {})
  206. translation = val.get("translation", "")
  207. comment = val.get("comment")
  208. if params["break-long-lines"] and len(localizedString) > doublespace_threshold and not lOut[-1] == "":
  209. lOut.append("")
  210. if comment != None and comment != "" and not comment.startswith("# textdomain:"):
  211. lOut.append(comment)
  212. lOut.append(f"{localizedString}={translation}")
  213. if params["break-long-lines"] and len(localizedString) > doublespace_threshold:
  214. lOut.append("")
  215. unusedExist = False
  216. if not params["truncate-unused"]:
  217. for key in dOld:
  218. if key not in dkeyStrings:
  219. val = dOld[key]
  220. translation = val.get("translation")
  221. comment = val.get("comment")
  222. # only keep an unused translation if there was translated
  223. # text or a comment associated with it
  224. if translation != None and (translation != "" or comment):
  225. if not unusedExist:
  226. unusedExist = True
  227. lOut.append("\n\n" + comment_unused + "\n")
  228. if params["break-long-lines"] and len(key) > doublespace_threshold and not lOut[-1] == "":
  229. lOut.append("")
  230. if comment != None:
  231. lOut.append(comment)
  232. lOut.append(f"{key}={translation}")
  233. if params["break-long-lines"] and len(key) > doublespace_threshold:
  234. lOut.append("")
  235. return "\n".join(lOut) + '\n'
  236. # Writes a template.txt file
  237. # dkeyStrings is the dictionary returned by generate_template
  238. def write_template(templ_file, dkeyStrings, mod_name):
  239. # read existing template file to preserve comments
  240. existing_template = import_tr_file(templ_file)
  241. text = strings_to_text(dkeyStrings, existing_template[0], mod_name, existing_template[2], existing_template[3])
  242. mkdir_p(os.path.dirname(templ_file))
  243. with open(templ_file, "wt", encoding='utf-8') as template_file:
  244. template_file.write(text)
  245. # Gets all translatable strings from a lua file
  246. def read_lua_file_strings(lua_file):
  247. lOut = []
  248. with open(lua_file, encoding='utf-8') as text_file:
  249. text = text_file.read()
  250. text = re.sub(pattern_concat, "", text)
  251. strings = []
  252. for s in pattern_lua_quoted.findall(text):
  253. strings.append(s[1])
  254. for s in pattern_lua_bracketed.findall(text):
  255. strings.append(s)
  256. for s in strings:
  257. found_bad = pattern_bad_luastring.search(s)
  258. if found_bad:
  259. print("SYNTAX ERROR: Unescaped '@' in Lua string: " + s)
  260. continue
  261. s = s.replace('\\"', '"')
  262. s = s.replace("\\'", "'")
  263. s = s.replace("\n", "@n")
  264. s = s.replace("\\n", "@n")
  265. s = s.replace("=", "@=")
  266. lOut.append(s)
  267. return lOut
# Gets strings from an existing translation file
# returns both a dictionary of translations
# and the full original source text so that the new text
# can be compared to it for changes.
# Returns also header comments in the third return value
# and the first textdomain line in the fourth return value.
def import_tr_file(tr_file):
    '''Read the translation file tr_file.

    Returns a tuple (dOut, text, header_comments, textdomain):
    - dOut maps each source string to a dictionary with the key
      "translation" and, if a comment block preceded the line, "comment".
    - text is the unmodified file content, or None if the file does not
      exist.
    - header_comments are the comment lines from the top of the file,
      joined by newlines, or None if there are none.
    - textdomain is the first "# textdomain:" line, or None.
    '''
    dOut = {}
    text = None
    # True until the first translation line (or another end-of-header
    # marker) has been seen
    in_header = True
    header_comments = None
    textdomain = None
    if os.path.exists(tr_file):
        with open(tr_file, "r", encoding='utf-8') as existing_file :
            # save the full text to allow for comparison
            # of the old version with the new output
            text = existing_file.read()
            # rewind so that the file can be read again line by line
            existing_file.seek(0)
            # a running record of the current comment block
            # we're inside, to allow preceding multi-line comments
            # to be retained for a translation line
            latest_comment_block = None
            for line in existing_file.readlines():
                line = line.rstrip('\n')
                # "##### not used anymore #####" comment
                if line == comment_unused:
                    # Always delete the 'not used anymore' comment.
                    # It will be re-added to the file if necessary.
                    latest_comment_block = None
                    if header_comments != None:
                        in_header = False
                    continue
                # Comment lines
                elif line.startswith("#"):
                    # Source file comments: ##[ file.lua ]##
                    if line.startswith(symbol_source_prefix) and line.endswith(symbol_source_suffix):
                        # This line marks the end of header comments.
                        if params["print-source"]:
                            in_header = False
                        # Remove those comments; they may be added back automatically.
                        continue
                    # Store first occurrence of textdomain
                    # discard all subsequent textdomain lines
                    if line.startswith("# textdomain:"):
                        if textdomain == None:
                            textdomain = line
                        continue
                    elif in_header:
                        # Save header comments (normal comments at top of file)
                        if not header_comments:
                            header_comments = line
                        else:
                            header_comments = header_comments + "\n" + line
                    else:
                        # Save normal comments
                        if line.startswith("# textdomain:") and textdomain == None:
                            textdomain = line
                        elif not latest_comment_block:
                            latest_comment_block = line
                        else:
                            latest_comment_block = latest_comment_block + "\n" + line
                    # a comment line is never a translation line
                    continue
                match = pattern_tr.match(line)
                if match:
                    # this line is a translated line
                    outval = {}
                    outval["translation"] = match.group(2)
                    if latest_comment_block:
                        # if there was a comment, record that.
                        outval["comment"] = latest_comment_block
                    # the comment block belongs to this line only
                    latest_comment_block = None
                    # the first translation line ends the header
                    in_header = False
                    dOut[match.group(1)] = outval
    return (dOut, text, header_comments, textdomain)
  341. # like os.walk but returns sorted filenames
  342. def sorted_os_walk(folder):
  343. tuples = []
  344. t = 0
  345. for root, dirs, files in os.walk(folder):
  346. tuples.append( (root, dirs, files) )
  347. t = t + 1
  348. tuples = sorted(tuples)
  349. paths_and_files = []
  350. f = 0
  351. for tu in tuples:
  352. root = tu[0]
  353. dirs = tu[1]
  354. files = tu[2]
  355. files = sorted(files, key=str.lower)
  356. for filename in files:
  357. paths_and_files.append( (os.path.join(root, filename), filename) )
  358. f = f + 1
  359. return paths_and_files
  360. # Walks all lua files in the mod folder, collects translatable strings,
  361. # and writes it to a template.txt file
  362. # Returns a dictionary of localized strings to source file lists
  363. # that can be used with the strings_to_text function.
  364. def generate_template(folder, mod_name):
  365. dOut = {}
  366. paths_and_files = sorted_os_walk(folder)
  367. for paf in paths_and_files:
  368. fullpath_filename = paf[0]
  369. filename = paf[1]
  370. if fnmatch.fnmatch(filename, "*.lua"):
  371. found = read_lua_file_strings(fullpath_filename)
  372. if params["verbose"]:
  373. print(f"{fullpath_filename}: {str(len(found))} translatable strings")
  374. for s in found:
  375. sources = dOut.get(s, set())
  376. sources.add(os.path.relpath(fullpath_filename, start=folder))
  377. dOut[s] = sources
  378. if len(dOut) == 0:
  379. return None
  380. # Convert source file set to list, sort it and add comment symbols.
  381. # Needed because a set is unsorted and might result in unpredictable.
  382. # output orders if any source string appears in multiple files.
  383. for d in dOut:
  384. sources = dOut.get(d, set())
  385. sources = sorted(list(sources), key=str.lower)
  386. newSources = []
  387. for i in sources:
  388. i = "/".join(os.path.split(i)).lstrip("/")
  389. newSources.append(f"{symbol_source_prefix} {i} {symbol_source_suffix}")
  390. dOut[d] = newSources
  391. templ_file = os.path.join(folder, "locale/template.txt")
  392. write_template(templ_file, dOut, mod_name)
  393. return dOut
  394. # Updates an existing .tr file, copying the old one to a ".old" file
  395. # if any changes have happened
  396. # dNew is the data used to generate the template, it has all the
  397. # currently-existing localized strings
  398. def update_tr_file(dNew, mod_name, tr_file):
  399. if params["verbose"]:
  400. print(f"updating {tr_file}")
  401. tr_import = import_tr_file(tr_file)
  402. dOld = tr_import[0]
  403. textOld = tr_import[1]
  404. textNew = strings_to_text(dNew, dOld, mod_name, tr_import[2], tr_import[3])
  405. if textOld and textOld != textNew:
  406. print(f"{tr_file} has changed.")
  407. if params["old-file"]:
  408. shutil.copyfile(tr_file, f"{tr_file}.old")
  409. with open(tr_file, "w", encoding='utf-8') as new_tr_file:
  410. new_tr_file.write(textNew)
  411. # Updates translation files for the mod in the given folder
  412. def update_mod(folder):
  413. if not os.path.exists(os.path.join(folder, "init.lua")):
  414. print(f"Mod folder {folder} is missing init.lua, aborting.")
  415. exit(1)
  416. assert not is_modpack(folder)
  417. modname = get_modname(folder)
  418. print(f"Updating translations for {modname}")
  419. data = generate_template(folder, modname)
  420. if data == None:
  421. print(f"No translatable strings found in {modname}")
  422. else:
  423. for tr_file in get_existing_tr_files(folder):
  424. update_tr_file(data, modname, os.path.join(folder, "locale/", tr_file))
  425. def is_modpack(folder):
  426. return os.path.exists(os.path.join(folder, "modpack.txt")) or os.path.exists(os.path.join(folder, "modpack.conf"))
  427. def is_game(folder):
  428. return os.path.exists(os.path.join(folder, "game.conf")) and os.path.exists(os.path.join(folder, "mods"))
  429. # Determines if the folder being pointed to is a game, mod or a mod pack
  430. # and then runs update_mod accordingly
  431. def update_folder(folder):
  432. if is_game(folder):
  433. run_all_subfolders(os.path.join(folder, "mods"))
  434. elif is_modpack(folder):
  435. run_all_subfolders(folder)
  436. else:
  437. update_mod(folder)
  438. print("Done.")
  439. def run_all_subfolders(folder):
  440. for modfolder in [f.path for f in os.scandir(folder) if f.is_dir() and not f.name.startswith('.')]:
  441. update_folder(modfolder)
  442. main()