Improve add_toc_to_github_wiki_page.py

- reformat using `black`
- add an option `--max-level`
- quote anchors (for accented characters)
- fix linter errors
2023-11-17 11:25:44 +01:00 · 2023-11-17 11:25:44 +01:00 · 73bd84ddd1
parent 29675319c0
commit 73bd84ddd1
1 changed files with 98 additions and 93 deletions
--- a/Scripts/developer_scripts/add_toc_to_github_wiki_page.py
+++ b/Scripts/developer_scripts/add_toc_to_github_wiki_page.py
@ -1,20 +1,21 @@
-from sys import argv
-from sys import exit
 import codecs
 import re
 import argparse
+import sys
+from urllib.parse import quote

 parser = argparse.ArgumentParser()
-parser.add_argument("filename",
-                    help="the Mardown file to process")
-parser.add_argument("--codebase",
+parser.add_argument("filename", help="the Mardown file to process")
+parser.add_argument(
+    "--codebase",
    help="for a Markdown file of Codebase instead of Github",
-                    action="store_true")
-parser.add_argument("--h1",
-                    help="support level one sections (h1)",
-                    action="store_true")
+    action="store_true",
+)
+parser.add_argument("--h1", help="support level one sections (h1)", action="store_true")
+parser.add_argument("--max-level", help="maximum level of sections", type=int, default = 5)
 args = parser.parse_args()

+
 # a probably incomplete version to generate an anchor from a section name
 def get_anchor(s):
    s = s.replace("`", "")
@ -38,28 +39,31 @@ def get_anchor(s):
    s = s.lstrip(" ")
    s = s.rstrip("\n")
    s = s.rstrip(" ")
-  s = re.sub(r'\s+','-',s)
+    s = re.sub(r"\s+", "-", s)
    if not args.codebase:
        s = s.lower()
    if args.codebase:
        s = s.replace("'", "-and-39-")
-  return "#"+s
+    return "#" + quote(s)
+

 # indices the nesting level (first level allowed is ##)
 def get_level(s):
-  m = re.search('^(#+)\s', s)
+    m = re.search(r"^(#+)\s", s)
    if m:
        return len(m.group(1))
    else:
        return 0

+
 def get_name(s):
-  m = re.search('^#+\s+(.*)\s*$', s)
+    m = re.search(r"^#+\s+(.*)\s*$", s)
    if m:
        return m.group(1)
    else:
        return "ERROR: Section name extraction"

+
 # generate the entry for one section
 def get_toc_entry(s):
    name = get_name(s)
@ -73,23 +77,24 @@ def get_toc_entry(s):
        return "ERROR: h1 sections are not allowed"

    res = "* [" + name + "](" + anchor + ")"
-  for i in range(0,level):
+    for _ in range(0, level):
        res = "  " + res
    return res

-#now the main
-input = args.filename

-f = codecs.open(input, 'r', encoding='utf-8')
+# now the main
+filename = args.filename
+
+f = codecs.open(filename, "r", encoding="utf-8")

 if not f:
    print("Cannot open " + input + "\n")
-  exit()
+    sys.exit()

 # look for <!--TOC--> the begin of the file
 line = f.readline()
 if line.find("<!--TOC-->") == -1:
-  exit()
+    sys.exit()

 # skip current TOC
 line = f.readline()
@ -97,7 +102,7 @@ while line and line.find("<!--TOC-->")==-1:
    line = f.readline()

 if not line:
-  exit()
+    sys.exit()

 buffer = ""
 TOC = "<!--TOC-->\n\n# Table of Contents\n"
@ -113,13 +118,13 @@ for line in f.readlines():
        if line[:3] == "```":
            verbatim_mode = True
        else:
-      if line[0]=="#":
-        TOC+=(get_toc_entry(line)+"\n")
+            if line[0] == "#" and get_level(line) <= args.max_level:
+                TOC += get_toc_entry(line) + "\n"
                TOC_empty = False
 TOC += "\n<!--TOC-->\n"

 if not TOC_empty:
    f.close()
-  f = codecs.open(input, 'w', encoding='utf-8')
+    f = codecs.open(filename, "w", encoding="utf-8")
    f.write(TOC)
    f.write(buffer)