Improve add_toc_to_github_wiki_page.py

- reformat using `black`
- add an option `--max-level`
- quote anchors (for accented characters)
- fix linter errors
This commit is contained in:
Laurent Rineau 2023-11-17 11:25:44 +01:00
parent 29675319c0
commit 73bd84ddd1
1 changed file with 98 additions and 93 deletions

View File

@ -1,125 +1,130 @@
from sys import argv
from sys import exit
import codecs import codecs
import re import re
import argparse import argparse
import sys
from urllib.parse import quote
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("filename", parser.add_argument("filename", help="the Mardown file to process")
help="the Mardown file to process") parser.add_argument(
parser.add_argument("--codebase", "--codebase",
help="for a Markdown file of Codebase instead of Github", help="for a Markdown file of Codebase instead of Github",
action="store_true") action="store_true",
parser.add_argument("--h1", )
help="support level one sections (h1)", parser.add_argument("--h1", help="support level one sections (h1)", action="store_true")
action="store_true") parser.add_argument("--max-level", help="maximum level of sections", type=int, default = 5)
args = parser.parse_args() args = parser.parse_args()
# a probably incomplete version to generate an anchor from a section name # a probably incomplete version to generate an anchor from a section name
def get_anchor(s): def get_anchor(s):
s = s.replace("`","") s = s.replace("`", "")
s = s.replace("(","") s = s.replace("(", "")
s = s.replace(")","") s = s.replace(")", "")
s = s.replace(".","") s = s.replace(".", "")
s = s.replace("#","") s = s.replace("#", "")
s = s.replace(":","") s = s.replace(":", "")
s = s.replace(",","") s = s.replace(",", "")
s = s.replace(";","") s = s.replace(";", "")
if args.codebase: if args.codebase:
s = s.replace("/","-") s = s.replace("/", "-")
else: else:
s = s.replace("/","") s = s.replace("/", "")
s = s.replace("<","") s = s.replace("<", "")
s = s.replace(">","") s = s.replace(">", "")
s = s.replace("+","") s = s.replace("+", "")
s = s.replace("=","") s = s.replace("=", "")
s = s.replace("?","") s = s.replace("?", "")
s = s.replace("@","") s = s.replace("@", "")
s = s.lstrip(" ") s = s.lstrip(" ")
s = s.rstrip("\n") s = s.rstrip("\n")
s = s.rstrip(" ") s = s.rstrip(" ")
s = re.sub(r'\s+','-',s) s = re.sub(r"\s+", "-", s)
if not args.codebase: if not args.codebase:
s = s.lower() s = s.lower()
if args.codebase: if args.codebase:
s = s.replace("'","-and-39-") s = s.replace("'", "-and-39-")
return "#"+s return "#" + quote(s)
# indicates the nesting level (first level allowed is ##) # indicates the nesting level (first level allowed is ##)
def get_level(s): def get_level(s):
m = re.search('^(#+)\s', s) m = re.search(r"^(#+)\s", s)
if m: if m:
return len(m.group(1)) return len(m.group(1))
else: else:
return 0 return 0
def get_name(s): def get_name(s):
m = re.search('^#+\s+(.*)\s*$', s) m = re.search(r"^#+\s+(.*)\s*$", s)
if m: if m:
return m.group(1) return m.group(1)
else: else:
return "ERROR: Section name extraction" return "ERROR: Section name extraction"
#generate the entry for one section
# generate the entry for one section
def get_toc_entry(s): def get_toc_entry(s):
name = get_name(s) name = get_name(s)
if args.h1: if args.h1:
level = get_level(s)-1 level = get_level(s) - 1
else: else:
level = get_level(s)-2 level = get_level(s) - 2
anchor = get_anchor(s) anchor = get_anchor(s)
if level<0: if level < 0:
return "ERROR: h1 sections are not allowed" return "ERROR: h1 sections are not allowed"
res="* ["+name+"]("+anchor+")" res = "* [" + name + "](" + anchor + ")"
for i in range(0,level): for _ in range(0, level):
res=" "+res res = " " + res
return res return res
#now the main
input = args.filename
f = codecs.open(input, 'r', encoding='utf-8') # now the main
filename = args.filename
f = codecs.open(filename, "r", encoding="utf-8")
if not f: if not f:
print("Cannot open "+input+"\n") print("Cannot open " + input + "\n")
exit() sys.exit()
#look for <!--TOC--> at the beginning of the file # look for <!--TOC--> at the beginning of the file
line=f.readline() line = f.readline()
if line.find("<!--TOC-->")==-1: if line.find("<!--TOC-->") == -1:
exit() sys.exit()
#skip current TOC # skip current TOC
line=f.readline() line = f.readline()
while line and line.find("<!--TOC-->")==-1: while line and line.find("<!--TOC-->") == -1:
line=f.readline() line = f.readline()
if not line: if not line:
exit() sys.exit()
buffer="" buffer = ""
TOC="<!--TOC-->\n\n# Table of Contents\n" TOC = "<!--TOC-->\n\n# Table of Contents\n"
verbatim_mode=False # to ignore verbatim mode while looking for sections verbatim_mode = False # to ignore verbatim mode while looking for sections
TOC_empty=True TOC_empty = True
for line in f.readlines(): for line in f.readlines():
buffer+=line buffer += line
if verbatim_mode: if verbatim_mode:
if line[:3]=="```": if line[:3] == "```":
verbatim_mode=False verbatim_mode = False
else:
if line[:3]=="```":
verbatim_mode=True
else: else:
if line[0]=="#": if line[:3] == "```":
TOC+=(get_toc_entry(line)+"\n") verbatim_mode = True
TOC_empty=False else:
TOC+="\n<!--TOC-->\n" if line[0] == "#" and get_level(line) <= args.max_level:
TOC += get_toc_entry(line) + "\n"
TOC_empty = False
TOC += "\n<!--TOC-->\n"
if not TOC_empty: if not TOC_empty:
f.close() f.close()
f = codecs.open(input, 'w', encoding='utf-8') f = codecs.open(filename, "w", encoding="utf-8")
f.write(TOC) f.write(TOC)
f.write(buffer) f.write(buffer)