# MIT License
#
# Copyright The SCons Foundation
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""Tool-specific initialization for Docbook.
There normally shouldn't be any need to import this module directly.
It will usually be imported through the generic SCons.Tool.Tool()
selection method.
"""
import os
import glob
import re
import SCons.Action
import SCons.Builder
import SCons.Defaults
import SCons.Script
import SCons.Tool
import SCons.Util
__debug_tool_location = False
# Get full path to this script
scriptpath = os.path.dirname(os.path.realpath(__file__))
# Local folder for the collection of DocBook XSLs
db_xsl_folder = 'docbook-xsl-1.76.1'
# Do we have lxml?
has_lxml = True
try:
import lxml
except Exception:
has_lxml = False
# Set this to True, to prefer xsltproc over lxml
prefer_xsltproc = False
# Regexs for parsing Docbook XML sources of MAN pages
re_manvolnum = re.compile(r"([^<]*)")
re_refname = re.compile(r"([^<]*)")
#
# lxml etree XSLT global max traversal depth
#
lmxl_xslt_global_max_depth = 3600
if has_lxml and lmxl_xslt_global_max_depth:
def __lxml_xslt_set_global_max_depth(max_depth) -> None:
from lxml import etree
etree.XSLT.set_global_max_depth(max_depth)
__lxml_xslt_set_global_max_depth(lmxl_xslt_global_max_depth)
#
# Helper functions
#
def __extend_targets_sources(target, source):
""" Prepare the lists of target and source files. """
if not SCons.Util.is_List(target):
target = [target]
if not source:
source = target[:]
elif not SCons.Util.is_List(source):
source = [source]
if len(target) < len(source):
target.extend(source[len(target):])
return target, source
def __init_xsl_stylesheet(kw, env, user_xsl_var, default_path) -> None:
if kw.get('DOCBOOK_XSL','') == '':
xsl_style = kw.get('xsl', env.subst(user_xsl_var))
if xsl_style == '':
path_args = [scriptpath, db_xsl_folder] + default_path
xsl_style = os.path.join(*path_args)
kw['DOCBOOK_XSL'] = xsl_style
def __select_builder(lxml_builder, cmdline_builder):
""" Selects a builder, based on which Python modules are present. """
if has_lxml and not prefer_xsltproc:
return lxml_builder
return cmdline_builder
def __ensure_suffix(t, suffix):
""" Ensure that the target t has the given suffix. """
tpath = str(t)
if not tpath.endswith(suffix):
return tpath+suffix
return t
def __ensure_suffix_stem(t, suffix):
""" Ensure that the target t has the given suffix, and return the file's stem. """
tpath = str(t)
if not tpath.endswith(suffix):
stem = tpath
tpath += suffix
return tpath, stem
else:
stem, ext = os.path.splitext(tpath)
return t, stem
def __get_xml_text(root):
""" Return the text for the given root node (xml.dom.minidom). """
txt = ""
for e in root.childNodes:
if e.nodeType == e.TEXT_NODE:
txt += e.data
return txt
def __create_output_dir(base_dir) -> None:
""" Ensure that the output directory base_dir exists. """
root, tail = os.path.split(base_dir)
dir = None
if tail:
if base_dir.endswith('/'):
dir = base_dir
else:
dir = root
else:
if base_dir.endswith('/'):
dir = base_dir
if dir and not os.path.isdir(dir):
os.makedirs(dir)
#
# Supported command line tools and their call "signature"
#
xsltproc_com_priority = ['xsltproc', 'saxon', 'saxon-xslt', 'xalan']
# TODO: Set minimum version of saxon-xslt to be 8.x (lower than this only supports xslt 1.0.
# see: https://saxon.sourceforge.net/saxon6.5.5/
# see: https://saxon.sourceforge.net/
xsltproc_com = {'xsltproc' : '$DOCBOOK_XSLTPROC $DOCBOOK_XSLTPROCFLAGS -o $TARGET $DOCBOOK_XSL $SOURCE',
'saxon' : '$DOCBOOK_XSLTPROC $DOCBOOK_XSLTPROCFLAGS -o $TARGET $DOCBOOK_XSL $SOURCE $DOCBOOK_XSLTPROCPARAMS',
# Note if saxon-xslt is version 5.5 the proper arguments are: (swap order of docbook_xsl and source)
# 'saxon-xslt' : '$DOCBOOK_XSLTPROC $DOCBOOK_XSLTPROCFLAGS -o $TARGET $SOURCE $DOCBOOK_XSL $DOCBOOK_XSLTPROCPARAMS',
'saxon-xslt' : '$DOCBOOK_XSLTPROC $DOCBOOK_XSLTPROCFLAGS -o $TARGET $DOCBOOK_XSL $SOURCE $DOCBOOK_XSLTPROCPARAMS',
'xalan' : '$DOCBOOK_XSLTPROC $DOCBOOK_XSLTPROCFLAGS -q -out $TARGET -xsl $DOCBOOK_XSL -in $SOURCE'}
xmllint_com = {'xmllint' : '$DOCBOOK_XMLLINT $DOCBOOK_XMLLINTFLAGS --xinclude $SOURCE > $TARGET'}
fop_com = {'fop' : '$DOCBOOK_FOP $DOCBOOK_FOPFLAGS -fo $SOURCE -pdf $TARGET',
'xep' : '$DOCBOOK_FOP $DOCBOOK_FOPFLAGS -valid -fo $SOURCE -pdf $TARGET',
'jw' : '$DOCBOOK_FOP $DOCBOOK_FOPFLAGS -f docbook -b pdf $SOURCE -o $TARGET'}
def __detect_cl_tool(env, chainkey, cdict, cpriority=None) -> None:
"""
Helper function, picks a command line tool from the list
and initializes its environment variables.
"""
if env.get(chainkey,'') == '':
clpath = ''
if cpriority is None:
cpriority = cdict.keys()
for cltool in cpriority:
if __debug_tool_location:
print("DocBook: Looking for %s"%cltool)
clpath = env.WhereIs(cltool)
if clpath:
if __debug_tool_location:
print("DocBook: Found:%s"%cltool)
env[chainkey] = clpath
if not env[chainkey + 'COM']:
env[chainkey + 'COM'] = cdict[cltool]
break
def _detect(env) -> None:
"""
Detect all the command line tools that we might need for creating
the requested output formats.
"""
global prefer_xsltproc
if env.get('DOCBOOK_PREFER_XSLTPROC',''):
prefer_xsltproc = True
if (not has_lxml) or prefer_xsltproc:
# Try to find the XSLT processors
__detect_cl_tool(env, 'DOCBOOK_XSLTPROC', xsltproc_com, xsltproc_com_priority)
__detect_cl_tool(env, 'DOCBOOK_XMLLINT', xmllint_com)
__detect_cl_tool(env, 'DOCBOOK_FOP', fop_com, ['fop','xep','jw'])
#
# Scanners
#
include_re = re.compile(r'fileref\\s*=\\s*["|\']([^\\n]*)["|\']')
sentity_re = re.compile(r'')
def __xml_scan(node, env, path, arg):
""" Simple XML file scanner, detecting local images and XIncludes as implicit dependencies. """
# Does the node exist yet?
if not os.path.isfile(str(node)):
return []
if env.get('DOCBOOK_SCANENT',''):
# Use simple pattern matching for system entities..., no support
# for recursion yet.
contents = node.get_text_contents()
return sentity_re.findall(contents)
xsl_file = os.path.join(scriptpath,'utils','xmldepend.xsl')
if not has_lxml or prefer_xsltproc:
# Try to call xsltproc
xsltproc = env.subst("$DOCBOOK_XSLTPROC")
if xsltproc and xsltproc.endswith('xsltproc'):
result = env.backtick(' '.join([xsltproc, xsl_file, str(node)]))
depfiles = [x.strip() for x in str(result).splitlines() if x.strip() != "" and not x.startswith(" None:
"""Generate the *.epub file from intermediate outputs
Constructs the epub file according to the Open Container Format. This
function could be replaced by a call to the SCons Zip builder if support
was added for different compression formats for separate source nodes.
"""
with zipfile.ZipFile(str(target[0]), 'w') as zf:
with open('mimetype', 'w') as mime_file:
mime_file.write('application/epub+zip')
zf.write(mime_file.name, compress_type = zipfile.ZIP_STORED)
for s in source:
if os.path.isfile(str(s)):
head, tail = os.path.split(str(s))
if not head:
continue
s = head
for dirpath, dirnames, filenames in os.walk(str(s)):
for fname in filenames:
path = os.path.join(dirpath, fname)
if os.path.isfile(path):
zf.write(path, os.path.relpath(path, str(env.get('ZIPROOT', ''))),
zipfile.ZIP_DEFLATED)
def add_resources(target, source, env) -> None:
"""Add missing resources to the OEBPS directory
Ensure all the resources in the manifest are present in the OEBPS directory.
"""
hrefs = []
content_file = os.path.join(source[0].get_abspath(), 'content.opf')
if not os.path.isfile(content_file):
return
hrefs = []
if has_lxml:
from lxml import etree
opf = etree.parse(content_file)
# All the opf:item elements are resources
for item in opf.xpath('//opf:item',
namespaces= { 'opf': 'http://www.idpf.org/2007/opf' }):
hrefs.append(item.attrib['href'])
for href in hrefs:
# If the resource was not already created by DocBook XSL itself,
# copy it into the OEBPS folder
referenced_file = os.path.join(source[0].get_abspath(), href)
if not os.path.exists(referenced_file):
shutil.copy(href, os.path.join(source[0].get_abspath(), href))
# Init list of targets/sources
target, source = __extend_targets_sources(target, source)
# Init XSL stylesheet
__init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_EPUB', ['epub','docbook.xsl'])
# Setup builder
__builder = __select_builder(__lxml_noresult_builder, __xsltproc_nobase_builder)
# Create targets
result = []
if not env.GetOption('clean'):
# Ensure that the folders OEBPS and META-INF exist
__create_output_dir('OEBPS/')
__create_output_dir('META-INF/')
dirs = env.Dir(['OEBPS', 'META-INF'])
# Set the fixed base_dir
kw['base_dir'] = 'OEBPS/'
tocncx = __builder(env, 'toc.ncx', source[0], **kw)
cxml = env.File('META-INF/container.xml')
env.SideEffect(cxml, tocncx)
env.Depends(tocncx, kw['DOCBOOK_XSL'])
result.extend(tocncx+[cxml])
container = env.Command(__ensure_suffix(str(target[0]), '.epub'),
tocncx+[cxml], [add_resources, build_open_container])
mimetype = env.File('mimetype')
env.SideEffect(mimetype, container)
result.extend(container)
# Add supporting files for cleanup
env.Clean(tocncx, dirs)
return result
def DocbookHtml(env, target, source=None, *args, **kw):
"""
A pseudo-Builder, providing a Docbook toolchain for HTML output.
"""
# Init list of targets/sources
target, source = __extend_targets_sources(target, source)
# Init XSL stylesheet
__init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_HTML', ['html','docbook.xsl'])
# Setup builder
__builder = __select_builder(__lxml_builder, __xsltproc_builder)
# Create targets
result = []
for t,s in zip(target,source):
r = __builder(env, __ensure_suffix(t,'.html'), s, **kw)
env.Depends(r, kw['DOCBOOK_XSL'])
result.extend(r)
return result
def DocbookHtmlChunked(env, target, source=None, *args, **kw):
"""
A pseudo-Builder, providing a Docbook toolchain for chunked HTML output.
"""
# Init target/source
if not SCons.Util.is_List(target):
target = [target]
if not source:
source = target
target = ['index.html']
elif not SCons.Util.is_List(source):
source = [source]
# Init XSL stylesheet
__init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_HTMLCHUNKED', ['html','chunkfast.xsl'])
# Setup builder
__builder = __select_builder(__lxml_noresult_builder, __xsltproc_nobase_builder)
# Detect base dir
base_dir = kw.get('base_dir', '')
if base_dir:
__create_output_dir(base_dir)
# Create targets
result = []
r = __builder(env, __ensure_suffix(str(target[0]), '.html'), source[0], **kw)
env.Depends(r, kw['DOCBOOK_XSL'])
result.extend(r)
# Add supporting files for cleanup
env.Clean(r, glob.glob(os.path.join(base_dir, '*.html')))
return result
def DocbookHtmlhelp(env, target, source=None, *args, **kw):
"""
A pseudo-Builder, providing a Docbook toolchain for HTMLHELP output.
"""
# Init target/source
if not SCons.Util.is_List(target):
target = [target]
if not source:
source = target
target = ['index.html']
elif not SCons.Util.is_List(source):
source = [source]
# Init XSL stylesheet
__init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_HTMLHELP', ['htmlhelp','htmlhelp.xsl'])
# Setup builder
__builder = __select_builder(__lxml_noresult_builder, __xsltproc_nobase_builder)
# Detect base dir
base_dir = kw.get('base_dir', '')
if base_dir:
__create_output_dir(base_dir)
# Create targets
result = []
r = __builder(env, __ensure_suffix(str(target[0]), '.html'), source[0], **kw)
env.Depends(r, kw['DOCBOOK_XSL'])
result.extend(r)
# Add supporting files for cleanup
env.Clean(r, ['toc.hhc', 'htmlhelp.hhp', 'index.hhk'] +
glob.glob(os.path.join(base_dir, '[ar|bk|ch]*.html')))
return result
def DocbookPdf(env, target, source=None, *args, **kw):
"""
A pseudo-Builder, providing a Docbook toolchain for PDF output.
"""
# Init list of targets/sources
target, source = __extend_targets_sources(target, source)
# Init XSL stylesheet
__init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_PDF', ['fo','docbook.xsl'])
# Setup builder
__builder = __select_builder(__lxml_builder, __xsltproc_builder)
# Create targets
result = []
for t,s in zip(target,source):
t, stem = __ensure_suffix_stem(t, '.pdf')
xsl = __builder(env, stem+'.fo', s, **kw)
result.extend(xsl)
env.Depends(xsl, kw['DOCBOOK_XSL'])
result.extend(__fop_builder(env, t, xsl, **kw))
return result
def DocbookMan(env, target, source=None, *args, **kw):
"""
A pseudo-Builder, providing a Docbook toolchain for Man page output.
"""
# Init list of targets/sources
target, source = __extend_targets_sources(target, source)
# Init XSL stylesheet
__init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_MAN', ['manpages','docbook.xsl'])
# Setup builder
__builder = __select_builder(__lxml_noresult_builder, __xsltproc_builder)
# Create targets
result = []
for t,s in zip(target,source):
volnum = "1"
outfiles = []
srcfile = __ensure_suffix(str(s),'.xml')
if os.path.isfile(srcfile):
try:
import xml.dom.minidom
dom = xml.dom.minidom.parse(__ensure_suffix(str(s),'.xml'))
# Extract volume number, default is 1
for node in dom.getElementsByTagName('refmeta'):
for vol in node.getElementsByTagName('manvolnum'):
volnum = __get_xml_text(vol)
# Extract output filenames
for node in dom.getElementsByTagName('refnamediv'):
for ref in node.getElementsByTagName('refname'):
outfiles.append(__get_xml_text(ref)+'.'+volnum)
except Exception:
# Use simple regex parsing
with open(__ensure_suffix(str(s), '.xml')) as f:
content = f.read()
for m in re_manvolnum.finditer(content):
volnum = m.group(1)
for m in re_refname.finditer(content):
outfiles.append(m.group(1)+'.'+volnum)
if not outfiles:
# Use stem of the source file
spath = str(s)
if not spath.endswith('.xml'):
outfiles.append(spath+'.'+volnum)
else:
stem, ext = os.path.splitext(spath)
outfiles.append(stem+'.'+volnum)
else:
# We have to completely rely on the given target name
outfiles.append(t)
__builder(env, outfiles[0], s, **kw)
env.Depends(outfiles[0], kw['DOCBOOK_XSL'])
result.append(outfiles[0])
if len(outfiles) > 1:
env.Clean(outfiles[0], outfiles[1:])
return result
def DocbookSlidesPdf(env, target, source=None, *args, **kw):
"""
A pseudo-Builder, providing a Docbook toolchain for PDF slides output.
"""
# Init list of targets/sources
target, source = __extend_targets_sources(target, source)
# Init XSL stylesheet
__init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_SLIDESPDF', ['slides','fo','plain.xsl'])
# Setup builder
__builder = __select_builder(__lxml_builder, __xsltproc_builder)
# Create targets
result = []
for t,s in zip(target,source):
t, stem = __ensure_suffix_stem(t, '.pdf')
xsl = __builder(env, stem+'.fo', s, **kw)
env.Depends(xsl, kw['DOCBOOK_XSL'])
result.extend(xsl)
result.extend(__fop_builder(env, t, xsl, **kw))
return result
def DocbookSlidesHtml(env, target, source=None, *args, **kw):
"""
A pseudo-Builder, providing a Docbook toolchain for HTML slides output.
"""
# Init list of targets/sources
if not SCons.Util.is_List(target):
target = [target]
if not source:
source = target
target = ['index.html']
elif not SCons.Util.is_List(source):
source = [source]
# Init XSL stylesheet
__init_xsl_stylesheet(kw, env, '$DOCBOOK_DEFAULT_XSL_SLIDESHTML', ['slides','xhtml','plain.xsl'])
# Setup builder
__builder = __select_builder(__lxml_builder, __xsltproc_builder)
# Detect base dir
base_dir = kw.get('base_dir', '')
if base_dir:
__create_output_dir(base_dir)
# Create targets
result = []
r = __builder(env, __ensure_suffix(str(target[0]), '.html'), source[0], **kw)
env.Depends(r, kw['DOCBOOK_XSL'])
result.extend(r)
# Add supporting files for cleanup
env.Clean(r, [os.path.join(base_dir, 'toc.html')] +
glob.glob(os.path.join(base_dir, 'foil*.html')))
return result
def DocbookXInclude(env, target, source, *args, **kw):
"""
A pseudo-Builder, for resolving XIncludes in a separate processing step.
"""
# Init list of targets/sources
target, source = __extend_targets_sources(target, source)
# Setup builder
__builder = __select_builder(__xinclude_lxml_builder,__xmllint_builder)
# Create targets
result = []
for t,s in zip(target,source):
result.extend(__builder(env, t, s, **kw))
return result
def DocbookXslt(env, target, source=None, *args, **kw):
"""
A pseudo-Builder, applying a simple XSL transformation to the input file.
"""
# Init list of targets/sources
target, source = __extend_targets_sources(target, source)
# Init XSL stylesheet
kw['DOCBOOK_XSL'] = env.File(kw.get('xsl', 'transform.xsl'))
# Setup builder
__builder = __select_builder(__lxml_builder, __xsltproc_builder)
# Create targets
result = []
for t,s in zip(target,source):
r = __builder(env, t, s, **kw)
env.Depends(r, kw['DOCBOOK_XSL'])
result.extend(r)
return result
def generate(env) -> None:
"""Add Builders and construction variables for docbook to an Environment."""
env.SetDefault(
# Default names for customized XSL stylesheets
DOCBOOK_DEFAULT_XSL_EPUB = '',
DOCBOOK_DEFAULT_XSL_HTML = '',
DOCBOOK_DEFAULT_XSL_HTMLCHUNKED = '',
DOCBOOK_DEFAULT_XSL_HTMLHELP = '',
DOCBOOK_DEFAULT_XSL_PDF = '',
DOCBOOK_DEFAULT_XSL_MAN = '',
DOCBOOK_DEFAULT_XSL_SLIDESPDF = '',
DOCBOOK_DEFAULT_XSL_SLIDESHTML = '',
# Paths to the detected executables
DOCBOOK_XSLTPROC = '',
DOCBOOK_XMLLINT = '',
DOCBOOK_FOP = '',
# Additional flags for the text processors
DOCBOOK_XSLTPROCFLAGS = SCons.Util.CLVar(''),
DOCBOOK_XMLLINTFLAGS = SCons.Util.CLVar(''),
DOCBOOK_FOPFLAGS = SCons.Util.CLVar(''),
DOCBOOK_XSLTPROCPARAMS = SCons.Util.CLVar(''),
# Default command lines for the detected executables
DOCBOOK_XSLTPROCCOM = xsltproc_com['xsltproc'],
DOCBOOK_XMLLINTCOM = xmllint_com['xmllint'],
DOCBOOK_FOPCOM = fop_com['fop'],
# Screen output for the text processors
DOCBOOK_XSLTPROCCOMSTR = None,
DOCBOOK_XMLLINTCOMSTR = None,
DOCBOOK_FOPCOMSTR = None,
)
_detect(env)
env.AddMethod(DocbookEpub, "DocbookEpub")
env.AddMethod(DocbookHtml, "DocbookHtml")
env.AddMethod(DocbookHtmlChunked, "DocbookHtmlChunked")
env.AddMethod(DocbookHtmlhelp, "DocbookHtmlhelp")
env.AddMethod(DocbookPdf, "DocbookPdf")
env.AddMethod(DocbookMan, "DocbookMan")
env.AddMethod(DocbookSlidesPdf, "DocbookSlidesPdf")
env.AddMethod(DocbookSlidesHtml, "DocbookSlidesHtml")
env.AddMethod(DocbookXInclude, "DocbookXInclude")
env.AddMethod(DocbookXslt, "DocbookXslt")
def exists(env) -> bool:
return True