#!/usr/bin/python
# -*- coding: utf-8 -*-
#**************************************************************************************************
# DMTF - Distributed Management Task Force, Inc. - http://www.dmtf.org
# See dsp2023_readme.txt for copyright information.
#
# mrp2html.py - part of the DMTF DSP2023 zip archive.
#
# Python script for converting MRP XML to HTML.
# Invoke with -h or --help for help.
#
# OS platforms: Windows, Linux
#
# Python versions: 2.6, 2.7
#
# Additional Python packages:
#   lxml 2.3 - For Windows, download the MS Windows installer version from
#              http://pypi.python.org/pypi/lxml/2.3 because that version has libxml2 statically
#              linked and thus has no further dependencies. See the DSP2023 readme file for
#              installation instructions.
#
# Change history: See readme file of DSP2023.
#
# Last Updated: 2014-01-16
#**************************************************************************************************

import sys, os, re, string, urllib
from lxml import etree

my_name = os.path.basename(sys.argv[0])     # Command name of the script
my_version = '1.1.1'                        # Version of this script (typically also the version of
                                            # DSP2023 in which this script was included)

# Default locations of dependent files, all relative to the location of this script.
# These relative locations reflect the standard installation of DSP2023.
default_xsltfile  = "../profiles/resources/dsp8029_1.1.1.xsl"   # default location of DSP8029 XSLT file
default_cimxmlurl = "../profiles/resources/cim-xml"             # default location of CIM-XML root
default_cssurl    = "../profiles/resources/dsp8054_1.1.1.css"   # default location of DSP8054 CSS file
default_tocgenurl = "../profiles/resources/tocgen.js"           # default location of tocgen.js file
default_catfile   = "../localcopy/catalog.xml"                  # default location of XML Catalog file

# Exit codes returned by main() and used as the exit code of the script.
rc_success = 0  # exit code for successful completion (including possible XSLT transformation errors)
rc_error = 1    # exit code for errors during execution (other than XSLT transformation errors)
rc_usage = 2    # exit code for command line syntax errors

#--------------------------------------------------------------------------------------------------
def Usage ():
    """Print the command usage to stdout.

    The help text is collected as a list with one entry per output line and
    written with a single print. The defaults shown for the options are taken
    from the module-level default_* variables, and the command name from
    my_name.
    """

    # The option descriptions were originally laid out for a text width of
    # about 119 characters; keep new lines within that width.
    usage_lines = [
        "",
        "Converts an MRP XML file into an HTML file.",
        "",
        "Usage:",
        "",
        "  "+my_name+"  [options]  {xmlfile}  [{htmlfile}]",
        "",
        "Where:",
        "",
        "  {xmlfile}             Relative or absolute path name of MRP XML input file.",
        "                        The file name must end with .mrp.xml or _mrp.xml.",
        "",
        "  {htmlfile}            Optional: Relative or absolute path name of HTML output file.",
        "                        Default: {xmlfile} with .mrp.xml changed to .html.",
        "",
        "Options:",
        "",
        "  --xsltfile {file}     Use this DSP8029 XSLT file instead of the default. {file} is the file name (and path) of the",
        "                        DSP8029 XSLT file. Relative paths are based on the current working directory when running this",
        "                        script.",
        "                        Default: "+default_xsltfile+", relative to the location of this script.",
        "",
        "  --cimxmlurl {file_url}    Use this CIM-XML root instead of the default. {file_url} is the directory name (and path)",
        "                        or the URL of a directory with CIM-XML files of the underlying schemas, assuming the following",
        "                        directory structure below the {file_url} directory:",
        "                          {schemaname}/{m.n}[+]/{classname}.xml",
        "                        Relative paths are based on the current working directory when running this script. XML Catalog",
        "                        resolution is applied to the URLs of the CIM-XML files (but not used in the XML Catalog file",
        "                        catalog.xml of DSP2023).",
        "                        Default: "+default_cimxmlurl+", relative to the location of this script.",
        "",
        "  --cssurl {file_url}   Use this DSP8054 CSS file instead of the default. {file_url} is the file name (and path) or the",
        "                        URL of the DSP8054 CSS file, or one that is compatible with it. Relative paths are based on the",
        "                        current working directory when running this script.",
        "                        Default: "+default_cssurl+", relative to the location of this script.",
        "",
        "  --tocgenurl {file_url}    Use this tocgen.js file instead of the default. {file_url} is the file name (and path) or",
        "                        the URL of tocgen.js of DSP2023. Relative paths are based on the current working directory when",
        "                        running this script.",
        "                        Default: "+default_tocgenurl+", relative to the location of this script.",
        "",
        "  --catfile {file}      Use this XML Catalog file instead of the default. {file} is the file name (and path) of the XML",
        "                        Catalog file catalog.xml of DSP2023, or one that is compatible with it. Relative paths are",
        "                        based on the current working directory when running this script. The conversion performed by",
        "                        this script resolves URIs using that XML Catalog for the following files:",
        "                          * CIM-XML files (not used in the XML Catalog file catalog.xml of DSP2023).",
        "                          * Text registry files specified in MRP XML, such as DSP8008.",
        "                        Default: "+default_catfile+", relative to the location of this script.",
        "",
        "  -v, --verbose         Verbose mode (prints more messages).",
        "  -h, --help, -?        Display this help text.",
        "",
        "Notes:",
        "  * Relative path names for images specified in the MRP XML file (e.g. diagrams) are relative to the MRP XML file.",
        "  * This script makes any relative paths absolute before using them.",
        "  * Options can be specified anywhere between positional parameters on the command line.",
        "  * In the standard DSP2023 installation, the defaults for all options work, even if the profile location is changed.",
        "",
        "Examples:",
        "  "+my_name+" dsp1117_1.0.0a.mrp.xml",
        # The option is spelled --cimxmlurl; the command line parser accepts no abbreviations.
        "  "+my_name+" -v --cimxmlurl C:\\DMTF\\MRP\\cim-xml mrp\\dsp1117_1.0.0a.mrp.xml html/dsp1117_1.0.0a.html",
    ]

    # A single-argument print(...) behaves the same as the print statement in Python 2.
    print("\n".join(usage_lines))

    return


#--------------------------------------------------------------------------------------------------
def absUrl(filename_or_url,base_filename):
    """Return an absolute URL for a file name or URL.

    Parameters:
      filename_or_url: A file name (relative or absolute), a "file:" URL, or
        an "http:" / "https:" URL.
      base_filename: Directory that relative file names are joined to.

    Returns:
      "http:" and "https:" URLs are returned unchanged. Everything else is
      turned into an absolute local path and returned as a "file:" URL built
      with urllib.pathname2url().
    """
    for remote_scheme in ("http:", "https:"):
        if filename_or_url.startswith(remote_scheme):
            return filename_or_url

    # Reduce a "file:" URL to a local path name; anything else already is one.
    local_path = filename_or_url
    if local_path.startswith("file:"):
        local_path = urllib.url2pathname(local_path[5:])

    if not os.path.isabs(local_path):
        resolved_path = os.path.normpath(os.path.join(base_filename,local_path))
    elif os.name == "nt" and not re.match("[a-zA-Z]:",local_path):
        # On Windows, isabs() is True also for a path without drive letter;
        # prepend the first two characters (the drive) of the current directory.
        resolved_path = os.getcwd()[0:2]+local_path
    else:
        resolved_path = local_path

    return "file:"+urllib.pathname2url(resolved_path)

#--------------------------------------------------------------------------------------------------
def absFilename(filename_or_url,base_filename):
    """Return an absolute local file name for a file name or "file:" URL.

    Parameters:
      filename_or_url: A file name (relative or absolute) or a "file:" URL.
      base_filename: Directory that relative file names are joined to.

    Returns:
      The absolute file name. Relative names are joined to base_filename and
      normalized with os.path.normpath(); absolute names are returned as is,
      except for the Windows case described below.

    Raises:
      ValueError: filename_or_url starts with "http:" or "https:".
    """
    if filename_or_url.startswith(("http:", "https:")):
        raise ValueError("Invalid URI scheme specified: "+filename_or_url)

    # Reduce a "file:" URL to a local path name; anything else already is one.
    local_path = filename_or_url
    if local_path.startswith("file:"):
        local_path = urllib.url2pathname(local_path[5:])

    if not os.path.isabs(local_path):
        return os.path.normpath(os.path.join(base_filename,local_path))

    if os.name == "nt" and not re.match("[a-zA-Z]:",local_path):
        # On Windows, isabs() is True also for a path without drive letter;
        # prepend the first two characters (the drive) of the current directory.
        return os.getcwd()[0:2]+local_path

    return local_path

#--------------------------------------------------------------------------------------------------
def removeCssComments(cssfiledata):
    """Return the CSS text with all /* ... */ comments removed.

    Comments may span multiple lines. Text inside single- or double-quoted
    CSS strings is left untouched, so a string such as "/* x */" is not
    mistaken for a comment.

    Parameters:
      cssfiledata: The content of a CSS file, as a string.

    Returns:
      The content without comments; everything else is unchanged.
    """

    def remover(match):
        matched = match.group(0)
        if matched.startswith('/*'):
            return ""           # a comment: drop it
        return matched          # a quoted string: keep it verbatim

    comment = r'/\*.*?\*/'
    # Quoted strings end at the closing quote on the same line; a backslash
    # escapes the next character (including a newline, matched by "." because
    # of re.DOTALL). Limiting strings to one line keeps a stray quote from
    # shielding comments on the following lines.
    double_quoted = r'"(?:\\.|[^"\\\n])*"'
    single_quoted = r"'(?:\\.|[^'\\\n])*'"

    # The alternatives are tried at each position from left to right, so
    # whichever construct starts first (comment or string) wins.
    pattern = re.compile("|".join((comment, double_quoted, single_quoted)), re.DOTALL)
    return pattern.sub(remover, cssfiledata)


#--------------------------------------------------------------------------------------------------
def resolveEmbeds(line):
    """Replace the #EMBED-...(file) statements in a line of the intermediate HTML.

    The following statements are recognized:
      #EMBED-FILE-AS-BASE64(file)  replaced with the base64-encoded file content
      #EMBED-FILE(file)            replaced with the file content, line ends
                                   normalized to "\\n"
      #EMBED-CSS-FILE(file)        like #EMBED-FILE, with CSS comments removed

    {file} is a file name or URL; relative file names are resolved against the
    global xml_base. Messages are printed if the global verbose_mode is set.

    If the file cannot be opened, an error message is printed, the statement
    is replaced with "#ERROR: ..." and the global errors counter is
    incremented. Errors raised while reading an opened file are not handled
    here. A statement without a file is reported with a warning and left
    unchanged.

    Parameters:
      line: A line of the intermediate HTML file.

    Returns:
      The line with all recognized statements replaced.
    """

    # Verbose mode message for each statement keyword.
    info_prefix = {
        "FILE-AS-BASE64": "Info: Embedding file as base64: ",
        "FILE":           "Info: Embedding file unchanged: ",
        "CSS-FILE":       "Info: Embedding CSS file:       ",
    }

    def embedder(match):
        global xml_base, verbose_mode, errors
        statement = match.group(0)
        keyword = match.group(1)
        filename = match.group(2)

        if filename == "":
            print("Warning: #EMBED-... statement without file specified in intermediate file: "+statement)
            return statement

        abs_url = absUrl(filename,xml_base)
        if verbose_mode:
            print(info_prefix[keyword]+abs_url)

        try:
            fp = urllib.urlopen(abs_url)
        except IOError as exc:
            print("Error: "+str(exc))
            errors += 1
            return "#ERROR: "+str(exc)

        # Close the handle even if reading fails; the read error itself is
        # passed on to the caller.
        try:
            filedata = fp.read()
        finally:
            fp.close()

        if keyword == "FILE-AS-BASE64":
            return filedata.encode("base64")

        text = filedata.replace("\r\n","\n").replace("\r","\n")
        if keyword == "CSS-FILE":
            return removeCssComments(text)
        return text

    # The file name is matched non-greedily so that it ends at the first ")".
    # A greedy match would run up to the last ")" in the line and swallow any
    # text (or further #EMBED-... statements) following the statement.
    pattern = re.compile(r'#EMBED-(FILE|FILE-AS-BASE64|CSS-FILE)\((.*?)\)',re.DOTALL | re.MULTILINE)
    return re.sub(pattern, embedder, line)

#--------------------------------------------------------------------------------------------------
def main():
    """Convert the MRP XML file named on the command line into an HTML file.

    Steps performed:
      * Parse the command line (sys.argv) and apply the defaults for options
        that were not specified.
      * Step 1: Resolve XIncludes in the MRP XML file, transform it with the
        DSP8029 XSLT and write the result into a temporary file
        {xmlfile-base}.tmp.html.
      * Step 2: Copy the temporary file line by line into the HTML output
        file, resolving #EMBED-... statements with resolveEmbeds(). The
        temporary file is removed afterwards, also if an error occurred.

    Sets the globals xml_base, verbose_mode and errors, which are used by
    resolveEmbeds().

    Returns:
      rc_success: Conversion completed (XSLT transformation errors are
        reported but do not change the exit code).
      rc_error:   Errors occurred during execution.
      rc_usage:   Command line error, or the help text was requested.
    """

    global xml_base, verbose_mode, errors

    print(my_name+" Version "+my_version)

    #
    # command line parsing
    #
    pos_argv = []                       # positional command line parameters
    verbose_mode = False                # verbose mode, controlled by -v option

    # Options that take a parameter, with the parameter name for error messages
    option_params = {
        "--xsltfile":  "{file}",
        "--cimxmlurl": "{file_url}",
        "--cssurl":    "{file_url}",
        "--tocgenurl": "{file_url}",
        "--catfile":   "{file}",
    }
    option_values = dict.fromkeys(option_params)    # None means: not specified

    helpmsg = "Invoke with -h or --help for usage help."
    _i = 1
    while _i < len(sys.argv):
        arg = sys.argv[_i]
        # startswith() instead of arg[0], so that an empty argument is treated
        # as a positional parameter instead of raising IndexError.
        if arg.startswith("-"):
            if arg in ("-h", "--help", "-?"):
                Usage()
                return rc_usage
            elif arg in option_params:
                _i += 1
                if _i == len(sys.argv):
                    print("Error: Missing "+option_params[arg]+" parameter for "+arg+" option.")
                    print(helpmsg)
                    return rc_usage
                option_values[arg] = sys.argv[_i]
            elif arg in ("-v", "--verbose"):
                verbose_mode = True
            else:
                print("Error: Invalid command line option: "+arg)
                print(helpmsg)
                return rc_usage
        else:
            pos_argv.append(arg)
        _i += 1
    del _i

    xsltfile  = option_values["--xsltfile"]
    cimxmlurl = option_values["--cimxmlurl"]
    cssurl    = option_values["--cssurl"]
    tocgenurl = option_values["--tocgenurl"]
    catfile   = option_values["--catfile"]

    if len(pos_argv) == 0:
        print("")
        print("Usage:  "+my_name+"  [options]  {xmlfile}  [{htmlfile}]")
        print("")
        print("Invoke with -h or --help for more usage help.")
        return rc_usage

    if len(pos_argv) > 2:
        print("Error: Too many positional command line parameters.")
        print(helpmsg)
        return rc_usage

    xml_filename = pos_argv[0]

    # Cut off the file extension by its length. (str.rstrip() is not suitable
    # for that: it removes any trailing characters contained in its argument,
    # which would also eat the end of names like "dsp1001m.mrp.xml".)
    xml_basename = None
    for suffix in (".mrp.xml", "_mrp.xml"):
        if xml_filename.endswith(suffix):
            xml_basename = xml_filename[:-len(suffix)]
            break
    if xml_basename is None:
        print("Error: XML input file does not have a file extension .mrp.xml or _mrp.xml: '%s'" % xml_filename)
        return rc_usage

    if len(pos_argv) >= 2:
        html_filename = pos_argv[1]
    else:
        html_filename = xml_basename+".html"

    # Apply the defaults for the options.
    # Note:
    #   For defaulted options, the directory of this script is used as a base path.
    #   For specified options, the current working directory is used as a base path.

    try:
        cwd_base = os.getcwd()                                              # base path for relative paths using current working directory
        script_base = os.path.dirname(absFilename(sys.argv[0],cwd_base))    # base path for relative paths using location of this script
        xml_base = os.path.dirname(absFilename(xml_filename,cwd_base))      # base path for relative paths using location of MRP XML file

        if xsltfile is not None:
            xsltfile = absFilename(xsltfile,cwd_base)
        else:
            xsltfile = absFilename(default_xsltfile,script_base)

        if catfile is not None:
            catfile = absFilename(catfile,cwd_base)
        else:
            catfile = absFilename(default_catfile,script_base)
    except ValueError as exc:
        # absFilename() rejects http: and https: URLs where a file is required.
        print("Error: "+str(exc))
        print(helpmsg)
        return rc_usage

    if cimxmlurl is not None:
        cimxmlurl = absUrl(cimxmlurl,cwd_base)
    else:
        cimxmlurl = absUrl(default_cimxmlurl,script_base)

    if cssurl is not None:
        cssurl = absUrl(cssurl,cwd_base)
    else:
        cssurl = absUrl(default_cssurl,script_base)

    if tocgenurl is not None:
        tocgenurl = absUrl(tocgenurl,cwd_base)
    else:
        tocgenurl = absUrl(default_tocgenurl,script_base)

    # Parameters for DSP8029 XSLT
    # Note: The values need to be in single quotes.
    params = dict()
    params["standalone"]       = "'true'"
    params["linenumbers"]      = "'true'"
    params["cim-xml-root-url"] = "'"+cimxmlurl+"'"
    params["css-url"]          = "'"+cssurl+"'"
    params["tocgen-url"]       = "'"+tocgenurl+"'"
    params["verbose"]          = "'true'" if verbose_mode else "'false'"

    print("Processing MRP XML file:  "+xml_filename)
    print("Generating HTML file:     "+html_filename)

    if verbose_mode:
        print("Using DSP8029 XSLT file:  "+xsltfile)
        print("Using XML Catalog file:   "+catfile)
        print("Using CIM-XML root URL:   "+cimxmlurl)
        print("Using DSP8054 CSS URL:    "+cssurl)
        print("Using tocgen.js URL:      "+tocgenurl)

    errors = 0
    transform_errors = 0

    if verbose_mode:
        print("Step 1: Transforming the MRP XML file using XSLT...")

    os.environ['XML_CATALOG_FILES'] = catfile                   # Syntax: Space-separated URLs or file names of XML Catalog files

    if verbose_mode:
        os.environ['XML_DEBUG_CATALOG'] = "1"                   # libxml2 documents this as a means to get URI resolver
                                                                # messages printed, but it does not seem to work.

    try:
        xml_tree = etree.parse(xml_filename)

        xml_tree.xinclude()                                     # Resolve XInclude (xi:include)

        xslt_root_elem = etree.parse(xsltfile).getroot()
        xslt_transform = etree.XSLT(xslt_root_elem)
        html_tree = xslt_transform(xml_tree,**params)           # This performs the XSLT transformation

        for entry in xslt_transform.error_log:
            if re.match(".*Error:.*",entry.message):
                transform_errors += 1
            print(entry.message)
            sys.stdout.flush()

        # The following parameters overwrite the output parameters set in DSP8029. Keep in sync.
        html_str = etree.tostring(html_tree,
            encoding="utf-8", method="html", xml_declaration=None,
            pretty_print=False, with_tail=True, standalone=None,
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">')

        tmphtml_filename = xml_basename + ".tmp.html"

        try:
            with open(tmphtml_filename,"w") as tmphtml_fp:
                tmphtml_fp.write(html_str)

            if verbose_mode:
                print("Step 2: Embedding dependent files...")
                sys.stdout.flush()

            # Nested with statements, because a single with statement with
            # multiple items is not available in Python 2.6.
            with open(tmphtml_filename, 'r') as tmphtml_fp:
                with open(html_filename, 'w') as html_fp:
                    for line in tmphtml_fp:
                        html_fp.write(resolveEmbeds(line))
        finally:
            # Do not leave the temporary file behind, even if an error occurred.
            if os.path.exists(tmphtml_filename):
                os.remove(tmphtml_filename)

    except IOError as exc:
        print("IOError: "+str(exc))
        sys.stdout.flush()
        errors += 1

    except etree.XMLSyntaxError as exc:
        print("XMLSyntaxError: "+str(exc))
        sys.stdout.flush()
        errors += 1

    except etree.XSLTApplyError as exc:
        print("XSLTApplyError: "+str(exc))
        sys.stdout.flush()
        errors += 1

    if transform_errors == 0:
        print("Found no XSLT transformation errors.")
    else:
        print("Found "+str(transform_errors)+" XSLT transformation errors.")

    if errors == 0:
        print("Success.")
        rc = rc_success
    else:
        print("Error: Found "+str(errors)+" errors.")
        rc = rc_error

    sys.stdout.flush()

    return rc

if __name__ == "__main__":
    # Run the conversion and hand its return code to the operating system.
    # sys.exit() is used instead of the exit() helper, which is added by the
    # site module for interactive use and is not guaranteed to be available.
    rc = main()
    sys.exit(rc)
