#encoding=utf-8
'''
Copyright 2016 YANG Huan (sy.yanghuan@gmail.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import sys        

# Refuse to run on Python 2: print a hint and exit with status 1 before the
# remaining imports and definitions are evaluated.
if sys.version_info < (3, 0):
  print('python version need more than 3.x')
  sys.exit(1)
    
import os
import string
import collections
import codecs
import getopt
import re
import json
import xml.etree.ElementTree as ElementTree
import xml.dom.minidom as minidom
import xlrd

def fillvalue(parent, name, value, isschema):
  """Store ``value`` in ``parent``.

  parent   -- a list (``value`` is appended, ``name`` is ignored) or a
              mapping (``value`` is stored under ``name``).
  name     -- key used when ``parent`` is a mapping.
  value    -- the object to store.
  isschema -- when true and ``parent`` is a mapping, ``name`` must be a legal
              identifier: an ASCII letter or ``_`` followed by word characters.

  Raises ValueError when ``isschema`` is set and ``name`` is not a legal
  identifier.
  """
  if isinstance(parent, list):
    parent.append(value)
    return

  # The whole name must match; the previous pattern '^_|[a-zA-Z]\w*$' let
  # anything starting with '_' through because '|' binds loosest.
  if isschema and not re.match(r'[_a-zA-Z]\w*\Z', name):
    raise ValueError('%s is a illegal identifier' % name)
  parent[name] = value
    
def getindex(infos, name):
  """Return the position of the first entry of ``infos`` equal to ``name``, or -1 if none is."""
  return next((pos for pos, entry in enumerate(infos) if entry == name), -1)

def getscemainfo(typename, description):
  """Build a schema entry: ``[typename]``, or ``[typename, description]`` when a description is given.

  A BindType is replaced by its plain ``typename`` attribute first.
  """
  info = [typename.typename if isinstance(typename, BindType) else typename]
  if description:
    info.append(description)
  return info
        
def getexportmark(sheetName):
  """Extract the export mark from a sheet name.

  The mark is the identifier that follows the first ``|`` (optionally
  separated from it by whitespace), e.g. ``'Heroes|hero'`` -> ``'hero'``.
  An identifier is an ASCII letter or ``_`` followed by any word characters.

  Returns the mark as a string, or False when the sheet name has no mark.
  """
  # The previous group '(_|[a-zA-Z]\w+)' truncated '_foo' to '_' and rejected
  # single-letter marks because the alternation split the pattern in two.
  found = re.search(r'\|[' + string.whitespace + r']*([_a-zA-Z]\w*)', sheetName)
  return found.group(1) if found else False

def issignmatch(signarg, sign):
  """Tell whether a cell's ``sign`` is selected by the requested ``signarg``.

  With no ``signarg`` everything matches.  Otherwise ``sign`` is split on
  ``/ \\ , space :`` and the result is True when any piece is a substring of
  ``signarg`` (an empty piece is a substring of every string).
  """
  if signarg is None:
    return True
  return any(token in signarg for token in re.split(r'[/\\, :]', sign))

def isoutofdate(srcfile, tarfile):
  """Return True when ``tarfile`` is not an existing file or ``srcfile`` was modified after it."""
  if not os.path.isfile(tarfile):
    return True
  return os.path.getmtime(srcfile) > os.path.getmtime(tarfile)

def gerexportfilename(root, format_, folder):
  """Return the path ``<folder>/<root>.<format_>`` of an export file."""
  return os.path.join(folder, '.'.join((root, format_)))

def splitspace(s):
  """Strip ``s`` and split it on runs of ASCII whitespace."""
  separator = r'[' + string.whitespace + ']+'
  trimmed = s.strip()
  return re.split(separator, trimmed)

def buildbasexml(parent, name, value):
  """Write a scalar into ``parent``.

  When ``name`` equals the parent's tag the value becomes the text of a new
  child element called ``name``; otherwise it becomes the attribute ``name``
  of ``parent``.  The value is always converted with ``str``.
  """
  text = str(value)
  if parent.tag != name:
    parent.set(name, text)
    return

  child = ElementTree.Element(name)
  child.text = text
  parent.append(child)
            
def buildlistxml(parent, name, list_):
  """Append an element ``name`` to ``parent`` and emit every entry of ``list_`` below it.

  Entries are emitted under ``name`` with its last character removed.
  """
  container = ElementTree.Element(name)
  parent.append(container)
  entryname = name[:-1]
  for entry in list_:
    buildxml(container, entryname, entry)

def buildobjxml(parent, name, obj):
  """Append an element ``name`` to ``parent`` and emit every key/value pair of ``obj`` below it."""
  node = ElementTree.Element(name)
  parent.append(node)
  for key, member in obj.items():
    buildxml(node, key, member)
        
def buildxml(parent, name, value):
  """Dispatch ``value`` to the scalar, list or dict XML builder; other types are ignored."""
  if isinstance(value, (int, float, str)):
    buildbasexml(parent, name, value)
    return

  if isinstance(value, list):
    buildlistxml(parent, name, value)
    return

  if isinstance(value, dict):
    buildobjxml(parent, name, value)
            
def savexml(record):
  """Serialise ``record.obj`` as pretty-printed UTF-8 XML into ``record.exportfile``.

  The root element is named ``record.root``.  A confirmation line is printed
  once the file has been written.
  """
  book = ElementTree.ElementTree()
  # Give the tree an ``append`` so the builders can treat it like an element:
  # the first appended element becomes the document root.
  book.append = lambda e: book._setroot(e)
  buildxml(book, record.root, record.obj)

  raw = ElementTree.tostring(book.getroot(), 'utf-8')
  document = minidom.parseString(raw)
  with codecs.open(record.exportfile, 'w', 'utf-8') as out:
    document.writexml(out, '', '  ', '\n', 'utf-8')

  print('save %s from %s in %s' % (record.exportfile, record.sheet.name, record.path))
  
def newline(count):
  """Return a line break followed by ``count`` levels of two-space indentation."""
  return ''.join(('\n', '  ' * count))
  
def tolua(obj, indent = 1):
  """Yield the pieces of a Lua table literal for ``obj``.

  Scalars (int, float, str) are emitted through ``json.dumps``.  Anything else
  is emitted as ``{ ... }``: lists as positional entries, other containers as
  ``key = value`` entries, one entry per line at ``indent`` levels.
  """
  if isinstance(obj, (int, float, str)):
    yield json.dumps(obj, ensure_ascii = False)
    return

  keyed = not isinstance(obj, list)
  yield '{'
  for position, entry in enumerate(obj):
    if position > 0:
      yield ','
    yield newline(indent)
    if keyed:
      key, entry = entry, obj[entry]
      yield key
      yield ' = '
    for part in tolua(entry, indent + 1):
      yield part
  yield newline(indent - 1)
  yield '}'
    
def toycl(obj, indent = 0):
  """Yield the pieces of the ycl text for the container ``obj``.

  Every entry starts on a new line at ``indent`` levels.  Mapping entries are
  written as ``key = scalar`` or ``key { ... }``; list entries omit the key.
  Scalars (int, float, str) are emitted through ``json.dumps``.
  """
  keyed = not isinstance(obj, list)
  for entry in obj:
    yield newline(indent)
    if keyed:
      key, entry = entry, obj[entry]
      yield key

    if isinstance(entry, (int, float, str)):
      if keyed:
        yield ' = '
      yield json.dumps(entry, ensure_ascii = False)
      continue

    if keyed:
      yield ' '
    yield '{'
    for part in toycl(entry, indent + 1):
      yield part
    yield newline(indent)
    yield '}'

def exportexcel(context):
  """Run a full export for ``context`` and print a completion message."""
  exporter = Exporter(context)
  exporter.export()
  print("export finsish successful!!!")
    
class BindType:
  """A column type bound to a field of another sheet, e.g. ``int(hero.id)``.

  Instances compare equal to their plain type name so callers can write
  ``typename == 'int'`` for both bound and unbound types.
  """

  def __init__(self, type_):
    self.typename = type_
    # Target of the binding; filled in by the code that parses the type.
    self.mark = None
    self.field = None

  def __eq__(self, other):
    return self.typename == other

  # Defining __eq__ alone would make instances unhashable; hash like the
  # type name so equality and hashing stay consistent.
  def __hash__(self):
    return hash(self.typename)

  def __repr__(self):
    return 'BindType(%r, mark=%r, field=%r)' % (self.typename, self.mark, self.field)
    
class Record:
  """One exported sheet: where it came from, where it goes and what it contains."""

  def __init__(self, path, sheet, exportfile, root, item, obj, exportmark):
    self.path, self.sheet = path, sheet
    self.exportfile, self.root = exportfile, root
    self.item = item
    self.setobj(obj)
    self.exportmark = exportmark

  def setobj(self, obj):
    """Split the ``(schema, data)`` pair ``obj`` into ``self.schema`` and ``self.obj``.

    A falsy ``obj`` clears both attributes.
    """
    if obj:
      self.schema = obj[0]
      self.obj = obj[1]
    else:
      self.schema = None
      self.obj = None
        
class Constraint:
  """A reference from one cell to the field ``field`` of the sheet marked ``mark``."""

  def __init__(self, mark, filed):
    self.mark, self.field = mark, filed
        
class Exporter:
  configsheettitles = ('name', 'value', 'type', 'sign', 'description')
  spacemaxrowcount = 3
  
  def __init__(self, context):
    """Remember ``context`` and start with no records and no constraints."""
    self.records, self.constraints = [], []
    self.context = context
  
  def gettype(self, type_):
    """Classify a type string from a sheet.

    Returns 'list' for ``T[]``, 'obj' for ``{...}``, the name itself for the
    basic types int/double/string/bool, or a BindType (with ``mark`` and
    ``field`` set) for a bound type such as ``int(hero.id)``.

    Raises ValueError for anything else, including empty or one-character
    strings (which previously escaped as IndexError).
    """
    if type_.endswith('[]'):
      return 'list'
    if len(type_) >= 2 and type_[0] == '{' and type_[-1] == '}':
      return 'obj'
    if type_ in ('int', 'double', 'string', 'bool'):
      return type_

    # Bound type: "<int|string> (<mark>.<field>)".
    p = re.search(r'(int|string)[' + string.whitespace + r']*\((\S+)\.(\S+)\)', type_)
    if p:
      type_ = BindType(p.group(1))
      type_.mark = p.group(2)
      type_.field = p.group(3)
      return type_

    raise ValueError('%s is not a legal type' % type_)
    
  def buildlistexpress(self, parent, type_, name, value, isschema):
    """Build a list value (or its schema entry) of type ``T[]`` and store it in ``parent``.

    In schema mode the element type is expanded once and ``value`` is used as
    the description.  Otherwise ``value`` is stripped of ``[`` / ``]``, split
    on commas, and each piece is converted with the element type.
    """
    elementtype = type_[:-2]
    collected = []
    if not isschema:
      for piece in value.strip('[]').split(','):
        self.buildexpress(collected, elementtype, name, piece)
    else:
      self.buildexpress(collected, elementtype, name, None, isschema)
      collected = getscemainfo(collected[0], value)

    fillvalue(parent, name, collected, isschema)
      
  def buildobjexpress(self, parent, type_, name, value, isschema):
    """Build an object value (or its schema entry) of type ``{T a;U b}`` and store it in ``parent``.

    The type is stripped of braces and split on ``;`` into ``<type> <name>``
    declarations.  In schema mode every declaration is expanded and ``value``
    is used as the description.  Otherwise ``value`` is split the same way and
    paired with the declarations by position; surplus declarations or values
    are ignored.
    """
    fields = collections.OrderedDict()
    declarations = type_.strip('{}').split(';')
    if isschema:
      for declaration in declarations:
        fieldtype, fieldname = splitspace(declaration)
        self.buildexpress(fields, fieldtype, fieldname, None, isschema)
      fields = getscemainfo(fields, value)
    else:
      fieldvalues = value.strip('{}').split(';')
      for declaration, fieldvalue in zip(declarations, fieldvalues):
        fieldtype, fieldname = splitspace(declaration)
        self.buildexpress(fields, fieldtype, fieldname, fieldvalue)

    fillvalue(parent, name, fields, isschema)
      
  def buildbasexpress(self, parent, type_, name, value, isschema):
    """Convert a scalar cell and store it in ``parent`` under ``name``.

    type_    -- type string: int, double, string, bool or a bound type.
    value    -- in schema mode the description; otherwise the cell text.
    isschema -- when true only a schema entry is produced.

    Outside schema mode a whitespace-only cell of a non-string type is
    skipped.  A bound type additionally registers a constraint that is checked
    after all sheets are read.

    Raises ValueError when the text cannot be converted to the requested type.
    """
    typename = self.gettype(type_)
    if isschema:
      value = getscemainfo(typename, value)
    else:
      # Test the cell text itself (this used to read an unrelated name ``v``).
      if value.isspace() and typename != 'string':
        return

      if typename == 'int':
        value = int(float(value))
      elif typename == 'double':
        value = float(value)
      elif typename == 'string':
        if value.endswith('.0'):          # numeric cells may read like "123.0"
          try:
            value = str(int(float(value)))
          except ValueError:
            value = str(value)
        else:
          value = str(value)
      elif typename == 'bool':
        try:
          value = int(float(value)) != 0
        except ValueError:
          value = value.lower()
          if value in ('false', 'no', 'off'):
            value = False
          elif value in ('true', 'yes', 'on'):
            value = True
          else:
            raise ValueError('%s is a illegal bool value' % value)
    fillvalue(parent, name, value, isschema)

    if not isschema and isinstance(typename, BindType):
      self.addconstraint(typename.mark, typename.field, (type_, name, value))
        
  def buildexpress(self, parent, type_, name, value, isschema = False):
    """Route a typed value to the list, object or scalar builder according to ``type_``."""
    kind = self.gettype(type_)
    if kind == 'list':
      builder = self.buildlistexpress
    elif kind == 'obj':
      builder = self.buildobjexpress
    else:
      builder = self.buildbasexpress
    builder(parent, type_, name, value, isschema)
      
  def getrootname(self, exportmark, isitem):
    """Return the root name: ``exportmark`` followed by the configured extension, if any.

    ``isitem`` does not influence the result.
    """
    suffix = self.context.extension or ''
    return exportmark + suffix

  def export(self):
    """Export every marked sheet of every workbook listed in ``self.context.path``.

    The path list is split on ``,``, ``;`` and ``|``; empty pieces are ignored
    and surrounding whitespace is removed.  A sheet is exported when its name
    carries an export mark.  A sheet whose name ends with ``<<`` starts a
    combined output: it and every following marked sheet of the same workbook
    are merged into one record.  After all workbooks are read the constraints
    are checked and the records are written.

    Raises ValueError for duplicate paths or root names and for constraint
    violations; errors from reading or converting sheets propagate.
    """
    # Use the exporter's own context (not a module-level global) so the class
    # also works when driven from other code.
    paths = re.split(r'[,;|]+', self.context.path.strip())

    for path in paths:
      self.path = path.strip()
      if not self.path:
        continue

      self.checkpath(self.path)
      data = xlrd.open_workbook(self.path)
      cout = None      # (schema, data) pair of the combined output, once started
      for sheet in data.sheets():
        exportmark = getexportmark(sheet.name)
        self.sheetname = sheet.name
        if not exportmark:
          continue

        coutmark = sheet.name.endswith('<<')
        configtitleinfo = self.getconfigsheetfinfo(sheet)
        if not configtitleinfo:
          root = self.getrootname(exportmark, not coutmark)
          item = exportmark
        else:
          root = self.getrootname(exportmark, False)
          item = None

        if not cout:
          exportfile = gerexportfilename(root, self.context.format, self.context.folder)
          self.checksheetname(self.path, sheet.name, root)

          # Sheets are always re-exported; skipping up-to-date files is disabled.
          if item:
            exportobj = self.exportitemsheet(sheet)
          else:
            exportobj = self.exportconfigsheet(sheet, configtitleinfo)

          if coutmark:
            if not item:
              cout = exportobj
            else:
              # Wrap the item sheet under its own name, the same way later
              # item sheets are merged below.  The rows must be stored before
              # ``item`` and ``exportobj`` are replaced, otherwise they are lost.
              cout = (collections.OrderedDict(), collections.OrderedDict())
              cout[0][item] = [[exportobj[0]]]
              obj = exportobj[1]
              if obj:
                cout[1][item] = obj
              item = None
              exportobj = cout

          self.addrecord(self.path, sheet, exportfile, root, item, exportobj, exportmark)
        else:
          # Merge into the combined output started by an earlier '<<' sheet.
          if item:
            exportobj = self.exportitemsheet(sheet)
            cout[0][item] = [[exportobj[0]]]
            obj = exportobj[1]
            if obj:
              cout[1][item] = obj
          else:
            exportobj = self.exportconfigsheet(sheet, configtitleinfo)
            cout[0].update(exportobj[0])
            obj = exportobj[1]
            if obj:
              cout[1].update(obj)

    self.checkconstraint()
    self.saves()
    
  def getconfigsheetfinfo(self, sheet):
    """Locate the config-sheet columns in the first row of ``sheet``.

    Returns the tuple ``(name, value, type, sign, description)`` of column
    indexes (-1 for a missing sign or description column), or None when the
    name, value or type title is missing.
    """
    header = sheet.row_values(0)
    positions = tuple(getindex(header, title) for title in self.configsheettitles[:5])
    if -1 in positions[:3]:
      return None
    return positions
        
  def exportitemsheet(self, sheet):
    """Export a table-style sheet.

    Rows 0-3 of the sheet are read as column descriptions, types, names and
    signs; data rows start at row 4.

    Returns ``(schemaobj, list_)``: ``schemaobj`` is an OrderedDict with one
    schema entry per exported column (filled only when
    ``self.context.codegenerator`` is set) and ``list_`` holds one OrderedDict
    per data row that produced at least one value.

    Any exception is re-raised with a location message appended to its args.
    """
    descriptions = sheet.row_values(0)
    types = sheet.row_values(1)
    names = sheet.row_values(2)
    signs = sheet.row_values(3)
    
    # Per column: (type, name, whether the column's sign is selected).
    titleinfos = []
    schemaobj = collections.OrderedDict()
    
    try:
      for colindex in range(sheet.ncols):
        type_ = str(types[colindex]).strip()
        name = str(names[colindex]).strip()
        signmatch = issignmatch(self.context.sign, str(signs[colindex]).strip())
        titleinfos.append((type_, name, signmatch))
        
        if self.context.codegenerator:
          if type_ and name and signmatch:
            self.buildexpress(schemaobj, type_, name, descriptions[colindex], True)
                    
    except Exception as e: 
      # NOTE(review): type_ / name are unbound here if the failure happens
      # before their first assignment - confirm whether that can occur.
      e.args += ('%s has a title error, %s at %d column in %s' % (sheet.name, (type_, name), colindex + 1, self.path) , '')
      raise e
      
    list_ = []
    # Rows are only read when at least one column has a type, a name and a matching sign.
    hasexport = next((i for i in titleinfos if i[0] and i[1] and i[2]), False)
    if hasexport:
      try:
        spacerowcount = 0
        
        for self.rowindex in range(4, sheet.nrows):
          row = sheet.row_values(self.rowindex)
          item = collections.OrderedDict()
          
          firsttext = str(row[0]).strip()
          if not firsttext:
            spacerowcount += 1
            if spacerowcount >= self.spacemaxrowcount:      # too many rows with an empty first cell: ignore the rest of the sheet
              break
          
          if not firsttext or firsttext[0] == '#':    # empty first cell or '#' comment: skip this row
            continue
             
          # A first cell of the form "!sign!rest": the row is skipped when the
          # sign matches the requested one, otherwise the prefix is cut off.
          skiptokenindex = None   
          if firsttext[0] == '!':
            nextpos = firsttext.find('!', 1)
            if nextpos >= 2:
              signtoken = firsttext[1: nextpos]
              if issignmatch(self.context.sign, signtoken.strip()):
                continue
              else:
                skiptokenindex = len(signtoken) + 2
                 
          for self.colindex in range(sheet.ncols):
            signmatch = titleinfos[self.colindex][2]
            if signmatch:
              type_ = titleinfos[self.colindex][0]
              name = titleinfos[self.colindex][1]
              value = str(row[self.colindex])
              if skiptokenindex and self.colindex == 0:
                value = value.lstrip()[skiptokenindex:]
                
              if type_ and name and value:
                self.buildexpress(item, type_, name, value)  
            # A processed row resets the empty-row counter (done once per column).
            spacerowcount = 0
                
          if item:
            list_.append(item)
      except Exception as e:        
          # NOTE(review): self.colindex is only assigned by the column loop
          # above; an earlier failure may find it unset or stale - confirm.
          e.args += ('%s has a error in %d row %d column in %s' % (sheet.name, self.rowindex + 1, self.colindex + 1, self.path) , '')
          raise e
    
    return (schemaobj, list_)
        
  def exportconfigsheet(self, sheet, titleindexs):
    """Export a key/value style sheet.

    sheet       -- the sheet; row 0 holds the titles, data starts at row 1.
    titleindexs -- column indexes ``(name, value, type, sign, description)``;
                   sign and description are -1 when the column is absent.

    Returns ``(schemaobj, obj)``, two OrderedDicts keyed by entry name:
    ``schemaobj`` is filled only when ``self.context.codegenerator`` is set,
    ``obj`` receives the converted values.

    Any exception is re-raised with a location message appended to its args.
    """
    nameindex = titleindexs[0]
    valueindex = titleindexs[1]
    typeindex = titleindexs[2]
    signindex = titleindexs[3]
    descriptionindex = titleindexs[4]

    schemaobj = collections.OrderedDict()
    obj = collections.OrderedDict()

    # Pre-bind everything the error message reads so the handler cannot fail.
    type_ = name = value = None
    self.rowindex = 0

    try:
      spacerowcount = 0

      for self.rowindex in range(1, sheet.nrows):
        row = sheet.row_values(self.rowindex)
        # Values live in the value column; constraints report this position.
        self.colindex = valueindex

        name = str(row[nameindex]).strip()
        value = str(row[valueindex])
        type_ = str(row[typeindex]).strip()
        # Without a description column (-1) there is no description; indexing
        # with -1 would silently read the last column instead.
        description = str(row[descriptionindex]).strip() if descriptionindex >= 0 else ''

        # -1 means "no sign column"; index 0 is a valid column.
        if signindex >= 0:
          sign = str(row[signindex]).strip()
          if not issignmatch(self.context.sign, sign):
            continue

        if not name and not value and not type_:
          spacerowcount += 1
          if spacerowcount >= self.spacemaxrowcount:
            break            # too many empty rows: ignore the rest of the sheet
          continue

        if name and type_:
          if name[0] != '#':         # '#' marks a commented-out entry
            if self.context.codegenerator:
              self.buildexpress(schemaobj, type_, name, description, True)
            if value:
              self.buildexpress(obj, type_, name, value)
          spacerowcount = 0

    except Exception as e:
      e.args += ('%s has a error in %d row (%s, %s, %s) in %s' % (sheet.name, self.rowindex + 1, type_, name, value, self.path) , '')
      raise

    return (schemaobj, obj)
    
  def saves(self):
    """Write every record that has data and, if requested, the schema file.

    When ``self.context.codegenerator`` is set, the schemas of all written
    records are dumped as JSON to that path (its directory is created when
    missing).
    """
    schemas = []
    for record in self.records:
      if not record.obj:
        continue
      self.save(record)

      if self.context.codegenerator:        # has code generator
        schemas.append({ 'exportfile' : record.exportfile, 'root' : record.root, 'item' : record.item or record.exportmark, 'schema' : record.schema })

    if not (schemas and self.context.codegenerator):
      return

    payload = json.dumps(schemas, ensure_ascii = False, indent = 2)
    targetdir = os.path.dirname(self.context.codegenerator)
    if targetdir and not os.path.isdir(targetdir):
      os.makedirs(targetdir)
    with codecs.open(self.context.codegenerator, 'w', 'utf-8') as out:
      out.write(payload)
                
  def save(self, record):
    """Write ``record.obj`` to ``record.exportfile`` in the configured format.

    Records without data are ignored.  The output folder is created when
    missing.  Supported formats are json, xml, lua and ycl; any other format
    writes nothing.
    """
    if not record.obj:
      return

    if not os.path.isdir(self.context.folder):
      os.makedirs(self.context.folder)

    format_ = self.context.format
    if format_ == 'xml':
      if record.item:
        record.obj = { record.item : record.obj }
      savexml(record)
      return

    if format_ == 'json':
      text = json.dumps(record.obj, ensure_ascii = False, indent = 2)
    elif format_ == 'lua':
      text = 'return ' + "".join(tolua(record.obj))
    elif format_ == 'ycl':
      pieces = toycl(record.obj)
      next(pieces) # skip first newline
      text = "".join(pieces)
    else:
      return

    with codecs.open(record.exportfile, 'w', 'utf-8') as out:
      out.write(text)
    print('save %s from %s in %s' % (record.exportfile, record.sheet.name, record.path))

  def addrecord(self, path, sheet, exportfile, root, item, obj, exportmark):
    """Create a Record from the arguments and append it to ``self.records``."""
    self.records.append(Record(path, sheet, exportfile, root, item, obj, exportmark))
      
  def checksheetname(self, path, sheetname, root):
    """Raise ValueError when a record with the root name ``root`` already exists."""
    for existing in self.records:
      if existing.root == root:
        raise ValueError('%s in %s is already defined in %s' % (root, path, existing.path))
      
  def checkpath(self, path):
    """Raise ValueError when a record read from ``path`` already exists."""
    for existing in self.records:
      if existing.path == path:
        raise ValueError('%s is already export' % path)
            
  def addconstraint(self, mark, field, valueinfo):
    """Record that ``valueinfo`` must exist as ``field`` in the sheet marked ``mark``.

    The current path, sheet name, row and column are stored with the
    constraint so a later failure can be reported precisely.
    """
    constraint = Constraint(mark, field)
    constraint.valueinfo = valueinfo
    constraint.path, constraint.sheetname = self.path, self.sheetname
    constraint.rowindex, constraint.colindex = self.rowindex, self.colindex
    self.constraints.append(constraint)

  def checkconstraint(self):
    """Verify every recorded constraint against the exported records.

    For each constraint the record whose ``item`` equals the constraint's
    mark must exist, and one of its rows must hold the referenced value in
    the referenced field.

    Raises ValueError naming the offending cell when the mark or the value
    cannot be found.
    """
    for c in self.constraints:
      r = next((r for r in self.records if r.item == c.mark), None)
      if r is None:
        raise ValueError('%s(mark) not found ,%s has a constraint %s error in %d row %d column in %s' % (c.mark, c.sheetname, c.valueinfo, c.rowindex + 1, c.colindex + 1, c.path))

      if not r.obj:  # record carries no data yet: read its sheet now
        exportobj = self.exportitemsheet(r.sheet)
        r.setobj(exportobj)

      v = c.valueinfo[2]
      # Rows omit fields whose cell was empty, so look the field up with
      # .get(): a row without the field simply does not match.
      if not any(row.get(c.field) == v for row in r.obj):
        raise ValueError('%s(field) %s not found ,%s has a constraint %s error in %d row %d column in %s' % (c.field, v, c.sheetname, c.valueinfo, c.rowindex + 1, c.colindex + 1, c.path))
    
# Command-line entry point: parse the options into a Context and run the export.
if __name__ == '__main__':
  # Plain attribute holder for the options; its docstring doubles as the usage text.
  class Context:
    '''usage python proton.py [-p filelist] [-f outfolder] [-e format]
    Arguments
    -p      : input excel files, use , or ; or | to separate
    -f      : out folder
    -e      : format, json or xml or lua or ycl

    Options
    -s      :sign, controls whether the column is exported, defalut all export
    -t      : suffix, export file suffix
    -c      : a file path, save the excel structure to json
              the external program uses this file to automatically generate the read code
    -h      : print this help message and exit
    
    https://github.com/yanghuan/proton'''
  
  print('argv:' , sys.argv)
  try:
    opst, args = getopt.getopt(sys.argv[1:], 'p:f:e:s:t:c:h')
  except getopt.GetoptError as err:
    # Unknown option or missing argument: explain and show the usage
    # instead of dying with a traceback.
    print(err)
    print(Context.__doc__)
    sys.exit(2)

  context = Context()
  context.path = None
  context.folder = '.'
  context.format = 'json'
  context.sign = None
  context.extension = None
  context.codegenerator = None

  # NOTE: being at module level, the loop variables below are module globals.
  for op,v in opst:
    if op == '-p':
      context.path = v
    elif op == '-f':
      context.folder = v
    elif op == '-e':
      context.format = v.lower() 
    elif op == '-s':
      context.sign = v 
    elif op == '-t':
      context.extension = v
    elif op == '-c':
      context.codegenerator = v    
    elif op == '-h':
      print(Context.__doc__)
      sys.exit()
      
  # An input path is mandatory.
  if not context.path:
    print(Context.__doc__)
    sys.exit(2)
    
  exportexcel(context)