diff options
Diffstat (limited to 'python/toml/toml.py')
-rw-r--r-- | python/toml/toml.py | 643 |
1 files changed, 643 insertions, 0 deletions
diff --git a/python/toml/toml.py b/python/toml/toml.py new file mode 100644 index 00000000000..8ab0d3e260c --- /dev/null +++ b/python/toml/toml.py @@ -0,0 +1,643 @@ +import datetime, decimal, re + +class TomlTz(datetime.tzinfo): + + def __new__(self, toml_offset): + self._raw_offset = toml_offset + self._hours = int(toml_offset[:3]) + self._minutes = int(toml_offset[4:6]) + + def tzname(self, dt): + return "UTC"+self._raw_offset + + def utcoffset(self, dt): + return datetime.timedelta(hours=self._hours, minutes=self._minutes) + +try: + _range = xrange +except NameError: + unicode = str + _range = range + basestring = str + unichr = chr + +def load(f, _dict=dict): + """Returns a dictionary containing the named file parsed as toml.""" + if isinstance(f, basestring): + with open(f) as ffile: + return loads(ffile.read(), _dict) + elif isinstance(f, list): + for l in f: + if not isinstance(l, basestring): + raise Exception("Load expects a list to contain filenames only") + d = _dict() + for l in f: + d.append(load(l)) + r = _dict() + for l in d: + toml_merge_dict(r, l) + return r + elif f.read: + return loads(f.read(), _dict) + else: + raise Exception("You can only load a file descriptor, filename or list") + +def loads(s, _dict=dict): + """Returns a dictionary containing s, a string, parsed as toml.""" + implicitgroups = [] + retval = _dict() + currentlevel = retval + if isinstance(s, basestring): + try: + s.decode('utf8') + except AttributeError: + pass + sl = list(s) + openarr = 0 + openstring = False + openstrchar = "" + multilinestr = False + arrayoftables = False + beginline = True + keygroup = False + keyname = 0 + delnum = 1 + for i in range(len(sl)): + if sl[i] == '\r' and sl[i+1] == '\n': + sl[i] = ' ' + continue + if keyname: + if sl[i] == '\n': + raise Exception("Key name found without value. Reached end of line.") + if openstring: + if sl[i] == openstrchar: + keyname = 2 + openstring = False + openstrchar = "" + continue + elif keyname == 1: + if sl[i].isspace(): + keyname = 2 + continue + elif sl[i].isalnum() or sl[i] == '_' or sl[i] == '-': + continue + elif keyname == 2 and sl[i].isspace(): + continue + if sl[i] == '=': + keyname = 0 + else: + raise Exception("Found invalid character in key name: '"+sl[i]+"'. Try quoting the key name.") + if sl[i] == "'" and openstrchar != '"': + k = 1 + try: + while sl[i-k] == "'": + k += 1 + if k == 3: + break + except IndexError: + pass + if k == 3: + multilinestr = not multilinestr + openstring = multilinestr + else: + openstring = not openstring + if openstring: + openstrchar = "'" + else: + openstrchar = "" + if sl[i] == '"' and openstrchar != "'": + oddbackslash = False + k = 1 + tripquote = False + try: + while sl[i-k] == '"': + k += 1 + if k == 3: + tripquote = True + break + while sl[i-k] == '\\': + oddbackslash = not oddbackslash + k += 1 + except IndexError: + pass + if not oddbackslash: + if tripquote: + multilinestr = not multilinestr + openstring = multilinestr + else: + openstring = not openstring + if openstring: + openstrchar = '"' + else: + openstrchar = "" + if sl[i] == '#' and not openstring and not keygroup and \ + not arrayoftables: + j = i + try: + while sl[j] != '\n': + sl.insert(j, ' ') + sl.pop(j+1) + j += 1 + except IndexError: + break + if sl[i] == '[' and not openstring and not keygroup and \ + not arrayoftables: + if beginline: + if sl[i+1] == '[': + arrayoftables = True + else: + keygroup = True + else: + openarr += 1 + if sl[i] == ']' and not openstring: + if keygroup: + keygroup = False + elif arrayoftables: + if sl[i-1] == ']': + arrayoftables = False + else: + openarr -= 1 + if sl[i] == '\n': + if openstring or multilinestr: + if not multilinestr: + raise Exception("Unbalanced quotes") + if sl[i-1] == "'" or sl[i-1] == '"': + sl.insert(i, sl[i-1]) + sl.pop(i+1) + sl[i-3] = ' ' + elif openarr: + sl.insert(i, ' ') + sl.pop(i+1) + else: + beginline = True + elif beginline and sl[i] != ' ' and sl[i] != '\t': + beginline = False + if not keygroup and not arrayoftables: + if sl[i] == '=': + raise Exception("Found empty keyname. ") + keyname = 1 + s = ''.join(sl) + s = s.split('\n') + else: + raise Exception("What exactly are you trying to pull?") + multikey = None + multilinestr = "" + multibackslash = False + for line in s: + line = line.strip() + if multikey: + if multibackslash: + strippedline = line.lstrip(' \t\n') + if strippedline == '': + continue + multilinestr += strippedline + else: + multilinestr += line + multibackslash = False + if len(line) > 2 and line[-1] == multilinestr[0] and \ + line[-2] == multilinestr[0] and line[-3] == multilinestr[0]: + value, vtype = load_value(multilinestr) + currentlevel[multikey] = value + multikey = None + multilinestr = "" + else: + k = len(multilinestr) -1 + while k > -1 and multilinestr[k] == '\\': + multibackslash = not multibackslash + k -= 1 + if multibackslash: + multilinestr = multilinestr[:-1] + else: + multilinestr += "\n" + continue + if line == "": + continue + if line[0] == '[': + arrayoftables = False + if line[1] == '[': + arrayoftables = True + line = line[2:].split(']]', 1) + else: + line = line[1:].split(']', 1) + if line[1].strip() != "": + raise Exception("Key group not on a line by itself.") + groups = line[0].split('.') + i = 0 + while i < len(groups): + groups[i] = groups[i].strip() + if groups[i][0] == '"' or groups[i][0] == "'": + groupstr = groups[i] + j = i+1 + while not groupstr[0] == groupstr[-1]: + j += 1 + groupstr = '.'.join(groups[i:j]) + groups[i] = groupstr[1:-1] + j -= 1 + while j > i: + groups.pop(j) + j -= 1 + else: + if not re.match(r'^[A-Za-z0-9_-]+$', groups[i]): + raise Exception("Invalid group name '"+groups[i]+"'. Try quoting it.") + i += 1 + currentlevel = retval + for i in range(len(groups)): + group = groups[i] + if group == "": + raise Exception("Can't have a keygroup with an empty name") + try: + currentlevel[group] + if i == len(groups) - 1: + if group in implicitgroups: + implicitgroups.remove(group) + if arrayoftables: + raise Exception("An implicitly defined table can't be an array") + elif arrayoftables: + currentlevel[group].append(_dict()) + else: + raise Exception("What? "+group+" already exists?"+str(currentlevel)) + except TypeError: + if i != len(groups) - 1: + implicitgroups.append(group) + currentlevel = currentlevel[-1] + try: + currentlevel[group] + except KeyError: + currentlevel[group] = _dict() + if i == len(groups) - 1 and arrayoftables: + currentlevel[group] = [_dict()] + except KeyError: + if i != len(groups) - 1: + implicitgroups.append(group) + currentlevel[group] = _dict() + if i == len(groups) - 1 and arrayoftables: + currentlevel[group] = [_dict()] + currentlevel = currentlevel[group] + if arrayoftables: + try: + currentlevel = currentlevel[-1] + except KeyError: + pass + elif "=" in line: + i = 1 + pair = line.split('=', i) + if re.match(r'^[0-9]', pair[-1]): + pair[-1] = re.sub(r'([0-9])_(?=[0-9])', r'\1', pair[-1]) + l = len(line) + while pair[-1][0] != ' ' and pair[-1][0] != '\t' and \ + pair[-1][0] != "'" and pair[-1][0] != '"' and \ + pair[-1][0] != '[' and pair[-1] != 'true' and \ + pair[-1] != 'false': + try: + float(pair[-1]) + break + except ValueError: + pass + if load_date(pair[-1]) != None: + break + i += 1 + prev_val = pair[-1] + pair = line.split('=', i) + if re.match(r'^[0-9]', pair[-1]): + pair[-1] = re.sub(r'([0-9])_(?=[0-9])', r'\1', pair[-1]) + if prev_val == pair[-1]: + raise Exception("Invalid date or number") + newpair = [] + newpair.append('='.join(pair[:-1])) + newpair.append(pair[-1]) + pair = newpair + pair[0] = pair[0].strip() + if (pair[0][0] == '"' or pair[0][0] == "'") and \ + (pair[0][-1] == '"' or pair[0][-1] == "'"): + pair[0] = pair[0][1:-1] + pair[1] = pair[1].strip() + if len(pair[1]) > 2 and (pair[1][0] == '"' or pair[1][0] == "'") \ + and pair[1][1] == pair[1][0] and pair[1][2] == pair[1][0] \ + and not (len(pair[1]) > 5 and pair[1][-1] == pair[1][0] \ + and pair[1][-2] == pair[1][0] and \ + pair[1][-3] == pair[1][0]): + k = len(pair[1]) -1 + while k > -1 and pair[1][k] == '\\': + multibackslash = not multibackslash + k -= 1 + if multibackslash: + multilinestr = pair[1][:-1] + else: + multilinestr = pair[1] + "\n" + multikey = pair[0] + else: + value, vtype = load_value(pair[1]) + try: + currentlevel[pair[0]] + raise Exception("Duplicate keys!") + except KeyError: + if multikey: + continue + else: + currentlevel[pair[0]] = value + return retval + +def load_date(val): + microsecond = 0 + tz = None + if len(val) > 19 and val[19] == '.': + microsecond = int(val[20:26]) + if len(val) > 26: + tz = TomlTz(val[26:31]) + elif len(val) > 20: + tz = TomlTz(val[19:24]) + try: + d = datetime.datetime(int(val[:4]), int(val[5:7]), int(val[8:10]), int(val[11:13]), int(val[14:16]), int(val[17:19]), microsecond, tz) + except ValueError: + return None + return d + +def load_unicode_escapes(v, hexbytes, prefix): + hexchars = ['0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'] + skip = False + i = len(v) - 1 + while i > -1 and v[i] == '\\': + skip = not skip + i -= 1 + for hx in hexbytes: + if skip: + skip = False + i = len(hx) - 1 + while i > -1 and hx[i] == '\\': + skip = not skip + i -= 1 + v += prefix + v += hx + continue + hxb = "" + i = 0 + hxblen = 4 + if prefix == "\\U": + hxblen = 8 + while i < hxblen: + try: + if not hx[i].lower() in hexchars: + raise IndexError("This is a hack") + except IndexError: + raise Exception("Invalid escape sequence") + hxb += hx[i].lower() + i += 1 + v += unichr(int(hxb, 16)) + v += unicode(hx[len(hxb):]) + return v + +def load_value(v): + if v == 'true': + return (True, "bool") + elif v == 'false': + return (False, "bool") + elif v[0] == '"': + testv = v[1:].split('"') + if testv[0] == '' and testv[1] == '': + testv = testv[2:-2] + closed = False + for tv in testv: + if tv == '': + closed = True + else: + oddbackslash = False + try: + i = -1 + j = tv[i] + while j == '\\': + oddbackslash = not oddbackslash + i -= 1 + j = tv[i] + except IndexError: + pass + if not oddbackslash: + if closed: + raise Exception("Stuff after closed string. WTF?") + else: + closed = True + escapes = ['0', 'b', 'f', 'n', 'r', 't', '"', '\\'] + escapedchars = ['\0', '\b', '\f', '\n', '\r', '\t', '\"', '\\'] + escapeseqs = v.split('\\')[1:] + backslash = False + for i in escapeseqs: + if i == '': + backslash = not backslash + else: + if i[0] not in escapes and i[0] != 'u' and i[0] != 'U' and \ + not backslash: + raise Exception("Reserved escape sequence used") + if backslash: + backslash = False + for prefix in ["\\u", "\\U"]: + if prefix in v: + hexbytes = v.split(prefix) + v = load_unicode_escapes(hexbytes[0], hexbytes[1:], prefix) + for i in range(len(escapes)): + if escapes[i] == '\\': + v = v.replace("\\"+escapes[i], escapedchars[i]) + else: + v = re.sub("([^\\\\](\\\\\\\\)*)\\\\"+escapes[i], "\\1"+escapedchars[i], v) + if v[1] == '"': + v = v[2:-2] + return (v[1:-1], "str") + elif v[0] == "'": + if v[1] == "'": + v = v[2:-2] + return (v[1:-1], "str") + elif v[0] == '[': + return (load_array(v), "array") + else: + parsed_date = load_date(v) + if parsed_date != None: + return (parsed_date, "date") + else: + itype = "int" + digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + neg = False + if v[0] == '-': + neg = True + v = v[1:] + if '.' in v or 'e' in v: + if v.split('.', 1)[1] == '': + raise Exception("This float is missing digits after the point") + if v[0] not in digits: + raise Exception("This float doesn't have a leading digit") + v = float(v) + itype = "float" + else: + v = int(v) + if neg: + return (0 - v, itype) + return (v, itype) + +def load_array(a): + atype = None + retval = [] + a = a.strip() + if '[' not in a[1:-1]: + strarray = False + tmpa = a[1:-1].strip() + if tmpa != '' and tmpa[0] == '"': + strarray = True + a = a[1:-1].split(',') + b = 0 + if strarray: + while b < len(a) - 1: + while a[b].strip()[-1] != '"' and a[b+1].strip()[0] != '"': + a[b] = a[b] + ',' + a[b+1] + if b < len(a) - 2: + a = a[:b+1] + a[b+2:] + else: + a = a[:b+1] + b += 1 + else: + al = list(a[1:-1]) + a = [] + openarr = 0 + j = 0 + for i in range(len(al)): + if al[i] == '[': + openarr += 1 + elif al[i] == ']': + openarr -= 1 + elif al[i] == ',' and not openarr: + a.append(''.join(al[j:i])) + j = i+1 + a.append(''.join(al[j:])) + for i in range(len(a)): + a[i] = a[i].strip() + if a[i] != '': + nval, ntype = load_value(a[i]) + if atype: + if ntype != atype: + raise Exception("Not a homogeneous array") + else: + atype = ntype + retval.append(nval) + return retval + +def dump(o, f): + """Writes out to f the toml corresponding to o. Returns said toml.""" + if f.write: + d = dumps(o) + f.write(d) + return d + else: + raise Exception("You can only dump an object to a file descriptor") + +def dumps(o): + """Returns a string containing the toml corresponding to o, a dictionary""" + retval = "" + addtoretval, sections = dump_sections(o, "") + retval += addtoretval + while sections != {}: + newsections = {} + for section in sections: + addtoretval, addtosections = dump_sections(sections[section], section) + if addtoretval: + retval += "["+section+"]\n" + retval += addtoretval + for s in addtosections: + newsections[section+"."+s] = addtosections[s] + sections = newsections + return retval + +def dump_sections(o, sup): + retstr = "" + if sup != "" and sup[-1] != ".": + sup += '.' + retdict = {} + arraystr = "" + for section in o: + qsection = section + if not re.match(r'^[A-Za-z0-9_-]+$', section): + if '"' in section: + qsection = "'" + section + "'" + else: + qsection = '"' + section + '"' + if not isinstance(o[section], dict): + arrayoftables = False + if isinstance(o[section], list): + for a in o[section]: + if isinstance(a, dict): + arrayoftables = True + if arrayoftables: + for a in o[section]: + arraytabstr = "" + arraystr += "[["+sup+qsection+"]]\n" + s, d = dump_sections(a, sup+qsection) + if s: + if s[0] == "[": + arraytabstr += s + else: + arraystr += s + while d != {}: + newd = {} + for dsec in d: + s1, d1 = dump_sections(d[dsec], sup+qsection+"."+dsec) + if s1: + arraytabstr += "["+sup+qsection+"."+dsec+"]\n" + arraytabstr += s1 + for s1 in d1: + newd[dsec+"."+s1] = d1[s1] + d = newd + arraystr += arraytabstr + else: + if o[section] is not None: + retstr += (qsection + " = " + + str(dump_value(o[section])) + '\n') + else: + retdict[qsection] = o[section] + retstr += arraystr + return (retstr, retdict) + +def dump_value(v): + if isinstance(v, list): + t = [] + retval = "[" + for u in v: + t.append(dump_value(u)) + while t != []: + s = [] + for u in t: + if isinstance(u, list): + for r in u: + s.append(r) + else: + retval += " " + str(u) + "," + t = s + retval += "]" + return retval + if isinstance(v, (str, unicode)): + v = "%r" % v + if v[0] == 'u': + v = v[1:] + singlequote = v[0] == "'" + v = v[1:-1] + if singlequote: + v = v.replace("\\'", "'") + v = v.replace('"', '\\"') + v = v.replace("\\x", "\\u00") + return str('"'+v+'"') + if isinstance(v, bool): + return str(v).lower() + if isinstance(v, datetime.datetime): + return v.isoformat()[:19]+'Z' + if isinstance(v, float): + return str(v) + return v + +def toml_merge_dict(a, b): + for k in a: + if isinstance(a[k], dict): + try: + b[k] + except KeyError: + continue + if isinstance(b[k], dict): + b[k] = toml_merge_dict(a[k], b[k]) + else: + raise Exception("Can't merge dict and nondict in toml object") + a.update(b) + return a
\ No newline at end of file |