#!/usr/bin/env python
# -*- coding: ascii -*-
"""
Split a stream of JSON-objects/arrays into separate strings.

:Version:   2008-09-20-alpha
:Status:    experimental

:Example:   see the doctests of `json_split`
:SeeAlso:   JSON specification

:Warning:
    .. Warning::
        This is **experimental** code!
:Bug:

:Author:    Roland Koebler (r.koebler@yahoo.de)
:Copyright: 2008 by Roland Koebler (r.koebler@yahoo.de)
:License:   see __license__

:Changelog:
    - 2008-09-19: created

TODO:
    - change to "yield"
    - test
    - enhance
"""

__version__ = "2008-09-20-alpha"
__author__ = "Roland Koebler "
__license__ = """Copyright (c) 2008 by Roland Koebler (r.koebler(at)yahoo.de)"""

#=========================================

# Opening bracket of a top-level JSON value -> its matching closing bracket.
_BRACKETS = {'[': ']', '{': '}'}


def json_split(s):
    """Split a stream of JSON-objects/arrays into separate strings.

    Every resulting string then contains exactly 1 JSON-array/object.

    This is especially useful if you send JSON-objects/arrays over a
    streaming connection, and want to separate the JSON-objects/arrays
    before deserialization.

    The scanner only tracks three things: whether it is inside a
    top-level array/object, whether it is inside a double-quoted string
    (where brackets and backslash-escaped characters are ignored), and
    the nesting depth of the bracket type that opened the current value.
    Nothing is validated or parsed.

    :Parameters:
        s: (unicode-) JSON-string

    :Returns:
        (LIST_OF_JSON_STRINGS, REMAINING_STRING)

        REMAINING_STRING is the unconsumed tail of ``s``: either an
        incomplete array/object (starting at its opening bracket) or
        whatever follows the last complete one. Prepend it to the next
        chunk of the stream before calling this function again.

    :Note:
        - No error-handling is included; malformed input is not detected.
        - Characters in front of a top-level ``[`` / ``{`` are dropped
          as soon as that bracket is found.

    :Example:
        splitting a chunk which ends in the middle of a value::

            >>> json_split('[1, 2]{"a": "}"}[3')
            (['[1, 2]', '{"a": "}"}'], '[3')

        feeding the remainder back in together with the next chunk::

            >>> chunks, rest = json_split('{"x": [1, {"y": 2}]} {"z"')
            >>> chunks
            ['{"x": [1, {"y": 2}]}']
            >>> rest
            '{"z"'
            >>> json_split(rest + ': 1}')
            (['{"z": 1}'], '')

        round-trip through a JSON encoder/decoder::

            >>> import json
            >>> test = {'a': 'b', 'c': {'1': [1, 2], '3': [{'d': ['}']}]},
            ...         'xy': 'x ] } " [ { y'}
            >>> parts, rest = json_split(json.dumps(test) * 5)
            >>> (len(parts), rest)
            (5, '')
            >>> all(json.loads(p) == test for p in parts)
            True

    :Author:  Roland Koebler (r.koebler@yahoo.de)
    :Version: 2008-09-20-pre
    """
    json_strings = []
    begin = 0           # start of the not-yet-emitted part of s
    opener = None       # '[' or '{' while inside a top-level value
    closer = None       # closing bracket matching `opener`
    depth = 0           # nesting depth of `opener`-type brackets
    in_string = False   # True while inside a double-quoted JSON string
    i = 0               # current position in s
    end = len(s)

    while i < end:
        c = s[i]
        if opener is None:
            # Searching for the start of a JSON array or object.
            # TODO: what to do with s[begin:i]
            # TODO: skip chars after a backslash?
            if c in _BRACKETS:
                opener = c
                closer = _BRACKETS[c]
                begin = i
                depth = 1
        elif in_string:
            if c == '\\':
                # Skip the escaped character, so that an escaped quote
                # does not terminate the string.
                i += 1
            elif c == '"':
                in_string = False
        elif c == '"':
            in_string = True
        elif c == opener:
            depth += 1
        elif c == closer:
            # Only the bracket type that opened the value is counted.
            depth -= 1
            if depth == 0:
                json_strings.append(s[begin:i + 1])
                opener = None
                begin = i + 1
        i += 1

    return (json_strings, s[begin:])

#=========================================
# doctest
if __name__ == "__main__":
    import doctest
    doctest.testmod()

#=========================================