import json
import xml.etree.ElementTree as ET

# Sentinel for "could not parse"; None cannot be used because the JSON
# document ``null`` legitimately parses to None.
_UNPARSED = object()


def _try_parse(text, entry_type):
    """Parse *text* as *entry_type* ("json" or "xml"); return ``_UNPARSED`` on failure."""
    if entry_type == "json":
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            return _UNPARSED
    if entry_type == "xml":
        # NOTE(review): ElementTree is not hardened against maliciously
        # constructed XML; confirm the input stream is trusted.
        try:
            return ET.fromstring(text)
        except ET.ParseError:
            return _UNPARSED
    return _UNPARSED


def read_json_and_xml(lines):
    """Parse a stream of lines containing multiline JSON or XML entries.

    Each entry starts on a new line. An entry whose first non-space
    character is ``<`` is treated as XML; anything else is treated as JSON.
    Lines are accumulated in an internal buffer (with trailing ``\\n`` /
    ``\\r`` removed and no separator inserted between lines) until the
    buffer parses as a complete entry.

    Blank or whitespace-only lines seen while no entry is in progress are
    skipped, so they cannot lock the detected type for the entry that
    follows them.

    If a line starting with ``{`` at column 0 arrives while an unparsed
    buffer is pending, the pending buffer is yielded as-is (a raw ``str``)
    and a new JSON entry is started from that line.

    Args:
        lines: An iterable of text lines (e.g. an open file or ``sys.stdin``).

    Yields:
        A parsed JSON value (dict, list, str, number, bool or None), an
        ``xml.etree.ElementTree.Element``, or a raw ``str`` holding
        unparseable buffered text that was force-flushed.

    Unparseable data still buffered at the end of the stream is discarded.
    """
    buffer = ""
    entry_type = None  # Either "json" or "xml" while an entry is in progress.

    for line in lines:
        # Remove the trailing line terminator only; leading indentation is kept.
        stripped_line = line.rstrip("\n").rstrip("\r")

        if not buffer:
            if not stripped_line.strip():
                # Nothing buffered and nothing useful on this line: skip it
                # instead of starting a bogus whitespace-only "json" entry.
                continue
            # Starting a new entry: choose the type from the first
            # non-space character. Anything that is not XML is assumed JSON.
            first = stripped_line.lstrip()
            entry_type = "xml" if first.startswith("<") else "json"
        elif stripped_line.startswith("{"):
            # Force flush: the pending buffer never parsed and a new JSON
            # object appears to begin, so hand back the raw text and restart.
            entry_type = "json"
            yield buffer
            buffer = ""

        buffer += stripped_line

        parsed = _try_parse(buffer, entry_type)
        if parsed is not _UNPARSED:
            yield parsed
            # Reset for the next entry.
            buffer = ""
            entry_type = None
        # Otherwise the entry is incomplete (or malformed); keep accumulating.

    # At the end of the stream, try one last parse if there is data left.
    if buffer:
        parsed = _try_parse(buffer, entry_type)
        if parsed is not _UNPARSED:
            yield parsed


if __name__ == "__main__":
    import sys

    for entry in read_json_and_xml(sys.stdin):
        # Dict entries are not printed; every other yielded value is.
        if not isinstance(entry, dict):
            print(entry)