#!/usr/bin/python

###             -htindent.py-            ###
### Indent html code         August 26, 2006
### Simeon Veldstra  <reallifesim@gmail.com> 
### Free to use and distribute. No warranty.

import sys

def main():
	"""Filter standard input through indent() onto standard output."""
	source, sink = sys.stdin, sys.stdout
	indent(source, sink)

def indent(fp, out, q=' '):
	"""Indent HTML code.

	fp is a file-like object opened for reading.
	out is a file-like object opened for writing
	that the formatted html will be written to.
	The optional q argument is the string printed
	for each tabstop. q defaults to a single space.

	All of fp is read up front. Every tag and every line of text
	is written on a line of its own, prefixed with q repeated once
	per nesting level. Opening tags deepen the level for what
	follows; closing tags reduce it, but never below zero.
	Declarations and comments (<!...>), processing instructions
	(<?...>), self-closing tags (.../>) and HTML void elements
	such as <br> leave the level unchanged. A <pre> element is
	copied verbatim up to and including its </pre>.

	A tag is taken to end at the first '>' that follows its '<',
	so a '>' inside a comment or attribute value ends it early.
	"""

	# Elements that never have a closing tag, so they must not
	# deepen the indentation of what follows.
	void_tags = (
		'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
		'link', 'meta', 'param', 'source', 'track', 'wbr',
	)

	data = fp.read()
	size = len(data)
	i = 0
	level = 0
	line = []

	def nl(i):
		"""Write the pending line at the current level.

		Returns i advanced past any whitespace that follows, or
		i unchanged when nothing is pending.
		"""
		if not line:
			return i
		text = ''.join(line).lstrip()
		line[:] = []
		# A whitespace-only line would print as bare indentation.
		if text:
			out.write(q * level)
			out.write(text)
			out.write('\n')
		while i < size and data[i] in (' ', '\t', '\n'):
			i += 1
		return i

	while i < size:
		ch = data[i]
		if ch == '\n':
			i = nl(i + 1)

		elif ch == '<':
			# Require a delimiter after "pre" so that longer tag
			# names starting with those letters are not matched.
			if (i + 4 < size
					and data[i + 1:i + 4].lower() == 'pre'
					and data[i + 4] in '> \t\r\n/'):
				# Copy the element untouched. The end marker is
				# looked for at the tail of the pending line, which
				# holds everything copied so far.
				while (i < size
						and ''.join(line[-6:]).lower() != '</pre>'):
					line.append(data[i])
					i += 1
				i = nl(i)
				continue

			add = 0
			if i + 1 < size and data[i + 1] not in '!?':
				if data[i + 1] == '/':
					# An unmatched closing tag at the top level
					# must not change the indentation.
					if level > 0:
						add = -1
				else:
					add = 1

			# Text preceding the tag goes on its own line.
			i = nl(i)

			# The tag runs to the first '>' or to the end of data.
			end = data.find('>', i)
			if end < 0:
				end = size - 1
			tag = data[i:end + 1]
			line.append(tag)
			i = end + 1

			if tag.endswith('/>'):
				add = 0
			elif add > 0:
				words = tag.strip('<>/').split(None, 1)
				if words and words[0].lower() in void_tags:
					add = 0

			if add > 0:
				# The opening tag itself stays at the outer level.
				i = nl(i)
				level += add
			else:
				# A closing tag lines up with its opening tag.
				level += add
				i = nl(i)

		else:
			line.append(ch)
			i += 1

	# Flush text left over after the last newline or tag.
	nl(i)

# Run the stdin-to-stdout filter only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__":
	main()
