#!/usr/bin/env python
import math
import re
import sys
from fractions import Fraction

# Cycle cost charged for each AVR instruction the generated routines use.
CYCLES = dict(
    rcall=3,
    push=2,
    ldi=1,
    pop=2,
    ret=4,
    dec=1,
    # Cost charged for every pass of a loop.  The generated loops follow the
    # loop with one nop to even out the final brne, which is a cycle shorter.
    brne=2,
)

# Exactly two arguments are expected: the clock speed and the delay.
if len(sys.argv) != 3:
    # Usage goes to stderr so it never ends up in redirected assembly output.
    sys.stderr.write("""Usage: 
    avr_delay.py <clockspeed (cycles/sec)> <delay>
""")
    sys.exit(1)

clockspeed = int(sys.argv[1])
delay_in = sys.argv[2]

# Unit suffixes and how many of that unit make up one second.  Plain 's' is
# tested last because every other suffix also contains an 's'.
_DELAY_UNITS = (
    ('ms', 1000),
    ('us', 1000000),
    ('ns', 1000000000),
    ('s', 1),
)

# Parse the delay as an exact fraction.  With binary floats the product
# clockspeed * delay can land a hair above a whole number (0.07 * 100 is
# 7.000000000000001), and rounding up would then add a spurious cycle.
for _suffix, _units_per_second in _DELAY_UNITS:
    if _suffix in delay_in:
        _delay_exact = Fraction(delay_in.split(_suffix)[0]) / _units_per_second
        break
else:
    # No unit suffix: the value is taken to be in seconds.
    _delay_exact = Fraction(delay_in)

# Kept as a float, as before, for anything that reads the delay in seconds.
delay_s = float(_delay_exact)

# Round up to whole cycles so the generated delay is never shorter than the
# request.  -(-n // d) is an exact integer ceiling on Python 2 and Python 3.
_cycles_exact = clockspeed * _delay_exact
cycles = -((-_cycles_exact.numerator) // _cycles_exact.denominator)

def delay_name(cycles, clockspeed):
    """Return the assembly label for a delay of ``cycles`` at ``clockspeed``.

    The label is ``delay_<N><unit>``: nanoseconds for delays under one
    microsecond, microseconds for delays under one millisecond, and
    milliseconds otherwise.  ``N`` is the duration in that unit, truncated
    to a whole number.

    cycles -- delay length in clock cycles (integer).
    clockspeed -- clock frequency in cycles per second (integer).
    """
    # Integer arithmetic keeps the truncation exact.  Going through a float
    # can land just under a whole number (1001 / 1000.0 * 1000 evaluates to
    # 1000.9999999999999), which produced a label one unit too small.
    if cycles * 1000000 < clockspeed:
        # Shorter than one microsecond.
        return "delay_%ins" % (cycles * 1000000000 // clockspeed)
    if cycles * 1000 < clockspeed:
        # Shorter than one millisecond.
        return "delay_%ius" % (cycles * 1000000 // clockspeed)
    return "delay_%ims" % (cycles * 1000 // clockspeed)

#
# nop delays - non-looping short delays.
#
def nop_delay_boilerplate_cycles():
    """Return the fixed cost of a nop delay: the rcall into it plus its ret."""
    overhead_ops = ('rcall', 'ret')
    return sum(CYCLES[op] for op in overhead_ops)

def min_nop_delay():
    """Return the shortest nop delay in cycles: call overhead with no nops."""
    shortest = nop_delay_boilerplate_cycles()
    return shortest
# There is no maximum nop delay, but a loop-based delay should be used
# whenever the delay is long enough for one.

def nop_delay(cycles, clockspeed):
    """Return assembly for a delay routine made only of nops and a ret.

    cycles -- total cycles the routine takes, counting the rcall and ret.
    clockspeed -- clock frequency in Hz, used for the label and the comment.
    """
    # Whatever the call overhead does not cover is filled with nops.
    nop_count = cycles - nop_delay_boilerplate_cycles()
    fields = dict(
        delay_name=delay_name(cycles, clockspeed),
        cycles=cycles,
        clockspeed=clockspeed,
        nops="    nop\n" * nop_count,
    )
    template = (
        "%(delay_name)s: ; %(cycles)i cycles at %(clockspeed)iHz\n"
        "%(nops)s    ret\n"
        "    "
    )
    return template % fields
# 
# one byte loop delays.
# delay by decreasing a temp variable.
#
def one_byte_boilerplate_cycles():
    """Return the fixed cost of a one-byte loop delay outside its loop.

    Covers the rcall into the routine, saving temp, loading the counter,
    restoring temp and the ret.
    """
    overhead_ops = ('rcall', 'push', 'ldi', 'pop', 'ret')
    return sum(CYCLES[op] for op in overhead_ops)

def one_byte_loop_cycles():
    """Return the cost of one pass of the countdown loop (dec plus brne)."""
    loop_ops = ('dec', 'brne')
    return sum(CYCLES[op] for op in loop_ops)

def max_one_byte_delay():
    """Return the cycles of a one-byte loop delay running 255 passes."""
    max_passes = 255
    looping = max_passes * one_byte_loop_cycles()
    return looping + one_byte_boilerplate_cycles()

def min_one_byte_delay():
    """Return the cycles of a one-byte loop delay running a single pass."""
    fixed = one_byte_boilerplate_cycles()
    single_pass = one_byte_loop_cycles()
    return fixed + single_pass

def one_byte_delay(cycles, clockspeed):
    """Return assembly for a delay routine built on one 8-bit countdown loop.

    The routine saves ``temp``, counts it down to zero, restores it and
    returns.  Cycles the loop cannot account for are made up with nops just
    before the ``ret``, so the routine takes exactly ``cycles`` cycles
    including the ``rcall`` that enters it.

    ``temp`` is used as a register operand; presumably the including
    assembly source defines it -- nothing in this generator does.

    cycles -- total cycles to burn; at least boilerplate plus one loop pass.
              Above the 255-pass maximum the count is held at 255 and the
              excess is padded with nops.
    clockspeed -- clock frequency in Hz, used for the label and the comment.

    Raises ValueError when ``cycles`` is too short for a single loop pass.
    """
    # Each pass costs dec + brne.  The final brne is one cycle shorter than
    # the looping ones; the nop after the loop evens that out, so every pass
    # can be charged the same amount.
    boilerplate = one_byte_boilerplate_cycles()
    loop_length = one_byte_loop_cycles()
    if cycles < boilerplate + loop_length:
        # A count of 0 must never be emitted: dec would wrap it to 255 and
        # the loop would run 256 passes instead of none.
        raise ValueError(
            "one_byte_delay needs at least %i cycles, got %i"
            % (boilerplate + loop_length, cycles))
    # The counter is loaded with an 8-bit immediate, so cap it at 255.
    num_loops = min((cycles - boilerplate) // loop_length, 255)
    num_nops = cycles - boilerplate - num_loops * loop_length
    # N.B.: if you change this ASM function, be sure to change the
    # boilerplate and loop calculations above accordingly.
    return \
"""%(delay_name)s: ; %(cycles)i cycles at %(clockspeed)iHz
    push temp
    ldi temp, %(temp_count)i
    %(delay_name)s_loop:
        dec temp
        brne %(delay_name)s_loop
    nop ; even out final brne time
    pop temp
%(nops)s    ret
""" % {
            'cycles': cycles,
            'clockspeed': clockspeed,
            'delay_name': delay_name(cycles, clockspeed),
            'temp_count': num_loops,
            'nops': "    nop\n" * num_nops
    }

#
#  Nested delays - delays that call another delay.
#  Used for delays longer than max_one_byte_delay().
#
def nested_delay_boilerplate_cycles():
    """Return the fixed cost of a nested delay outside its loop.

    Identical to the boilerplate of a one-byte loop delay.
    """
    shared_overhead = one_byte_boilerplate_cycles()
    return shared_overhead

def nested_delay_loop_cycles():
    """Return the cost of one outer pass of a nested delay.

    Each pass calls the longest one-byte delay, then pays for its own dec
    and brne.
    """
    loop_control = CYCLES['dec'] + CYCLES['brne']
    return max_one_byte_delay() + loop_control

def max_nested_delay():
    """Return the cycles of a nested delay running 255 outer passes."""
    max_passes = 255
    looping = max_passes * nested_delay_loop_cycles()
    return looping + nested_delay_boilerplate_cycles()

def nested_delay(cycles, clockspeed):
    """Return assembly for a delay that repeatedly calls a one-byte delay.

    The result holds up to three routines: an inner one-byte delay, an
    optional one-byte "tail" delay for the leftover cycles, and the outer
    routine labelled ``delay_name(cycles, clockspeed)``.  The outer routine
    takes exactly ``cycles`` cycles including the ``rcall`` that enters it.

    Helper routines are labelled ``<outer>_inner`` and ``<outer>_tail``.
    Labels derived from each helper's own duration are truncated to whole
    time units, so they could coincide with each other or with the outer
    label and define the same symbol twice.

    cycles -- total cycles to burn.  Meant for delays longer than
              max_one_byte_delay(); must not exceed max_nested_delay().
    clockspeed -- clock frequency in Hz, used for labels and comments.

    Raises ValueError when ``cycles`` exceeds max_nested_delay(), because
    the outer counter is loaded with an 8-bit immediate.
    """
    if cycles > max_nested_delay():
        raise ValueError(
            "nested_delay supports at most %i cycles, got %i"
            % (max_nested_delay(), cycles))

    boilerplate = nested_delay_boilerplate_cycles()
    loop_cycles = nested_delay_loop_cycles()
    name = delay_name(cycles, clockspeed)
    inner_label = name + "_inner"
    tail_label = name + "_tail"

    def helper_routine(helper_cycles, label):
        # one_byte_delay() labels its routine (and its loop) after its own
        # duration; swap that label for one that is unique to this delay.
        routine = one_byte_delay(helper_cycles, clockspeed)
        return routine.replace(delay_name(helper_cycles, clockspeed), label)

    num_loops = (cycles - boilerplate) // loop_cycles

    if num_loops < 1:
        # Too short for even one outer pass.  Emitting "ldi temp, 0" here
        # would wrap around and run 256 passes, so skip the loop entirely:
        # call one helper sized to fit and pad the rest with nops.
        call_overhead = CYCLES['rcall'] + CYCLES['ret']
        helper_cycles = min(cycles - call_overhead, max_one_byte_delay())
        num_nops = cycles - call_overhead - helper_cycles
        return helper_routine(helper_cycles, inner_label) + "\n" + \
"""%(delay_name)s: ; %(cycles)i cycles at %(clockspeed)iHz
    rcall %(inner_delay_name)s
%(nops)s    ret""" % {
            'cycles': cycles,
            'clockspeed': clockspeed,
            'delay_name': name,
            'inner_delay_name': inner_label,
            'nops': "    nop\n" * num_nops
        }

    # Each outer pass costs the inner call, plus dec, plus brne.  The nop
    # after the loop evens out the final brne, which is a cycle shorter.
    inner_delay = helper_routine(max_one_byte_delay(), inner_label)
    remainder = cycles - boilerplate - loop_cycles * num_loops
    if remainder > min_one_byte_delay():
        # Enough cycles left over to be worth a second looping routine.
        final_delay = helper_routine(remainder, tail_label)
        nops = ""
        final_delay_call = "\n    rcall %s" % tail_label
    else:
        final_delay = "\n"
        nops = "    nop\n" * remainder
        final_delay_call = ""

    return inner_delay + final_delay + \
"""%(delay_name)s: ; %(cycles)i cycles at %(clockspeed)iHz
    push temp
    ldi temp, %(temp_count)i
    %(delay_name)s_loop:
        rcall %(inner_delay_name)s
        dec temp
        brne %(delay_name)s_loop
    nop ; even out final brne time
    pop temp %(final_delay_call)s
%(nops)s    ret""" % {
        'cycles': cycles,
        'clockspeed': clockspeed,
        'delay_name': name,
        'temp_count': num_loops,
        'inner_delay_name': inner_label,
        'final_delay_call': final_delay_call,
        'nops': nops
    }

# Pick the cheapest generator that can produce the requested cycle count.
# print() is called with a single argument throughout, so the statements
# behave the same on Python 2 and Python 3.
if cycles < min_nop_delay():
    # Too short to pay for an rcall/ret pair: emit nops to paste inline.
    print("; %s (%i cycles at %iHz)" %
          (delay_name(cycles, clockspeed), cycles, clockspeed))
    print("nop\n" * cycles)

elif cycles < min_one_byte_delay():
    print(nop_delay(cycles, clockspeed))

elif cycles <= max_one_byte_delay():
    # Inclusive: the maximum itself is a valid 255-pass one-byte loop.
    print(one_byte_delay(cycles, clockspeed))

elif cycles <= max_nested_delay():
    # Inclusive: the maximum itself is a valid 255-pass nested loop.
    print(nested_delay(cycles, clockspeed))

else:
    # Report on stderr and fail, so redirected output is never mistaken for
    # assembly and callers can detect the failure from the exit status.
    sys.stderr.write("""ERROR:
    Maximum delay: %(max_cycles)i cycles (%(max_time)fms at %(clockspeed)iHz)
""" % {
        'max_cycles': max_nested_delay(),
        'clockspeed': clockspeed,
        'max_time': max_nested_delay() / float(clockspeed) * 1000,
    })
    sys.exit(1)
