import sys
import numpy as np
import json
from math import exp
from os.path import isfile

# Status record that is serialised to the response file: a success flag plus
# an optional error message (None while no error has been recorded).
response = dict(isSuccessful=True, errorMessage=None)

def generate_matrix(cc, delta_c, response_file=None, occupancy_limit=500):
    """Build the probability and back-pointer matrices used to filter occupancy.

    Row ``t`` describes time step ``t`` and column ``c`` a candidate occupancy
    ``c`` in ``0..max(|cc|)``.  Row 0 puts all probability on occupancy 0
    (``delta_c[0]`` is not used).  For every later row, moving from
    occupancy ``k`` to ``c`` is weighted by a Gaussian in the step ``c - k``
    with mean ``delta_c[t]`` and variance ``|delta_c[t]|`` (1 when the delta
    is 0); ``M[t, c]`` is the best such weighted predecessor probability,
    normalised so the row sums to 1, and ``N[t, c]`` is the predecessor ``k``
    that achieved it (lowest ``k`` on ties).

    Args:
        cc: raw occupancy counts; only the largest absolute value is used,
            to size the state space.
        delta_c: per-step occupancy change, one entry per matrix row.
        response_file: optional path; when the occupancy limit is exceeded
            the module-level ``response`` dict is written there as JSON
            before exiting.
        occupancy_limit: largest accepted ``max(|cc|)``.  Work and temporary
            memory grow with the square of the state count, so raise it
            with care.

    Returns:
        ``(M, N)``: a float16 array of row-normalised probabilities and a
        uint16 array of back-pointers, both of shape
        ``(len(delta_c), max(|cc|) + 1)``.

    Raises:
        SystemExit: if ``max(|cc|)`` exceeds ``occupancy_limit``.  The
            failure is recorded in the module-level ``response`` dict first.
    """
    # default=0 keeps an empty day from crashing inside max()
    max_occupancy = max(map(abs, cc), default=0)

    # Reject oversized problems *before* allocating the matrices.
    if max_occupancy > occupancy_limit:
        response['isSuccessful'] = False
        response['errorMessage'] = 'Occupancy (%d) too large' % max_occupancy
        if response_file is None:
            # Nowhere to write the response: surface the error through the
            # exit message/status instead of failing on an undefined name.
            sys.exit(response['errorMessage'])
        with open(response_file, 'w') as f:
            f.write(json.dumps(response))
        sys.exit()

    n_states = max_occupancy + 1
    M = np.zeros((len(delta_c), n_states), dtype=np.float16)
    N = np.zeros((len(delta_c), n_states), dtype=np.uint16)

    states = np.arange(n_states)
    # step[c, k] = c - k: occupancy change implied by moving from k to c
    step = states[:, None] - states[None, :]

    # The recurrence is carried in float64.  Feeding float16 values back in
    # lets every candidate underflow to 0 after a large drop, which turns
    # the normalisation into 0/0 and poisons all later rows with NaN.
    prev = np.zeros(n_states, dtype=np.float64)

    for t, delta in enumerate(delta_c):

        # first row initialization for M and N
        if t == 0:
            M[0, 0] = 1
            prev[0] = 1.0
            continue

        # mean and variance of the gaussian step model
        mean = delta
        var = abs(delta) if delta != 0 else 1

        # candidates[c, k]: probability of reaching c at time t via k
        candidates = prev[None, :] * np.exp(-((step - mean) ** 2) / (2 * var))

        N[t] = candidates.argmax(axis=1)
        best = candidates.max(axis=1)

        # normalize the row
        prev = best / best.sum()
        M[t] = prev

    return M, N
    
def predict_occupancy(N, backtrack_start=0):
    """Recover the filtered occupancy sequence by following back-pointers.

    Starting from ``backtrack_start`` at the last row, each row ``t`` of
    ``N`` (from the last down to row 1) maps the current occupancy to the
    occupancy chosen for row ``t - 1``.

    Args:
        N: 2-D integer array of back-pointers; ``N[t, c]`` is the occupancy
            at row ``t - 1`` that leads to occupancy ``c`` at row ``t``.
        backtrack_start: occupancy assumed at the last row (default 0,
            i.e. empty at the end of operating hours).

    Returns:
        A list of plain Python ints in chronological order, one per row of
        ``N``; empty when ``N`` has no rows.
    """
    n_rows = N.shape[0]
    if n_rows == 0:
        # no time steps: nothing to predict (keeps "one value per row")
        return []

    # int() keeps numpy scalar types out of the result so every element is
    # a plain, JSON-serialisable Python int.
    state = int(backtrack_start)
    path = [state]

    # go from last row of N all the way down to row 1
    for t in range(n_rows - 1, 0, -1):
        state = int(N[t, state])
        path.append(state)

    path.reverse()
    return path
        
def main(argv):
    """Filter one day of raw occupancy readings and write the result as JSON.

    The input file must contain a JSON list of objects with ``'occupancy'``
    and ``'timestamp'`` keys.  The filtered series is written to the output
    file as a list of ``{'timestamp': int, 'occupancy': int}`` objects, and
    the module-level ``response`` dict is written to the response file.

    Args:
        argv: exactly three paths: input file, output file, response file.

    Raises:
        SystemExit: on a wrong argument count, or (after writing a failure
            response) when the input file does not exist.
        Exception: any error raised while processing is re-raised after a
            failure response has been written.
    """
    if len(argv) != 3:
        # explicit check: an assert would be stripped under ``python -O``
        sys.exit('Expected 3 arguments: input_file output_file response_file')
    input_file, output_file, response_file = argv

    def write_response():
        # Persist the current state of the shared response record.
        with open(response_file, 'w') as f:
            f.write(json.dumps(response))

    # read raw occupancy for the whole day
    if not isfile(input_file):
        response['isSuccessful'] = False
        response['errorMessage'] = 'Invalid file %s' % input_file
        write_response()
        sys.exit()

    try:
        with open(input_file, 'r') as f:
            data = json.load(f)

        cc = [entry['occupancy'] for entry in data]
        times = [entry['timestamp'] for entry in data]
        # the first delta is the first reading itself, the rest are
        # differences between consecutive readings
        delta_c = cc[:1] + [cur - prev for prev, cur in zip(cc, cc[1:])]

        # generate gaussian probability matrix
        M, N = generate_matrix(cc, delta_c)

        # make prediction
        cc_filtered = predict_occupancy(N)
        assert len(cc) == len(cc_filtered)

        # save to output
        json_output = [
            {'timestamp': int(ts), 'occupancy': int(occ)}
            for ts, occ in zip(times, cc_filtered)
        ]
        with open(output_file, 'w') as f:
            f.write(json.dumps(json_output))
    except Exception as err:
        # Record the failure unless a more specific message was already set,
        # then let the error propagate unchanged.
        if response['isSuccessful']:
            response['isSuccessful'] = False
            response['errorMessage'] = '%s: %s' % (type(err).__name__, err)
        raise
    finally:
        # Always leave a response file behind, on success, on error and on
        # SystemExit raised further down the call chain.
        write_response()

# Script entry point: hand the command-line arguments (without the program
# name) to main() when the file is executed directly.
if __name__ == '__main__':
    main(sys.argv[1:])