import sys
import numpy as np
import pickle
import json
from time import ctime, time
from os.path import isfile, realpath, dirname, join

# --- module configuration ---------------------------------------------------
# Length of the look-back window, in minutes; the feature vector carries one
# occupancy sample per minute of this window.
history = 180

# The pickled model is expected to live in the same directory as this script.
_script_dir = dirname(realpath(__file__))
model = join(_script_dir, 'realtime_count.model')

# Status record serialized to the response file; it starts out as "success"
# and is updated in place when a run fails.
response = dict(isSuccessful=True, errorMessage=None)

def fill_missing_data(data, window_minutes=None):
    """Back-fill missing one-minute samples at the tail of an occupancy series.

    Starting one minute before the newest sample, the series is walked
    backwards in 60-second steps. Whenever a step has no sample, a new
    entry is inserted for it that carries the occupancy of the nearest
    earlier sample. Filling never reaches further back than the start of
    the look-back window (or the oldest sample, whichever is later), so a
    single very old sample cannot trigger an unbounded number of insertions.

    Args:
        data: list of dicts with 'timestamp' (seconds) and 'occupancy'
            keys, sorted by ascending timestamp. The list is modified in
            place.
        window_minutes: length of the look-back window in minutes. Defaults
            to the module-level ``history`` constant.

    Returns:
        The same list object that was passed in, with the gaps filled. An
        empty list is returned unchanged.
    """
    if not data:
        return data
    if window_minutes is None:
        window_minutes = history

    end_t = data[-1]['timestamp']
    start_t = max(data[0]['timestamp'], end_t - window_minutes * 60)

    now_t = end_t - 60
    for i in range(len(data) - 2, -1, -1):
        if now_t < start_t:
            break

        sample_t = data[i]['timestamp']
        if now_t > sample_t:
            occupancy = data[i]['occupancy']

            # Collect every empty slot between this sample and the next one
            # (newest first), stopping at the window start.
            fillers = []
            while now_t > sample_t and now_t >= start_t:
                fillers.append({'timestamp': now_t, 'occupancy': occupancy})
                now_t -= 60

            # One slice assignment per gap keeps the list ascending and
            # avoids shifting the tail once per inserted entry.
            fillers.reverse()
            data[i + 1:i + 1] = fillers

        # The current slot is accounted for by data[i]; move to the next.
        now_t -= 60

    return data

def _day_part(timestamp):
    """Map a timestamp to its part-of-day bucket, using local time.

    Buckets are three hours wide and start at 07:00 (07-09 -> 0,
    10-12 -> 1, 13-15 -> 2, 16-18 -> 3, 19-21 -> 4, 22-23 -> 5). Hours
    before 07:00 map to -1.
    """
    # ctime() renders local time as e.g. 'Sat Jun  6 16:26:11 1998'; the
    # fourth whitespace-separated field is HH:MM:SS.
    hour = int(ctime(timestamp).split()[3].split(':')[0])
    if hour < 7:
        return -1
    return (hour - 7) // 3


def generate_feature_vector(data, branch_id):
    """Build the single-row model input for one branch.

    The samples are sorted by timestamp, gaps are filled with
    fill_missing_data(), and the newest ``history`` samples are kept. When
    fewer samples are available the front is padded with occupancy 0 and
    part-of-day -1.

    Feature layout (length 2 + 2 * history):
        [ss, dw, <history part-of-day codes>, <history occupancy values>]
    where ss is the branch size class (1 for the small branches, else 2)
    and dw is the day class of the newest sample in local time
    (0 = Mon-Fri, 1 = Sat, 2 = Sun).

    Args:
        data: non-empty list of dicts with 'timestamp' and 'occupancy'
            keys. The list itself is not modified.
        branch_id: numeric identifier of the branch.

    Returns:
        A tuple (x, cc): x is a numpy array of shape (1, 2 + 2 * history)
        and cc is the list of ``history`` occupancy values used in it.

    Raises:
        ValueError: if data is empty, because there is no sample to derive
            the day class from.
    """
    if not data:
        raise ValueError(
            'No occupancy samples available for branch %s' % branch_id)

    samples = sorted(data, key=lambda sample: sample['timestamp'])
    samples = fill_missing_data(samples)

    # Keep the newest `history` samples and left-pad when there are fewer.
    recent = samples[max(0, len(samples) - history):]
    padding = history - len(recent)
    cc = [0] * padding + [s['occupancy'] for s in recent]
    day_parts = [-1] * padding + [_day_part(s['timestamp']) for s in recent]

    # Leeuwarden and Nieuwegein are small branches
    ss = 1 if branch_id in (19463, 19454) else 2

    # The day class is taken from the newest sample.
    weekday = ctime(samples[-1]['timestamp']).split()[0]
    if weekday in ('Mon', 'Tue', 'Wed', 'Thu', 'Fri'):
        dw = 0
    elif weekday == 'Sat':
        dw = 1
    else:
        dw = 2

    x = np.array([ss, dw] + day_parts + cc).reshape(1, -1)

    return x, cc
    
def _write_json(path, payload):
    """Serialize payload as JSON into path, replacing any existing file."""
    with open(path, 'w') as f:
        f.write(json.dumps(payload))


def _report_failure(response_file, message):
    """Mark the shared response as failed and write it to response_file."""
    response['isSuccessful'] = False
    response['errorMessage'] = message
    _write_json(response_file, response)


def main(argv):
    """Predict an occupancy offset for every branch in the input file.

    Args:
        argv: exactly three paths: the input JSON file (a list of branch
            records with 'branchid' and 'historicalOccupancy'), the output
            JSON file for the predictions, and the response JSON file that
            receives the success/failure status.

    Side effects:
        Writes the output and response files and prints the elapsed time
        in seconds. When the input or model file is missing, a failure
        response is written and the process exits (status 0, as before).
        When any other error occurs, a failure response is written and the
        error is re-raised.
    """
    tic = time()
    if len(argv) != 3:
        # Explicit check instead of assert, which is stripped under -O.
        sys.exit(
            'Expected 3 arguments (input_file output_file response_file), '
            'got %d' % len(argv))
    input_file, output_file, response_file = argv

    try:
        # read raw occupancy data
        if not isfile(input_file):
            _report_failure(response_file, 'Invalid file %s' % input_file)
            sys.exit()
        with open(input_file, 'r') as f:
            data = json.load(f)

        # load random forest regressor model
        if not isfile(model):
            _report_failure(
                response_file,
                'Model file %s not found in the same directory as %s'
                % (model, __file__))
            sys.exit()
        # NOTE: unpickling can execute arbitrary code; the model file must
        # come from a trusted source.
        with open(model, 'rb') as f:
            rfr = pickle.load(f)

        # loop through each branch
        output = []
        for branch_data in data:
            # generate feature vector from raw occupancy data
            x, _ = generate_feature_vector(
                branch_data['historicalOccupancy'], branch_data['branchid'])
            pred = rfr.predict(x)[0]
            output.append({
                'branchid': branch_data['branchid'],
                'offset': int(pred)
            })

        # write to file
        _write_json(output_file, output)
    except Exception as err:
        # Record the failure where the caller looks for the status, then
        # let the error propagate so the traceback and exit status are
        # unchanged. sys.exit() above raises SystemExit, which is not an
        # Exception subclass and therefore passes straight through.
        _report_failure(response_file, '%s: %s' % (type(err).__name__, err))
        raise

    _write_json(response_file, response)

    print(time() - tic)

# Script entry point: forward the command-line arguments (without the
# program name) to main().
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)
