#!/usr/bin/env python3
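'''Pseudonymise confidential job CSV exports.

For every input file listed in the main block, the numeric ID columns are
XOR-masked, a few identifying columns are dropped, and the result is written
to a file of the same name without the `_confidential` suffix.

The listing below is one sample job record, kept for reference.
NOTE(review): the meaning of the "+++" and "---" marker columns is not
defined anywhere in this file -- presumably "+++" marks fields of interest
and "---" marks confidential ones; confirm with the author.
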
        ;     ; User
        ; --- ; "username": "u241117"
    +++ ; --- ; "user_id": 20391,
        ; --- ; "groupname": "ifmto",
    +++ ; --- ; "group_id": 1597,
    +++ ; --- ; "account": "ku0646",
        ; --- ; "parent_accounts": "/root/dkrz/ku0646/ku0646",

        ;     ; Job configuration
    +++ ; --- ; "jobname": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.ddt.job",
        ; --- ; "job_name": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.ddt.job",
        ; --- ; "work_dir": "/mnt/lustre01/work/ku0646/u241117/TiME/1deg_res/build_dbg",
        ;     ; "time_limit": 1800,
    +++ ;     ; "total_cpus": 48,
    +++ ;     ; "total_nodes": 1,
    +++ ;     ; "ntasks_per_node": 1,
    +++ ;     ; "ntasks": 1,
    +++ ;     ; "cpus_per_task": 1,

        ;     ; Job runtime statistics
    +++ ; --- ; "jobid": 19611958,
    +++ ;     ; "cluster": "mistral",
    +++ ;     ; "nodes": " m11275 ",
    +++ ;     ; "partition": "compute",
    +++ ;     ; "@start": "2020-02-21T13:41:25",
    +++ ;     ; "@end": "2020-02-21T14:00:48",
        ;     ; "@eligible": "2020-02-21T13:41:23",
        ;     ; "@submit": "2020-02-21T13:41:23",
    +++ ;     ; "exit_code": "0:0",
    +++ ;     ; "state": "CANCELLED",
    +++ ;     ; "elapsed": 1163,
        ;     ; "cpu_hours": 15.506667,

        ;     ; Other
        ; --- ; "std_in": "/dev/null",
        ; --- ; "std_out": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.%j.out",
        ; --- ; "std_err": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.%j.err",
        ;     ; "pack_job_id": 0,
        ;     ; "qos": "normal",
        ;     ; "alloc_node": "mlogin100",
        ;     ; "pack_job_offset": 0,
        ;     ; "derived_ec": "0:0",
        ;     ; "queue_wait": 2,

'''

import os
#import time
#import json
#from difflib import SequenceMatcher
import numpy as np
import pandas as pd


# XOR keys applied to the numeric identifiers. XOR with a constant keeps the
# values distinct (equal inputs stay equal, different inputs stay different)
# but is trivially reversible by anyone who knows the key, so these constants
# must stay out of any published material.
JOBID_MASK = 22897682
USER_ID_MASK = 90235
GROUP_ID_MASK = 30235

# Columns that are removed entirely from the published file.
CONFIDENTIAL_COLUMNS = ['account', 'job_name', 'nodes']


def anonymize_frame(df):
    """Mask the ID columns of *df* and drop its confidential columns.

    The frame is modified in place and also returned for convenience.

    Args:
        df: DataFrame with an integer ``jobid`` column. Integer ``user_id``
            and ``group_id`` columns are masked when present; any of
            ``CONFIDENTIAL_COLUMNS`` that exist are dropped.

    Returns:
        The same DataFrame object, after masking and dropping.

    Raises:
        KeyError: if *df* has no ``jobid`` column.
    """
    df['jobid'] = df['jobid'] ^ JOBID_MASK
    if 'user_id' in df:
        df['user_id'] = df['user_id'] ^ USER_ID_MASK
    # The original code tested for the misspelled column 'grou_id', so
    # group_id was never masked and leaked into the output unchanged.
    if 'group_id' in df:
        df['group_id'] = df['group_id'] ^ GROUP_ID_MASK
    # Drop every confidential column that is present. errors='ignore' avoids
    # a KeyError when a file carries only some of them, and no longer makes
    # the removal of job_name/nodes depend on 'account' being present.
    df.drop(columns=CONFIDENTIAL_COLUMNS, inplace=True, errors='ignore')
    return df


if __name__ == '__main__':
    FNS = [
        'job_codings_v4_confidential.csv',
        #'job_codings_v3_confidential.csv',
        #'job_metadata_confidential.csv',
        ]

    for in_fn in FNS:
        out_fn = in_fn.replace('_confidential', '')
        # Never overwrite an existing output. This also protects an input
        # whose name lacks '_confidential' (out_fn would equal in_fn).
        if os.path.exists(out_fn):
            print('Skipping %s. File exists.' % in_fn)
            continue

        print('Processing %s' % in_fn)
        df = anonymize_frame(pd.read_csv(in_fn))
        df.to_csv(out_fn, index=False)