#!/usr/bin/env python3
'''
Mask identifiers in the confidential job CSV files and write public copies.

For every input file listed in FNS the script XORs the numeric ``jobid``,
``user_id`` and ``group_id`` columns with fixed constants, drops the
``account``, ``job_name`` and ``nodes`` columns (when ``account`` is present)
and writes the result next to the input with ``_confidential`` removed from
the file name.  Existing output files are never overwritten.

Example job record.  The ``+++`` / ``---`` markers are kept from the original
notes; their meaning is not stated in this file.

    ;     ; User
    ; --- ;   "username": "u241117"
+++ ; --- ;   "user_id": 20391,
    ; --- ;   "groupname": "ifmto",
+++ ; --- ;   "group_id": 1597,
+++ ; --- ;   "account": "ku0646",
    ; --- ;   "parent_accounts": "/root/dkrz/ku0646/ku0646",
    ;     ; Job configuration
+++ ; --- ;   "jobname": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.ddt.job",
    ; --- ;   "job_name": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.ddt.job",
    ; --- ;   "work_dir": "/mnt/lustre01/work/ku0646/u241117/TiME/1deg_res/build_dbg",
    ;     ;   "time_limit": 1800,
+++ ;     ;   "total_cpus": 48,
+++ ;     ;   "total_nodes": 1,
+++ ;     ;   "ntasks_per_node": 1,
+++ ;     ;   "ntasks": 1,
+++ ;     ;   "cpus_per_task": 1,
    ;     ; Job runtime statistics
+++ ; --- ;   "jobid": 19611958,
+++ ;     ;   "cluster": "mistral",
+++ ;     ;   "nodes": " m11275 ",
+++ ;     ;   "partition": "compute",
+++ ;     ;   "@start": "2020-02-21T13:41:25",
+++ ;     ;   "@end": "2020-02-21T14:00:48",
    ;     ;   "@eligible": "2020-02-21T13:41:23",
    ;     ;   "@submit": "2020-02-21T13:41:23",
+++ ;     ;   "exit_code": "0:0",
+++ ;     ;   "state": "CANCELLED",
+++ ;     ;   "elapsed": 1163,
    ;     ;   "cpu_hours": 15.506667,
    ;     ; Other
    ; --- ;   "std_in": "/dev/null",
    ; --- ;   "std_out": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.%j.out",
    ; --- ;   "std_err": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.%j.err",
    ;     ;   "pack_job_id": 0,
    ;     ;   "qos": "normal",
    ;     ;   "alloc_node": "mlogin100",
    ;     ;   "pack_job_offset": 0,
    ;     ;   "derived_ec": "0:0",
    ;     ;   "queue_wait": 2,
'''

import os
#import time
#import json
#from difflib import SequenceMatcher

import numpy as np
import pandas as pd

# Fixed XOR masks applied to the integer identifier columns.  XOR with a
# constant is a bijection, so distinct IDs stay distinct in the output.
JOBID_MASK = 22897682
USER_ID_MASK = 90235
GROUP_ID_MASK = 30235

# Columns removed from the public copy whenever 'account' is present.
DROPPED_COLUMNS = ['account', 'job_name', 'nodes']


def anonymize(df):
    '''Return a masked copy of *df*; the input frame is left untouched.

    * ``jobid`` is always XORed with JOBID_MASK (KeyError if it is missing).
    * ``user_id`` / ``group_id`` are XORed with their masks when present.
    * If ``account`` is present, ``account``, ``job_name`` and ``nodes`` are
      dropped; pandas raises KeyError if one of the latter two is missing.
    '''
    if 'account' in df:
        out = df.drop(DROPPED_COLUMNS, axis=1)
    else:
        out = df.copy()

    out['jobid'] = out['jobid'] ^ JOBID_MASK
    if 'user_id' in out:
        out['user_id'] = out['user_id'] ^ USER_ID_MASK
    # The original guard tested the misspelled name 'grou_id', so group IDs
    # were never masked and ended up in the public file in clear.
    if 'group_id' in out:
        out['group_id'] = out['group_id'] ^ GROUP_ID_MASK
    return out


if __name__ == '__main__':
    FNS = [
        'job_codings_v3_confidential.csv',
        'job_metadata_confidential.csv',
    ]

    for in_fn in FNS:
        #(name, ext) = os.path.splitext(in_fn)
        out_fn = in_fn.replace('_confidential', '')

        # Never overwrite an already published file.
        if os.path.exists(out_fn):
            print('Skipping %s. File exists.' % in_fn)
            continue

        print('Processing %s' % in_fn)
        anonymize(pd.read_csv(in_fn)).to_csv(out_fn, index=False)