Anonymizer script
This commit is contained in:
parent
d96faa0fe8
commit
8a303528ab
|
@ -0,0 +1,78 @@
|
|||
#!/usr/bin/env python3
|
||||
'''
|
||||
; ; User
|
||||
; --- ; "username": "u241117"
|
||||
+++ ; --- ; "user_id": 20391,
|
||||
; --- ; "groupname": "ifmto",
|
||||
+++ ; --- ; "group_id": 1597,
|
||||
+++ ; --- ; "account": "ku0646",
|
||||
; --- ; "parent_accounts": "/root/dkrz/ku0646/ku0646",
|
||||
|
||||
; ; Job configuration
|
||||
+++ ; --- ; "jobname": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.ddt.job",
|
||||
; --- ; "job_name": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.ddt.job",
|
||||
; --- ; "work_dir": "/mnt/lustre01/work/ku0646/u241117/TiME/1deg_res/build_dbg",
|
||||
; ; "time_limit": 1800,
|
||||
+++ ; ; "total_cpus": 48,
|
||||
+++ ; ; "total_nodes": 1,
|
||||
+++ ; ; "ntasks_per_node": 1,
|
||||
+++ ; ; "ntasks": 1,
|
||||
+++ ; ; "cpus_per_task": 1,
|
||||
|
||||
; ; Job runtime statistics
|
||||
+++ ; --- ; "jobid": 19611958,
|
||||
+++ ; ; "cluster": "mistral",
|
||||
+++ ; ; "nodes": " m11275 ",
|
||||
+++ ; ; "partition": "compute",
|
||||
+++ ; ; "@start": "2020-02-21T13:41:25",
|
||||
+++ ; ; "@end": "2020-02-21T14:00:48",
|
||||
; ; "@eligible": "2020-02-21T13:41:23",
|
||||
; ; "@submit": "2020-02-21T13:41:23",
|
||||
+++ ; ; "exit_code": "0:0",
|
||||
+++ ; ; "state": "CANCELLED",
|
||||
+++ ; ; "elapsed": 1163,
|
||||
; ; "cpu_hours": 15.506667,
|
||||
|
||||
; ; Other
|
||||
; --- ; "std_in": "/dev/null",
|
||||
; --- ; "std_out": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.%j.out",
|
||||
; --- ; "std_err": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.%j.err",
|
||||
; ; "pack_job_id": 0,
|
||||
; ; "qos": "normal",
|
||||
; ; "alloc_node": "mlogin100",
|
||||
; ; "pack_job_offset": 0,
|
||||
; ; "derived_ec": "0:0",
|
||||
; ; "queue_wait": 2,
|
||||
|
||||
'''
|
||||
|
||||
import os
|
||||
#import time
|
||||
#import json
|
||||
#from difflib import SequenceMatcher
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Fixed XOR keys used to mask the numeric identifiers. The same jobid key is
# applied to every input file, so the outputs can still be joined on 'jobid'.
# NOTE(review): XOR with a constant is reversible by anyone who knows the key;
# keep these values out of published material.
JOBID_XOR_KEY = 22897682
USER_ID_XOR_KEY = 90235
GROUP_ID_XOR_KEY = 30235

# Columns that are removed outright from the published files.
DROPPED_COLUMNS = ('account', 'job_name', 'nodes')


def anonymize_frame(df):
    """Mask identifier columns and drop confidential columns of *df*.

    'jobid' is required and is XOR-ed with JOBID_XOR_KEY. 'user_id' and
    'group_id' are XOR-ed with their own keys when present. Every column
    listed in DROPPED_COLUMNS that exists in the frame is removed; each one
    is checked independently, so a frame that lacks 'account' but still
    carries 'job_name' or 'nodes' does not leak them.

    The identifier columns of the passed frame are overwritten in place; the
    returned frame is the one with the confidential columns removed.

    Raises:
        KeyError: if *df* has no 'jobid' column.
    """
    df['jobid'] = df['jobid'] ^ JOBID_XOR_KEY
    if 'user_id' in df:
        df['user_id'] = df['user_id'] ^ USER_ID_XOR_KEY
    # The original guard tested 'grou_id', which never matched, so group ids
    # were written out unmasked.
    if 'group_id' in df:
        df['group_id'] = df['group_id'] ^ GROUP_ID_XOR_KEY

    present = [col for col in DROPPED_COLUMNS if col in df.columns]
    return df.drop(columns=present)


if __name__ == '__main__':
    FNS = [
        'job_codings_v3_confidential.csv',
        'job_metadata_confidential.csv',
    ]

    for in_fn in FNS:
        # The public file has the same name without the '_confidential' tag.
        out_fn = in_fn.replace('_confidential', '')

        # Never overwrite an already published file.
        if os.path.exists(out_fn):
            print('Skipping %s. File exists.' % in_fn)
            continue

        print('Processing %s' % in_fn)
        anonymize_frame(pd.read_csv(in_fn)).to_csv(out_fn, index=False)
|
Loading…
Reference in New Issue