Anonymizer script
This commit is contained in:
		
							parent
							
								
									d96faa0fe8
								
							
						
					
					
						commit
						8a303528ab
					
				
							
								
								
									
										78
									
								
								datasets/anonymise.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										78
									
								
								datasets/anonymise.py
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,78 @@ | ||||
| #!/usr/bin/env python3 | ||||
| '''  | ||||
|         ;     ; User | ||||
|         ; --- ; "username": "u241117" | ||||
|     +++ ; --- ; "user_id": 20391, | ||||
|         ; --- ; "groupname": "ifmto", | ||||
|     +++ ; --- ; "group_id": 1597, | ||||
|     +++ ; --- ; "account": "ku0646", | ||||
|         ; --- ; "parent_accounts": "/root/dkrz/ku0646/ku0646", | ||||
| 
 | ||||
|         ;     ; Job configuration | ||||
|     +++ ; --- ; "jobname": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.ddt.job", | ||||
|         ; --- ; "job_name": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.ddt.job", | ||||
|         ; --- ; "work_dir": "/mnt/lustre01/work/ku0646/u241117/TiME/1deg_res/build_dbg", | ||||
|         ;     ; "time_limit": 1800, | ||||
|     +++ ;     ; "total_cpus": 48, | ||||
|     +++ ;     ; "total_nodes": 1, | ||||
|     +++ ;     ; "ntasks_per_node": 1, | ||||
|     +++ ;     ; "ntasks": 1, | ||||
|     +++ ;     ; "cpus_per_task": 1, | ||||
| 
 | ||||
|         ;     ; Job runtime statistics | ||||
|     +++ ; --- ; "jobid": 19611958, | ||||
|     +++ ;     ; "cluster": "mistral", | ||||
|     +++ ;     ; "nodes": " m11275 ", | ||||
|     +++ ;     ; "partition": "compute", | ||||
|     +++ ;     ; "@start": "2020-02-21T13:41:25", | ||||
|     +++ ;     ; "@end": "2020-02-21T14:00:48", | ||||
|         ;     ; "@eligible": "2020-02-21T13:41:23", | ||||
|         ;     ; "@submit": "2020-02-21T13:41:23", | ||||
|     +++ ;     ; "exit_code": "0:0", | ||||
|     +++ ;     ; "state": "CANCELLED", | ||||
|     +++ ;     ; "elapsed": 1163, | ||||
|         ;     ; "cpu_hours": 15.506667, | ||||
| 
 | ||||
|         ;     ; Other | ||||
|         ; --- ; "std_in": "/dev/null", | ||||
|         ; --- ; "std_out": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.%j.out", | ||||
|         ; --- ; "std_err": "/home/zmaw/u241117/wr-work/TiME/1deg_res/build_dbg/time.%j.err", | ||||
|         ;     ; "pack_job_id": 0, | ||||
|         ;     ; "qos": "normal", | ||||
|         ;     ; "alloc_node": "mlogin100", | ||||
|         ;     ; "pack_job_offset": 0, | ||||
|         ;     ; "derived_ec": "0:0", | ||||
|         ;     ; "queue_wait": 2, | ||||
| 
 | ||||
| ''' | ||||
| 
 | ||||
| import os | ||||
| #import time | ||||
| #import json | ||||
| #from difflib import SequenceMatcher | ||||
| import numpy as np | ||||
| import pandas as pd | ||||
| 
 | ||||
| 
 | ||||
# XOR masks applied to the numeric identifier columns.
# NOTE(review): XOR with a fixed mask is reversible by anyone who knows the
# mask, so this obfuscates the identifiers rather than protecting them.
JOBID_MASK = 22897682
USER_ID_MASK = 90235
GROUP_ID_MASK = 30235

# Columns removed from the output whenever an 'account' column is present.
DROPPED_COLUMNS = ['account', 'job_name', 'nodes']


def anonymise(df):
    """Mask the identifier columns of *df* in place and return it.

    The 'jobid' column is required and is XOR-ed with JOBID_MASK.  The
    'user_id' and 'group_id' columns are XOR-ed with their own masks when
    they are present.  If the frame has an 'account' column, every column
    listed in DROPPED_COLUMNS is removed.

    Args:
        df: pandas DataFrame with an integer 'jobid' column.

    Returns:
        The same DataFrame object, modified in place.

    Raises:
        KeyError: if 'jobid' is missing, or if 'account' is present but
            one of the other DROPPED_COLUMNS is not.
    """
    df['jobid'] = df['jobid'] ^ JOBID_MASK
    if 'user_id' in df:
        df['user_id'] = df['user_id'] ^ USER_ID_MASK
    # The original check was spelled 'grou_id', so it never matched and
    # group ids were written out unmasked.
    if 'group_id' in df:
        df['group_id'] = df['group_id'] ^ GROUP_ID_MASK
    if 'account' in df:
        df.drop(DROPPED_COLUMNS, inplace=True, axis=1)
    return df


if __name__ == '__main__':
    FNS = [
        'job_codings_v3_confidential.csv',
        'job_metadata_confidential.csv',
        ]

    for in_fn in FNS:
        # The public file name is the input name without '_confidential'.
        out_fn = in_fn.replace('_confidential', '')
        if os.path.exists(out_fn):
            # Never overwrite an output that has already been generated.
            print('Skipping %s. File exists.' % in_fn)
            continue
        print('Processing %s' % in_fn)
        anonymise(pd.read_csv(in_fn)).to_csv(out_fn, index=False)
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user