Source code for viewclust_vis.job_scatter

import pandas as pd
import datetime as dt
from datetime import datetime
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go

import viewclust as vc
from viewclust import slurm
from viewclust.target_series import target_series

from viewclust_vis.job_stack import job_stack


def _axis_title_font():
    """Return the font settings shared by the scatter-plot axis titles."""
    return dict(family="Courier New, monospace", size=18, color="#7f7f7f")


def _priority_scatter(frame, x_column, x_label, **scatter_kwargs):
    """Build a priority-vs-``x_column`` scatter figure coloured by partition.

    Extra keyword arguments are forwarded unchanged to ``px.scatter``.
    """
    fig = px.scatter(frame, x=x_column, y='priority',
                     opacity=.3, color="partition", **scatter_kwargs)
    fig.update_layout(
        title=go.layout.Title(text="Job scatter: ", xref="paper", x=0),
        xaxis=go.layout.XAxis(
            title=go.layout.xaxis.Title(
                text=x_label, font=_axis_title_font())),
        yaxis=go.layout.YAxis(
            title=go.layout.yaxis.Title(
                text='Priority', font=_axis_title_font())),
    )
    return fig


def job_scatter(account, target, d_from, d_to='', d_from_drop='',
                out_name='', out_path='',
                plot_jobstack=True, plot_insta=True, plot_cumu=True,
                plot_mem_delta=False, plot_start_wait=False):
    """Query job records for an account and write priority summary figures.

    Parameters
    -------
    account: string
        Name of account for which to query job records (note that Compute
        Canada systems expect a _cpu or _gpu suffix). Also used as the
        prefix of every output file name.
    target: int-like
        The target share value for the account on the system. Accepted for
        interface compatibility; not read by this function.
    d_from: date str
        Beginning of the query period, e.g. '2019-04-01T00:00:00'.
    d_to: date str, optional
        End of the query period, e.g. '2020-01-01T00:00:00'.
        Defaults to now if empty.
    d_from_drop: date str, optional
        Jobs whose 'start' or 'submit' value is not later than this are
        dropped, e.g. '2019-12-01T00:00:00'. No filtering if empty.
    out_name: str, optional
        Text inserted between the account name and the fixed figure suffix
        in each output file name.
    out_path: str, optional
        Directory in which to place the output figure files; created if
        missing. Defaults to the current directory if empty.
    plot_jobstack, plot_insta, plot_cumu, plot_mem_delta, plot_start_wait:
        boolean, optional
        Accepted for interface compatibility; not read by this function.

    Output
    -------
    Writes ``<account><out_name><suffix>`` into ``out_path`` for the
    suffixes violin.html, scatter.html, histogram_y.html, histogram_x.html,
    pend_histogram_y.html, run_histogram_y.html and run_scatter.html.

    Returns the queried job frame with the added columns 'waittime',
    'waittime_hours', 'timelimit_hours' and 'mem_c'.
    """
    # d_to boilerplate
    if d_to == '':
        d_to = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

    # Path('') is the current directory, so an empty out_path no longer
    # fails the way indexing the last character of '' did.
    out_dir = Path(out_path)
    out_dir.mkdir(parents=True, exist_ok=True)

    def _target_file(suffix):
        # plotly is handed a plain string path.
        return str(out_dir / (account + out_name + suffix))

    # Perform job record query
    job_frame = slurm.sacct_jobs(account, d_from, d_to=d_to)
    if d_from_drop != '':
        keep = ((job_frame['start'] > d_from_drop)
                & (job_frame['submit'] > d_from_drop))
        # Copy so the column assignments below act on an independent frame
        # rather than on a filtered selection of the query result.
        job_frame = job_frame[keep].copy()

    print(job_frame)
    print('Number of jobs in query: ' + str(len(job_frame)))

    # Derived columns used by the figures below.
    job_frame['waittime'] = job_frame['start'] - job_frame['submit']
    job_frame['waittime_hours'] = (
        job_frame['waittime'].dt.total_seconds() / 3600)
    job_frame['timelimit_hours'] = (
        job_frame['timelimit'].dt.total_seconds() / 3600)
    job_frame['mem_c'] = job_frame['mem'] / job_frame['reqcpus']

    # Figures over every job in the query.
    fig_viol = px.violin(job_frame, y='priority')
    fig_viol.write_html(_target_file('violin.html'))

    fig_scat = _priority_scatter(
        job_frame, 'waittime_hours', "Wait time hours")
    fig_scat.write_html(_target_file('scatter.html'))

    fig_hist = px.histogram(job_frame, y='priority', color="partition")
    fig_hist.write_html(_target_file('histogram_y.html'))

    fig_hist = px.histogram(job_frame, x='waittime_hours', color="partition")
    fig_hist.write_html(_target_file('histogram_x.html'))

    # Jobs whose state begins with PENDING (str.match anchors at the start).
    job_frame_pend = job_frame[job_frame['state'].str.match('PENDING')]
    fig_hist = px.histogram(job_frame_pend, y='priority', color="partition")
    fig_hist.write_html(_target_file('pend_histogram_y.html'))

    # Jobs whose state begins with RUNNING.
    job_frame_run = job_frame[job_frame['state'].str.match('RUNNING')]
    fig_hist = px.histogram(job_frame_run, y='priority', color="partition")
    fig_hist.write_html(_target_file('run_histogram_y.html'))

    fig_scat = _priority_scatter(
        job_frame_run, 'mem_c', "Memory per cpu", hover_data=['jobid'])
    fig_scat.write_html(_target_file('run_scatter.html'))

    return job_frame