static char dqs_schedule_rcsid[]="$Id: dqs_schedule.c,v 1.1.1.1 1998/08/18 14:39:13 green Exp $";

/*----------------------------------------------------
 * dqs_schedule.c Tom Green Mon Jan 31 10:43:00 1994
 *
 * Copyright 1993
 *
 * SUPER COMPUTER COMPUTATIONS RESEARCH INSTITUTE
 *            FLORIDA STATE UNIVERSITY
 *
 *
 * SCRI representatives make no claims about the
 * suitability of this software for any purpose.
 * It is provided "as is" without express or
 * implied warranty.
 *
 * $Log: dqs_schedule.c,v $
 * Revision 1.1.1.1  1998/08/18 14:39:13  green
 * DQS 3.2.0.5 WIP Import
 *
 * Revision 1.4  1998/02/03 19:49:30  decker
 * Added note to log, concerning queue in alarm mode
 * Few minor changes for new revision 3.2 s
 *
 * Revision 1.3  1997/12/15 20:40:34  decker
 * Cleaned up Brackets to fix mem leak. (Ron Lee Idea)
 *
 * Revision 1.2  1997/04/15 13:34:42  green
 * added Curtis Janssen's MAXUJOBS patch to scheduler
 *
 * Revision 1.1.1.1  1997/04/10 15:10:33  green
 * DQS 3.1.3.4.1 Distribution
 *
 * Revision 3.35  1997/03/07 14:03:30  nrl
 * Replaced NULL in all string_insert calls for master_queue_exec_str
 * to plug a memory leak.
 *
 * Revision 3.34  1996/11/20 23:04:13  nrl
 * Several fixes submitted by or as a result of investigations by
 * Ron Lee, Bodo Bechenback, Guntram Wolski and Frank Dwyyer.
 *
 * Revision 3.33  1996/09/18  18:34:24  nrl
 * A serious flaw crept in to dqs_schedule.c for the DQS 3.1.3.4
 * release.. The line "lp = Job_head;" was delete by mistake.
 *
 * Revision 3.32  1996/08/26  14:20:57  nrl
 * Incorporated SCRI scheduling changes
 *
 * Revision 3.31  1996/07/10  21:13:21  nrl
 * Fixed an agregous error where lp pointer was missing
 * in job->subpriority
 *
 * Revision 3.30  1996/07/10  20:46:13  nrl
 * Gevious typo error in subpriority loop
 *
 * Revision 3.29  1996/06/27  01:55:59  nrl
 * changes to accomodate osf gcc
 *
 * Revision 3.28  1996/06/17  02:29:06  nrl
 * Updtaes from Guntram Wolski, Ron Lee, John Makosky and
 * Bodo Beckebach
 *
 * Revision 3.27  1996/03/26  00:03:22  nrl
 * changed hold/release to explicit use of hold types
 *
 * Revision 3.26  1996/03/22  04:21:05  nrl
 * Added error cataloguing number to all routines
 *
 * Revision 3.25  1996/03/19  23:27:25  nrl
 * added capability to clean up consumable resources whenthey
 * get out of sync with reality
 *
 * Revision 3.24  1996/03/17  00:57:50  nrl
 * merge in qsub prevalidation scheme and consumable restoration
 *
 * Revision 3.23  1996/03/14  03:16:27  nrl
 * merge in subordinate queues and consumable resource changes
 *
 * Revision 3.22  1996/03/12  17:12:32  nrl
 * removed aborts and replaced with an error messaging scheme
 * to send email to the dqs adminsitrator and wait for
 * actions by that administrator
 *
 * Revision 3.21  1996/02/19  19:02:23  nrl
 * added a separate subpriority field, pluys scheduling_flags and
 * job_seq_number to remove the 3.1.2.4 kludges , modified the
 * scheduling algorith once again
 *
 * Revision 3.20  1996/02/07  13:08:17  nrl
 * Added "process leader" and TMP_FILES link capability
 *
 * Revision 3.19  1996/01/19  20:59:01  nrl
 * merged SCRI code and new job and queue structure changes
 *
 * Revision 3.18  1995/06/27  22:35:05  nrl
 * fixed MAXUJOBS test for gteater than OR EQUAL TO...
 *
 * Revision 3.17  1995/06/22  19:31:10  nrl
 * Added kludgie "subpriority" field to differentiate jobs from the
 * same user. Also fixed MAXUJOBS and added "MAXU" warning to
 * the "qstat" command.
 *
 * Revision 3.16  1995/06/21  16:57:48  nrl
 * Major scheduling changes... added a subpriority field to manage
 * things within the user submitted priority. Added priority info to the
 * accounting file.
 *
 * Revision 3.15  1995/06/15  11:39:46  nrl
 * Changed accounting info for Solaris and AIX.
 *
 * Revision 3.14  1995/05/26  19:07:40  nrl
 * Cleaned up signal handling and the notify option with the
 * help of Ron Lee.
 *
 * Revision 3.13  1995/05/14  18:28:55  nrl
 * Plugged one hole in dqs_execd and qmaster handhsaking...
 * added gethostbyname calls to overcome problems with some
 * systems
 *
 * Revision 3.12  1995/03/07  21:06:23  nrl
 * Running jobs were unnecessarily being put through the
 * dqs_schedule_job routine.
 *
 * Revision 3.11  1995/03/06  22:45:16  nrl
 * changed more ERROR prints to DPRINTF since they are informational only
 *
 * Revision 3.10  1995/03/06  12:31:28  nrl
 * Cleaned up annotation for dqs_schedule
 *
 * Revision 3.9  1995/03/05  16:22:40  nrl
 * Moved some scheduling messages to DPRINTF from ERROR
 *
 * Revision 3.8  1995/03/05  03:47:32  nrl
 * Included Axel Brandes job scheduling mechanism to keep one
 * user from hawging the queue.
 *
 * Revision 3.7  1995/02/21  20:37:00  nrl
 * fixed write_queue_to)disk as per A.brandes in schedule.c
 *
 * Revision 3.6  1995/02/01  23:17:35  nrl
 * Tidied up and hopefully bulletproofed "tid" management. Reversed
 * tid file naming to sort by time.
 *
 * Revision 3.5  1995/01/30  15:22:03  nrl
 * added "tid" verification between execd and qmaster to prevent
 * "ghost" jobs from persisting in visible queue. Changed ERROR messages
 * which were for information only to DEBUG messages.
 *
 * Revision 3.4  1994/08/02  23:11:11  green
 * added support for a crude job staging mechanism
 *
 * Revision 3.3  1994/06/03  03:44:04  green
 * updated to support p4/mpi
 *
 * Revision 3.2  1994/06/03  00:25:51  green
 * replaced "DQSX_STR12" with "master_queue_exec_str" in support of MPI
 * mods
 *
 * Revision 3.1  1994/03/24  19:04:47  green
 * had some static holdovers of DQS_EXECD_SERVICE that in reality should
 * have been conf.dqs_execd_service.
 *
 * Revision 3.0  1994/03/07  04:14:26  green
 * 3.0 freeze
 *
 * Revision 1.1.1.1  1994/02/01  17:57:46  green
 * DQS 3.0 ALPHA
 *
 *--------------------------------------------------*/


#include "h.h"
#include "def.h"
#include "dqs.h"
#include "struct.h"
#include "func.h"
#include "globals.h"
#include "dqs_errno.h"

/************************************************************************/
/*
 * dqs_schedule - scheduler entry point.
 *
 * job != NULL : refresh queue state, run dqs_do_schedule() on that one job
 *               and return its status (the original comment says only
 *               QSUB should use this path).
 * job == NULL : "batch" pass over the global job list Job_head.  When
 *               jobs_to_start is zero the pass is rate limited: it runs
 *               only if more than conf.schedule_time has elapsed since
 *               the last pass or the global `reschedule` flag is set.
 *
 * A batch pass:
 *   1. refreshes queue state and sorts Queue_head,
 *   2. resets per-job scheduling_flags / subpriority,
 *   3. recomputes subpriority per user and flags jobs at or beyond
 *      conf.maxujobs with MAX_JOBS_EXCEEDED,
 *   4. re-sorts Job_head in place using dqs_test_priority(),
 *   5. tries to schedule at most JOBS_SCANNED_PER_PASS not-yet-visited
 *      jobs, using list-element field int0 as the "visited" mark so the
 *      next call resumes where this one stopped.
 *
 * Returns 0 for every batch pass (including a skipped one); for the
 * single-job path returns whatever dqs_do_schedule() returned.
 *
 * NOTE(review): old_priority, newcomer, listel and lp2 are never
 * referenced by live code; new_priority and min_maxjob are assigned but
 * never read.
 */
int dqs_schedule(job)
     dqs_job_type  *job;
     
{
  
  int           status, user_job_count, new_priority, old_priority;
  int           newcomer,min_maxjob;
  static u_long now=999999;   /* time of the current batch pass         */
  static u_long then=0;       /* time of the previous batch pass        */
  dqs_list_type listel;
  dqs_list_type *lp;
  dqs_list_type *lp2;
  
  int           jobs_scanned;
  dqs_list_type *tmp_lp1, *tmp_lp2;
  
  DENTER((DQS_EVENT,"dqs_schedule"));
  
  if ((!job)&&!jobs_to_start)
    { /* qmaster "batch" scheduling on a regular interval */
      now=dqs_get_gmt();
      /* force a pass once more than conf.schedule_time has elapsed */
      if (((u_long)labs(now-then)) > conf.schedule_time)  {reschedule=TRUE;}
      
      if (!reschedule)
	{
	  /* nothing requested and interval not yet expired */
	  DEXIT;
	  return(0);
	}
      reschedule=FALSE;
      then=now;
    }
  
  dqs_update_queue_state();
  
  if (job) 
    { /* only one who should do this is "QSUB" */
      status=dqs_do_schedule(job);
      DEXIT;
      return(status);
    }
  
  Queue_head=dqs_sort_queues(Queue_head);
  
  /*********************************************************************
   * The following code replaces the kludges which began in DQS 3.1.2.3.
   * A separate subpriority field has been added to the job information.
   * This subpriority field is used as a secondary sort key to
   * differentiate jobs submitted at the same priority level.  This
   * differentiation can be adjusted at each site, but the default
   * supplied here consists of decrementing the subpriority for each job
   * the user has queued during this scheduling pass.  This includes
   * running jobs.  The jobs are then sorted with priority, subpriority
   * and job_number as the primary, secondary and tertiary keys,
   * respectively.
   *
   * Each queue has its own limit on the maximum number of jobs a user is
   * permitted to queue for that resource.  If more jobs appear in the
   * queue for that resource, the excess will NOT be considered during
   * this scheduling pass, but the job will be queued unless:
   *
   * During the pre-validation phase of a newly qsub'd job a number of
   * criteria are examined to see if the job qualifies for queueing.
   * These criteria are:
   *
   * 1. Actual existence of the resource at the time of submission, even
   *    if the resource is in use at the time, or the user has chosen to
   *    override this situation with the "-W" (wait for resource) flag in
   *    the qsub command line.
   *
   * 2. The total number of jobs for a user that are in the RUNNING
   *    state, compared with the minimum "max_user_jobs" over all queues
   *    in which this user has jobs queued, including the newly submitted
   *    job being analyzed.  If the max_user_jobs for the queue(s)
   *    appropriate for the submitted job is smaller than the number of
   *    RUNNING jobs for this user, then the submitted job is rejected
   *    and an error message is sent back to QSUB.
   *
   * In addition each queue now contains a parameter "last_user_delay"
   * which can be set to the number of seconds before a user's job will
   * be considered for execution in a queue in which that user has just
   * finished a job.  This measure prevents users from "capturing" a
   * queue by spawning a new job for that queue before terminating the
   * current job.
   *
   * NOTE(review): in the live code below the per-user limit actually
   * applied is the global conf.maxujobs; the per-queue max_user_jobs
   * logic survives only inside a disabled block.
   */
  
  
  
  
  
  /****          clean up and save all newly submitted  jobs    ********/
  /*****        and reset everyone's subpriorities              ********/
  
  lp = Job_head;
  
  while (lp) {                                   
    /* list elements without a job are skipped here ...               */
    /* NOTE(review): ... but the later loops dereference lp->job      */
    /* without this guard.                                            */
    if(!lp->job){
      lp= lp->next;
      continue;
    }
    if (lp->job->scheduling_flags  & NEWCOMER_FLAG) {
      lp->job->scheduling_flags=0;
      lp->job->schedule_seq_num=0;
      lp->int0 = 0;   /* force an immediate scheduling attempt */
      dqs_write_job_to_disk (lp->job);
    }
    /* applies to newcomers too, overwriting the 0 stored just above */
    lp->job->schedule_seq_num= lp->job->subpriority; /* retain previous pass history */
    lp->job->subpriority=0;
    lp->job->scheduling_flags=0;
    DPRINTF ((DQS_EVENT, "Clean Job %08x",lp->job->priority));
    
    lp = lp->next;
  }
  
  /*****  the subpriority field contains a count of the number of jobs   ***/
  /*****  for a user  at the same priority                               ***/
  
  lp = Job_head;
  while (lp) {
    /* find first job in this list for a user: anything that already  */
    /* received a subpriority (> 0) during this pass is skipped       */
    if(lp->job->subpriority>0){
      lp = lp->next;
      continue;
    }          
    user_job_count=0;
    min_maxjob=conf.maxujobs; /* keep track during pass */
    new_priority= lp->job->priority ;
    
    tmp_lp1 = lp;  /* continue searching for this user's jobs */
    
    /* walk from lp to the end of the list, numbering every job that  */
    /* belongs to the same uid as lp                                  */
    while (tmp_lp1) {
      DPRINTF ((DQS_EVENT, "lp=%s tmp_lp1=%s",lp->job->dqs_job_name,tmp_lp1->job->dqs_job_name));
      if ( tmp_lp1->job->uid == lp->job->uid ){
	user_job_count++;
	/* disabled: earlier per-queue max_user_jobs variant
	  if (tmp_lp1->job->master_queue) {
	  listel.queue=dqs_locate_queue(tmp_lp1->job->master_queue);
	  DPRINTF ((DQS_EVENT, "Found max_job of %d",listel.queue->max_user_jobs));
	  if (listel.queue->max_user_jobs < min_maxjob) 
	  min_maxjob = listel.queue->max_user_jobs;
	  DPRINTF ((DQS_EVENT, "Set min_maxjob to %d",min_maxjob));
	  }
	  
	  if (user_job_count > min_maxjob) {
	  tmp_lp1->job->scheduling_flags = MAX_JOBS_EXCEEDED; 
	  DPRINTF ((DQS_EVENT, "SET max_jobs_exceeded FOR JOB %s",tmp_lp1->job->dqs_job_name));
	  } 
	  
	  if (tmp_lp1->job->priority == lp->job->priority) {
	  tmp_lp1->job->subpriority = user_job_count;
	  }
	  
	  } 
	*/
	/* the user's Nth job with N >= conf.maxujobs is excluded from  */
	/* this pass; otherwise same-priority jobs get N as subpriority */
	if(user_job_count>=conf.maxujobs) {
	  tmp_lp1->job->scheduling_flags=  MAX_JOBS_EXCEEDED;  /* set flag */
	}
	else {
	  if( tmp_lp1->job->priority == lp->job->priority)
	    tmp_lp1->job->subpriority= user_job_count;
	}
      }
      
      tmp_lp1 = tmp_lp1->next;
    }
    
    DPRINTF ((DQS_EVENT, "SubPriority %08x",lp->job->priority));
    
    lp = lp->next;
  }
  
  /***----------------------------------------------------------------*/
  /***      Sort job list placing highest priorities at the head of   */
  /***      the list (highest priority is largest numerical value     */
  /***                for the priority field and the lowest value     */
  /***                in the subpriority field).                      */
  /***      The job number is the third sort key, in order to keep    */
  /***      the original submission order within the priority scheme. */
  /***    Any jobs marked as MAX_JOBS_EXCEEDED are moved to the       */
  /***    end of the sort sequence so they won't get considered for   */
  /***    scheduling                                                  */
  /***----------------------------------------------------------------*/
  lp = Job_head;
  while ( (lp) && (lp->next) ) {
    
    /* most entries will be in the correct order                 */
    if( dqs_test_priority(  lp->job, lp->next->job)) {
      lp = lp->next;
      continue;
    }
    
    /* at the break in descending priority  sequence we assume that */
    /* the odd entry may be one of a kind so we take this one entry */
    /*  and move it up the list to where it belongs                 */
    /*    remove the "odd" entry from the list and make it tmp_lp1 */
    
    tmp_lp1 = lp->next;
    lp->next = lp->next->next;
    
    if ( dqs_test_priority(tmp_lp1->job, Job_head->job) ) {
      
      /* the "odd" entry is a higher priority than head of the list    */
      /* move it to the top and move the Job Head down to the next entry  */ 
      
      tmp_lp1->next = Job_head;
      Job_head = tmp_lp1;
    } else {
      
      /* the "odd" entry is a lower priority than the Job Head so we   */
      /* begin searching for the entry which is less than the "odd" entry */
      
      tmp_lp2 = Job_head;
      
      /* search until we find an entry which is less than the "odd" entry */
      /* or we find ourselves back where we started                       */
      
      while (tmp_lp2 != lp &&
	     dqs_test_priority(tmp_lp2->next->job,tmp_lp1->job) ) {
	tmp_lp2 = tmp_lp2->next;
	if(!tmp_lp2->next) break;
      }
      
      
      /*  insert the "odd"  entry into the list (right after tmp_lp2)  */
      tmp_lp1->next = tmp_lp2->next;
      tmp_lp2->next = tmp_lp1;
    }
    /* lp itself was not moved; step to whatever now follows it */
    lp = lp->next;
  }
  
  
  /*** skip to the first potential job to schedule                    ****/
  /*** each time we enter this scheduler we may begin at a different  ****/
  /*** point in the list, since we do not always complete a pass thru ****/
  /*** all jobs at a single entry. This keeps us from tying up the    ****/
  /**** qmaster completely when there are many jobs queued            ****/
  
  lp = Job_head; 
  /*          while ( (lp && lp->int0) || (lp && (lp->job->scheduling_flags  & MAX_JOBS_EXCEEDED) ) ){*/
  /* NOTE(review): this tests the MAX_JOBS_EXCEEDED bit in job->priority, */
  /* while the flag is stored in job->scheduling_flags everywhere else    */
  /* (see the disabled line above) - confirm this is intentional.         */
  while ( (lp && lp->int0) || (lp && (lp->job->priority  & MAX_JOBS_EXCEEDED) ) ){
    lp = lp->next;
  }
  
  if(lp){
    DPRINTF ((DQS_EVENT, "(%d) start scheduling at job %s",
	      time(0), lp->job->dqs_job_name));
  }
  
  jobs_scanned = 0;
  
  /* visit at most JOBS_SCANNED_PER_PASS jobs not yet marked in int0 */
  while (lp && jobs_scanned < JOBS_SCANNED_PER_PASS)
    {
      
      /* flagged jobs were sorted to the tail, so the first one ends  */
      /* the eligible part of the list                                */
      if(lp->job->scheduling_flags & MAX_JOBS_EXCEEDED) {
	lp = NULL; /* make it look as if we got to the end */
	break; /* end of eligibles */
      }
      
      if (lp->int0) {
	lp = lp->next;        /* skip ones already looked at  this pass*/
	continue;
      }
      else {
	lp->int0 = 1;   /* mark as processed this pass */
	jobs_scanned++;
      }
      
      
      if (lp->job->status!=RUNNING)
	{
	  DPRINTF((DQS_EVENT,"scheduling loop \"%s\"",lp->job->dqs_job_name));
	  
	  tmp_lp1=Job_head;
	  /* disabled: per-job jobs_per_user recount
	    lp->job->jobs_per_user=0;
	    while (tmp_lp1) 
	    {
	    if (lp->job->uid == tmp_lp1->job->uid)
	    lp->job->jobs_per_user++;
	    tmp_lp1=tmp_lp1->next;
	    }
	    
	  */
	  
	  /* dqs_do_schedule() returns 0 when the job can be started */
	  if (dqs_do_schedule(lp->job))
	    {
	      DPRINTF((DQS_EVENT,"unable to schedule \"%s\"",lp->job->dqs_job_name));
	    }
	  else
	    {
	      
	      DPRINTF((DQS_EVENT,"about to give \"%s\" away ",lp->job->dqs_job_name));
	      dqs_give_job(lp->job);
	    }
	}
      lp=lp->next;
    }
  
  /*          if (! lp  || (lp->job->scheduling_flags & MAX_JOBS_EXCEEDED) ) {*/
  /* NOTE(review): tests job->priority, not scheduling_flags, as above */
  if (! lp  || (lp->job->priority & MAX_JOBS_EXCEEDED) ) {
    
    /** we have completed a pass through all eligible jobs  */
    /*** reset flags to make all jobs eligible on next pass   */
    
    lp = Job_head;
    while (lp) {
      lp->int0 = 0;
      lp = lp->next;
    }
    /* lp is NULL here, so the partial-pass message below is skipped */
  }
  if(lp){
    
    /* we have completed a partial pass through the list of jobs */
    DPRINTF ((DQS_EVENT, "(%d) end  partial scheduling (%d, %d)",
	      time(0), jobs_scanned,
	      lp->job->priority));
  }
  
  DEXIT;
  return(0);
  
}
/*********************************************************************/
/*  dqs_test_priority - does job1 sort ahead of job2?                */
/*                                                                   */
/*  Sort keys, in order:                                             */
/*    1. MAX_JOBS_EXCEEDED: when exactly one of the two jobs carries */
/*       the flag, the unflagged job wins                            */
/*    2. priority: the larger value wins                             */
/*    3. the larger of subpriority and schedule_seq_num (the value   */
/*       kept from the previous pass): the smaller value wins        */
/*    4. job_number: the smaller value wins                          */
/*                                                                   */
/*  Returns TRUE when job1 ranks strictly ahead of job2, else FALSE. */
/*********************************************************************/

int dqs_test_priority( job1, job2 )
     dqs_job_type   *job1;
     dqs_job_type   *job2;
{
  int rank1, rank2;
  int job1_capped, job2_capped;
  int job1_first;

  DENTER((DQS_EVENT,"dqs_test_priority"));

  job1_capped = (job1->scheduling_flags & MAX_JOBS_EXCEEDED) != 0;
  job2_capped = (job2->scheduling_flags & MAX_JOBS_EXCEEDED) != 0;

  /* Secondary key: the larger of this pass's subpriority and the one  */
  /* remembered from the previous pass, so that a user whose running   */
  /* job ended since the last pass does not tie with users who had no  */
  /* job finish.                                                       */
  rank1 = job1->schedule_seq_num;
  if (job1->subpriority > job1->schedule_seq_num) {
    rank1 = job1->subpriority;
  }
  rank2 = job2->schedule_seq_num;
  if (job2->subpriority > job2->schedule_seq_num) {
    rank2 = job2->subpriority;
  }

  if (job1_capped != job2_capped) {
    /* exactly one job is over the per-user limit: it sorts last */
    job1_first = job2_capped ? TRUE : FALSE;
  }
  else if (job1->priority > job2->priority) {
    job1_first = TRUE;
  }
  else if (job1->priority != job2->priority) {
    job1_first = FALSE;
  }
  else if (rank1 < rank2) {
    /* fewer jobs of the same user queued ahead of job1 */
    job1_first = TRUE;
  }
  else if (rank1 == rank2 && job1->job_number < job2->job_number) {
    /* final tie-break keeps submission order */
    job1_first = TRUE;
  }
  else {
    job1_first = FALSE;
  }

  DEXIT;
  return job1_first;
}

/*****************************************************************/
int dqs_do_schedule(job)
     dqs_job_type   *job;
     
{
  
  
  dqs_list_type *lp;
  
  DENTER((DQS_EVENT,"dqs_do_schedule"));;
  
  if (job->status!=IDLE) 
    {
      DPRINTF((DQS_EVENT,"job \"%s\" is not IDLE",job->dqs_job_name));
      DEXIT;
      return(-1);
    } 
  
  if (( VALID(OTHER,job->hold) ) || (VALID(SYSTEM, job->hold) ) ||
      (VALID(USER,job->hold) ) ) {
    DPRINTF((DQS_EVENT,"job \"%s\" is on hold",job->dqs_job_name));
    DEXIT;
    return(-1);
  }
  
  if (dqs_ck_jid_hold_list(job->jid_hold_list))
    {
      DPRINTF((DQS_EVENT,"job \"%s\" is waiting for another to finish",
	       job->dqs_job_name));
      DEXIT;
      return(-1);
    }
  
  if (job->execution_time>dqs_get_gmt())
    {          
      DPRINTF((DQS_EVENT,"job \"%s\" not eligible for execution till %s",
	       job->dqs_job_name,ctime((time_t *)&job->start_time)));
      DEXIT;
      return(-1);
    }
  
  /*----------------------------------------------------------------------------*/
  /* fill hard master request - */
  /* note that we only fill (1) request for master, obviously */
  
  lp=job->hard_master_list;
  while (lp) 
    {
      if (dqs_fill_queue(job,lp->str0,lp->str2,TAIL))
	{
	  DPRINTF((DQS_EVENT,"unable to fill request for HARD MASTER \"%s\"",lp->str0));
	}
      else
	{
	  DPRINTF((DQS_EVENT,"filled HARD MASTER \"%s\"",lp->str0));
	  job->master_queue=dqs_string_insert(NULL,job->granted_destin_identifier_list->str0);
	  job->master_queue_exec_str=dqs_string_insert(job->master_queue_exec_str,job->granted_destin_identifier_list->str2);
	  break;
	}
      DTRACE;
      lp=lp->next;
      DTRACE;
    }
  DTRACE;
  
  if ((!job->master_queue)&&(job->hard_master_list))
    { /* no need to go any further */
      DTRACE;
      dqs_mark_clean(job);
      DEXITE;
      return(-1);
    }
  
  /*----------------------------------------------------------------------------*/
  /* fill hard queue request(s) - */
  /* we must fill ALL of them to successfully schedule */
  
  lp=job->hard_queue_list;
  while (lp)
    {
      if (dqs_fill_queue(job,lp->str0,lp->str2,TAIL))
	{ /* no need to go any further */
	  DPRINTF((DQS_EVENT,"unable to fill request for HARD MASTER \"%s\"",lp->str0));
	  dqs_mark_clean(job);
	  DEXITE;
	  return(-1);
	}
      else
	{
	  DPRINTF((DQS_EVENT,"filled HARD QUEUE \"%s\"",lp->str0));
	  job->master_queue=dqs_string_insert(NULL,job->granted_destin_identifier_list->str0);
	  job->master_queue_exec_str=dqs_string_insert(job->master_queue_exec_str,job->granted_destin_identifier_list->str2);
	}
      lp=lp->next;
      DTRACE;
    }
  DTRACE;
  
  /*----------------------------------------------------------------------------*/
  /* fill hard resource request(s) */
  /* we must fill ALL of them to successfully schedule */
  
  lp=job->hard_resource_list;
  while (lp)
    {
      dqs_select_queues(lp->chain,job);
      if (dqs_tag_queues(job,lp->int0,lp->str2,dqs_length_of_list(lp->chain)))
	{
	  DPRINTF((DQS_EVENT,"unable to fill request for HARD RESOURCE request"));
	  dqs_mark_clean(job);
	  DEXITE;
	  return(-1);
	}
      dqs_clear_suitability();
      lp=lp->next;
    }
  
  /*----------------------------------------------------------------------------*/
  /* fill soft master resource request - if need be and if available */
  /* note that we only fill (1) request for master, obviously */
  
  if (!job->master_queue)
    {
      lp=job->soft_master_list;
      while (lp) 
	{
	  if (dqs_fill_queue(job,lp->str0,lp->str2,HEAD))
	    {
	      DPRINTF((DQS_EVENT,"unable to fill request for SOFT MASTER \"%s\"",lp->str0));
	    }
	  else
	    {
	      DPRINTF((DQS_EVENT,"filled SOFT MASTER \"%s\"",lp->str0));
	      job->master_queue=dqs_string_insert(NULL,job->granted_destin_identifier_list->str0);
	      job->master_queue_exec_str=dqs_string_insert(job->master_queue_exec_str,
							   job->granted_destin_identifier_list->str2);
	      break;
	    }
	  DTRACE;
	  lp=lp->next;
	  DTRACE;
	}
      DTRACE;
    }
  
  /*----------------------------------------------------------------------------*/
  /* fill soft queue request(s) - if available */
  
  lp=job->soft_queue_list;
  while (lp)
    {
      if (dqs_fill_queue(job,lp->str0,lp->str2,TAIL))
	{ /* no need to go any further */
	  DPRINTF((DQS_EVENT,"unable to fill request for HARD MASTER \"%s\"",lp->str0));
	}
      else
	{
	  DPRINTF((DQS_EVENT,"filled HARD QUEUE \"%s\"",lp->str0));
	}
      lp=lp->next;
      DTRACE;
    }
  DTRACE;
  
  /*----------------------------------------------------------------------------*/
  /* fill soft resource request(s) */
  /* we fill as many as possible */
  
  lp=job->soft_resource_list;
  while (lp)
    {
      dqs_select_queues(lp->chain,job);
      dqs_tag_queues(job,lp->int0,lp->str2,dqs_length_of_list(lp->chain));
      dqs_clear_suitability();
      lp=lp->next;
    }
  
  if (job->granted_destin_identifier_list)
    job->granted_destin_identifier_list->int1=MASTER;
  
  if (!job->master_queue)
    {
      if (job->granted_destin_identifier_list)
	{
	  job->master_queue=dqs_string_insert(NULL,
					      job->granted_destin_identifier_list->str0);
	  if (dqs_verify_queue_load (job->master_queue)) {
	    DPRINTF((DQS_EVENT,"We bombed while filling soft requests"));
	    DEXITE;
	    dqs_mark_clean(job);
	    return(-1);
	  }
	  
	  job->master_queue_exec_str=dqs_string_insert(job->master_queue_exec_str,
						       job->granted_destin_identifier_list->str2);
	  DEXIT;
	  return(0);
	}
      else
	{
	  DEXITE;
	  dqs_mark_clean(job);
	  return(-1);
	}
    }
  if (dqs_verify_queue_load (job->master_queue)) {
    DPRINTF((DQS_EVENT,"we bombed while checking the master queue"));
    DEXITE;
    dqs_mark_clean(job);
    return(-1);
  }
  
  
  DEXIT;
  return(0);
  
}

/*****************************************************************/
int dqs_fill_queue(job,qname,exec_str,where)
     dqs_job_type *job;
     char         *qname;
     char         *exec_str;
     int          where;
     
{
  
  u_long32      now;
  dqs_list_type listel;
  dqs_list_type *q_list;
  
  DENTER((DQS_EVENT,"dqs_fill_queue"));
  
  now=dqs_get_gmt();
  
  q_list=Queue_head;
  while (q_list)
    {
      /*  provide delay before the same user can reschedule the same queue  */
      
      if ( (job) && (q_list->queue->last_user_delay> 0 ) ){
	if (q_list->queue->last_user->user) {
	  if (!strcmp(q_list->queue->last_user->user,job->owner)) {
	    if (now < q_list->queue->last_user->int0) {
	      q_list=q_list->next;
	      continue;
	    }
	  }
	}
      }
      
      
      if (dqs_wildmat(qname,q_list->queue->qname)){
#ifdef FRANK
	if (job->jobs_per_user >= q_list->queue->max_user_jobs) {
	  DPRINTF((DQS_EVENT,"Queue %s maxjob is too low",q_list->queue->qname));
	  job->scheduling_flags = MAX_JOBS_EXCEEDED;
	  q_list=q_list->next;
	  continue;
	}
	
	/* clear the MAX_JOBS_EXCEEDED flag, if it's set, since we finally
	   found a queue which has max_user_jobs > jobs_per_user */
	job->scheduling_flags = 0;
#endif
	if (dqs_queue_available(q_list->queue)){
	  q_list=q_list->next;
	  continue;
	}
	
	if (dqs_verify_queue_load (q_list->queue->qname)) {
	  DPRINTF((DQS_EVENT,"queue %s throuwn out due to alarm state",q_list->queue->qname));
	  q_list = q_list->next;
	  continue;
	}
	
	if (!dqs_valid_queue_user(job->owner,q_list->queue)){
	  
	  q_list=q_list->next;
	  continue;
	}
	
	
	DPRINTF((DQS_EVENT,"adding \"%s\" to job->granted_destin_identifier_list",q_list->queue->qname));
	bzero((char *)&listel,sizeof(listel));
	listel.str0=dqs_string_insert(NULL,q_list->queue->qname);
	listel.str1=dqs_string_insert(NULL,q_list->queue->qhostname);
	listel.str2=dqs_string_insert(NULL,exec_str);
	job->granted_destin_identifier_list=dqs_insert(DQS_STR0,where,
						       job->granted_destin_identifier_list,&listel);
	q_list->queue->tagged+=1;
	DEXIT;
	return(0);
      }
      q_list=q_list->next;
    } 
  
  DEXITE;
  return(-1);
  
}

/*****************************************************************/
/*
 * dqs_mark_clean - undo a scheduling attempt.
 *
 * Passes job->master_queue to dqs_free() and stores the result back,
 * releases job->granted_destin_identifier_list through dqs_free_list()
 * when one exists, and clears the suitable / tagged / suitability_level
 * bookkeeping of every queue on Queue_head.
 */
void dqs_mark_clean(job)
     dqs_job_type   *job;
     
{
  dqs_list_type *qp;
  
  DENTER((DQS_EVENT,"dqs_mark_clean"));
  
  job->master_queue=dqs_free(job->master_queue);
  
  if (job->granted_destin_identifier_list)
    {
      DTRACE;
      job->granted_destin_identifier_list=dqs_free_list(job->granted_destin_identifier_list);
    }
  
  /* reset the per-queue scheduling marks for all queues */
  for (qp=Queue_head; qp; qp=qp->next)
    {
      qp->queue->suitable=FALSE;
      qp->queue->tagged=FALSE;
      qp->queue->suitability_level=0;
    }
  
  DEXIT;
  return;
  
}

/*****************************************************************/
void dqs_give_job(job)
     dqs_job_type   *job;
     
{
  
  int              sfd=0;
  dqs_list_type    listel;
  dqs_list_type    *lp;    
  DENTER((DQS_EVENT,"dqs_give_job"));
  
  DPRINTF((DQS_EVENT,"=====job->granted_destin_identifier_list===="));
  dqs_showlist(job->granted_destin_identifier_list,DQS_STR0|DQS_DEBUG,4);
  DPRINTF((DQS_EVENT,"==============================================================="));
  
  if (!job->granted_destin_identifier_list)
    {
      ERROR((DQS_EVENT,"DQS_ERROR_0419 ERROR-NULL-granted_destin_identifier_list-ERROR-ERROR-ERROR-"));
      return;
    }
  if (!job->master_queue)
    {
      ERROR((DQS_EVENT,"DQS_ERROR_0420 ERROR-NULL-master_queue-ERROR-ERROR-ERROR-ERROR-ERROR-ERROR-"));
      return;
    }
  
  bzero((char *)&listel,sizeof(listel));
  listel.type=JOB_EXECUTION;
  listel.job=job;
  
  listel.queue=dqs_locate_queue(job->master_queue);
  
  if (!listel.queue)
    {
      CRITICAL((DQS_EVENT,"DQS_ERROR_0421 error: unable to locate queue \"%s\"",job->master_queue));
      DEXITE;
      abort();
    }
  dqs_consume_resource(job);
  dqs_mark_subordinated(listel.queue);     
  if ((sfd=dqs_send_list(listel.queue->qhostname,conf.dqs_execd_service,sfd,&listel))<0)
    {
      dqs_close_sfd(sfd);
      ERROR((DQS_EVENT,"DQS_ERROR_0422 error: unable to handoff job \"%s\" to queue \"%s\"",
	     job->dqs_job_name,job->master_queue));
      dqs_mark_clean(job);
      dqs_mark_unheard(listel.queue->qhostname);
      
      SETBIT(HANDOFF_ERROR,listel.queue->state);   /* flag a problem till someone looks at the problem*/
      DEXITE;
      return;
    }
  
  
  dqs_close_sfd(sfd);
  
  dqs_commit_job(job);
  
  DPRINTF((DQS_EVENT,
	   "successfully handed off job \"%s\" to queue \"%s\"",
	   job->dqs_job_name, job->master_queue));
  DPRINTF((DQS_EVENT,
	   "startup context of job \"%s\": load_avg=%3d load_alarm=%3d %d",
	   job->dqs_job_name,
	   listel.queue->load_avg, listel.queue->load_alarm,
	   listel.queue->load_avg < listel.queue->load_alarm));
  
  CLEARBIT(HANDOFF_ERROR,listel.queue->state);
  DEXIT;
  return;
  
}

/*****************************************************************/
/*
 * dqs_commit_job - record that a job has been started.
 *
 * Marks the job (status and state) and the head of its
 * granted_destin_identifier_list as RUNNING.  For every queue on
 * Queue_head whose `tagged` count is non-zero, one job_list entry is
 * added per tag (job name, job number, role); the first entry created
 * for the queue named job->master_queue is marked MASTER, every other
 * entry SLAVE.  The tag count is then added to qty_active, the queue's
 * tagged / suitability_level fields are cleared and the queue is
 * written to disk.  Finally start_time is stamped and the job itself is
 * written to disk.
 */
void dqs_commit_job(job)
     dqs_job_type *job;
     
{
  int           slot;
  int           master_assigned=FALSE;
  dqs_list_type entry;
  dqs_list_type *qp;
  
  DENTER((DQS_EVENT,"dqs_commit_job"));
  
  job->status=RUNNING;
  job->state=RUNNING;
  
  job->granted_destin_identifier_list->int0=RUNNING; /* master is running */
  
  for (qp=Queue_head; qp; qp=qp->next)
    {
      if (!qp->queue->tagged)
	continue;   /* this queue was not reserved for the job */
      
      for (slot=0; slot<qp->queue->tagged; slot++)
	{
	  bzero((char *)&entry,sizeof(entry));
	  entry.str0=dqs_string_insert(NULL,job->dqs_job_name);
	  entry.int0=job->job_number;
	  entry.int1=SLAVE;
	  /* only one entry overall may carry the MASTER role */
	  if (!master_assigned && !strcmp(job->master_queue,qp->queue->qname))
	    {
	      entry.int1=MASTER;
	      master_assigned=TRUE;
	    }
	  qp->queue->job_list=dqs_insert(DQS_INT0,ASCENDING,qp->queue->job_list,&entry);
	}
      
      qp->queue->qty_active+=qp->queue->tagged;
      qp->queue->tagged=FALSE;
      qp->queue->suitability_level=0;
      dqs_write_queue_to_disk(qp->queue);
    }
  
  job->start_time=dqs_get_gmt();
  dqs_write_job_to_disk(job);
  
  DEXIT;
  return;
}

/*****************************************************************/
/*
 * dqs_ck_jid_hold_list - is a job still waiting on another job?
 *
 * lp is a list whose elements carry a job number in int0.  If any of those
 * numbers belongs to a job currently on Job_head the function returns -1
 * (the dependency is still present); otherwise, including for an empty
 * list, it returns 0.
 */
int dqs_ck_jid_hold_list(lp)
     dqs_list_type *lp;
     
{
  
  dqs_list_type *hlp,*jlp;
  
  DENTER((DQS_EVENT,"dqs_ck_jid_hold_list"));
  
  for (hlp=lp; hlp; hlp=hlp->next)
    {
      for (jlp=Job_head; jlp; jlp=jlp->next)
	{
	  if (jlp->job->job_number==hlp->int0)
	    {
	      /* NOTE(review): the format has one %d for two arguments; */
	      /* both values are equal here, so the text is left as is  */
	      DPRINTF((DQS_EVENT,"JID %d must wait until JID has completed",
		       hlp->int0,jlp->job->job_number));
	      DEXIT;   /* was missing: keep DENTER/DEXIT balanced */
	      return(-1);
	    }
	}
    }
  
  DEXIT;
  return(0);
  
}
/********************************************************/
int dqs_verify_queue_load (qname)
     char* qname;
{
  dqs_list_type *q_list = (dqs_list_type *) NULL;
  DENTER((DQS_EVENT,"dqs_verify_load"));
  q_list=Queue_head;
  while (q_list) {
    if (! strcmp (qname, q_list->queue->qname)) {
      if (q_list->queue->load_avg <
	  q_list->queue->load_alarm) {
	return (0);
      } else {
	DPRINTF((DQS_EVENT,"Load alarm is set for queue"));
	ERROR((DQS_EVENT,"Queue alarm is on, check load average"));
	return (-1);
	
	
      }
    }
    q_list = q_list->next;
  }
  return (-1);
}



























































