[root@offline-almalinux8-193-computing slurm-23.11.0]# ./configure --prefix=/tools/OSS/Slurm/23.11.0 --with-munge
[root@offline-almalinux8-193-computing slurm-23.11.0]# make && make install
# slurm.conf file generated by configurator.html.
# Put this file on all nodes of your cluster.
# See the slurm.conf man page for more information.
#
ClusterName=ic-design-cluster-001
SlurmctldHost=offline-almalinux8-193-computing.icinfra.cn
SlurmctldHost=offline-almalinux8-194-computing.icinfra.cn
#
#DisableRootJobs=NO
#EnforcePartLimits=NO
#Epilog=
#EpilogSlurmctld=
#FirstJobId=1
#MaxJobId=67043328
#GresTypes=
#GroupUpdateForce=0
#GroupUpdateTime=600
#JobFileAppend=0
#JobRequeue=1
#JobSubmitPlugins=lua
#KillOnBadExit=0
#LaunchType=launch/slurm
#Licenses=foo*4,bar
#MailProg=/bin/mail
#MaxJobCount=10000
#MaxStepCount=40000
#MaxTasksPerNode=512
#MpiDefault=
#MpiParams=ports=#-#
#PluginDir=
#PlugStackConfig=
#PrivateData=jobs
ProctrackType=proctrack/cgroup
#Prolog=
#PrologFlags=
#PrologSlurmctld=
#PropagatePrioProcess=0
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#RebootProgram=
ReturnToService=1
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmctldPort=6817
SlurmdPidFile=/var/run/slurmd.pid
SlurmdPort=6818
SlurmdSpoolDir=/var/spool/slurmd
SlurmUser=slurm
#SlurmdUser=root
#SrunEpilog=
#SrunProlog=
StateSaveLocation=/var/spool/slurmctld
#SwitchType=
#TaskEpilog=
TaskPlugin=task/affinity,task/cgroup
#TaskProlog=
#TopologyPlugin=topology/tree
#TmpFS=/tmp
#TrackWCKey=no
#TreeWidth=
#UnkillableStepProgram=
#UsePAM=0
#
#
# TIMERS
#BatchStartTimeout=10
#CompleteWait=0
#EpilogMsgTime=2000
#GetEnvTimeout=2
#HealthCheckInterval=0
#HealthCheckProgram=
InactiveLimit=0
KillWait=30
#MessageTimeout=10
#ResvOverRun=0
MinJobAge=300
#OverTimeLimit=0
SlurmctldTimeout=120
SlurmdTimeout=300
#UnkillableStepTimeout=60
#VSizeFactor=0
Waittime=0
#
#
# SCHEDULING
#DefMemPerCPU=0
#MaxMemPerCPU=0
#SchedulerTimeSlice=30
SchedulerType=sched/backfill
SelectType=select/cons_tres
#
#
# JOB PRIORITY
#PriorityFlags=
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=
#PriorityCalcPeriod=
#PriorityFavorSmall=
#PriorityMaxAge=
#PriorityUsageResetPeriod=
#PriorityWeightAge=
#PriorityWeightFairshare=
#PriorityWeightJobSize=
#PriorityWeightPartition=
#PriorityWeightQOS=
#
#
# LOGGING AND ACCOUNTING
#AccountingStorageEnforce=0
#AccountingStorageHost=
#AccountingStoragePass=
#AccountingStoragePort=
#AccountingStorageType=
#AccountingStorageUser=
#AccountingStoreFlags=
#JobCompHost=
#JobCompLoc=
#JobCompParams=
#JobCompPass=
#JobCompPort=
JobCompType=jobcomp/none
#JobCompUser=
#JobContainerType=
JobAcctGatherFrequency=30
#JobAcctGatherType=
SlurmctldDebug=info
SlurmctldLogFile=/var/log/slurmctld.log
SlurmdDebug=info
SlurmdLogFile=/var/log/slurmd.log
#SlurmSchedLogFile=
#SlurmSchedLogLevel=
#DebugFlags=
#
#
# POWER SAVE SUPPORT FOR IDLE NODES (optional)
#SuspendProgram=
#ResumeProgram=
#SuspendTimeout=
#ResumeTimeout=
#ResumeRate=
#SuspendExcNodes=
#SuspendExcParts=
#SuspendRate=
#SuspendTime=
#
#
# COMPUTE NODES
NodeName=offline-almalinux8-19[5-7]-computing.icinfra.cn State=UNKNOWN
PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
[cloud-user@offline-almalinux8-193-computing ~]$ sudo /tools/OSS/Slurm/23.11.0/sbin/slurmctld -D
slurmctld: error: Configured MailProg is invalid
slurmctld: slurmctld version 23.11.0 started on cluster ic-design-cluster-001
slurmctld: error: _shutdown_bu_thread:send/recv offline-almalinux8-194-computing.icinfra.cn: Connection refused
slurmctld: No memory enforcing mechanism configured.
slurmctld: error: Could not open node state file /var/spool/slurmctld/node_state: No such file or directory
slurmctld: error: NOTE: Trying backup state save file. Information may be lost!
slurmctld: No node state file (/var/spool/slurmctld/node_state.old) to recover
slurmctld: error: Could not open job state file /var/spool/slurmctld/job_state: No such file or directory
slurmctld: error: NOTE: Trying backup state save file. Jobs may be lost!
slurmctld: No job state file (/var/spool/slurmctld/job_state.old) to recover
slurmctld: select/cons_tres: select_p_node_init: select/cons_tres SelectTypeParameters not specified, using default value: CR_Core_Memory
slurmctld: select/cons_tres: part_data_create_array: select/cons_tres: preparing for 1 partitions
slurmctld: error: Could not open reservation state file /var/spool/slurmctld/resv_state: No such file or directory
slurmctld: error: NOTE: Trying backup state save file. Reservations may be lost
slurmctld: No reservation state file (/var/spool/slurmctld/resv_state.old) to recover
slurmctld: error: Could not open trigger state file /var/spool/slurmctld/trigger_state: No such file or directory
slurmctld: error: NOTE: Trying backup state save file. Triggers may be lost!
slurmctld: No trigger state file (/var/spool/slurmctld/trigger_state.old) to recover
slurmctld: Reinitializing job accounting state
slurmctld: select/cons_tres: select_p_reconfigure: select/cons_tres: reconfigure
slurmctld: select/cons_tres: part_data_create_array: select/cons_tres: preparing for 1 partitions
slurmctld: Running as primary controller
[cloud-user@offline-almalinux8-197-computing ~]$ /tools/OSS/Slurm/23.11.0/bin/squeue
JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)
2 debug wrap cloud-us R 1:27:49 1 offline-almalinux8-195-computing.icinfra.cn
I offer specialized consulting for EDA workflows, LSF/Slurm scheduling, and design environment standardization.
Turn these insights into production-ready solutions.