#!/bin/tcsh -f
#
# Environment and queue setup.
#   required: JSOCROOT, JSOC_MACHINE, USER, PATH
#   optional: GLOBALHS_V2TNJOBS        (default 20)
#             GLOBALHS_V2TJOBTHRESHOLD (default 40)
#             GLOBALHS_LABEL           (default: no job-name suffix)
#             GLOBALHS_V2TQUEUE        (default k.q)
#
# Note: the settings below keep the if/then/else form on purpose. A one-line
# csh "if (expr) command" expands the variables of the command before the
# test is evaluated, which fails on an unset environment variable.

# JSOC binaries and globalhs scripts take precedence over the inherited PATH.
setenv PATH ${JSOCROOT}/bin/${JSOC_MACHINE}:${JSOCROOT}/proj/globalhs/scripts:${PATH}

# submit njobs jobs at once
if ($?GLOBALHS_V2TNJOBS) then
    set njobs=$GLOBALHS_V2TNJOBS
else
    set njobs=20
endif

# submit next batch of jobs when total number of sht jobs drops below jobthreshold
if ($?GLOBALHS_V2TJOBTHRESHOLD) then
    set jobthreshold=$GLOBALHS_V2TJOBTHRESHOLD
else
    set jobthreshold=40
endif

# a label inherited through the environment becomes the suffix of the job names
if ($?GLOBALHS_LABEL) then
    set suff=.$GLOBALHS_LABEL
else
    set suff=''
endif

# target queue
if ($?GLOBALHS_V2TQUEUE) then
    set q=$GLOBALHS_V2TQUEUE
else
    set q=k.q
endif

# directory handed to qsub -e / -o for the job stdout and stderr files
set qsubtmp=/tmp29/$USER/qsubtmp
mkdir -p $qsubtmp

# the k.q queue is driven through the "2" variants of the queue tools
if ($q == k.q) then
    alias qsub         qsub2
    alias qstat        qstat2
    alias waittosubmit waittosubmit2
endif
39 |
|
# Command-line handling: every argument word is passed to "set", so the
# arguments are expected in name=value form. tag is mandatory; deltal and
# segment fall back to defaults when they were not supplied.
set i = 1
while ($i <= $#argv)
    set $argv[$i]
    @ i = $i + 1
end

if (! $?tag) then
    echo must specify parameter tag
    exit 1
endif

if (! $?deltal) then
    set deltal = 6
endif
if (! $?segment) then
    set segment = vradsum
endif

# tag2 (tag and segment joined by "_") names the job scripts and replaces
# the TTTT placeholder of the parameter file
set tag2 = ${tag}_${segment}

# log when, where and how this script was invoked
echo `date` on $HOST
echo $PWD
echo $0 $argv
58 |
|
# Read the two parameter blanks:
#   v2t.parms.blank    (current directory) -> parms.tmp, in, trecstep, ndt
#   ../ylm.parms.blank (parent directory)  -> tstart/startsecs, MODELIST file
if (! -es v2t.parms.blank) then
    echo parameter file blank missing: v2t.parms.blank is required
    exit 1
endif

# Drop comment lines, turn every whitespace character into a line break and
# discard the resulting empty lines, leaving one token per line in parms.tmp
# (relies on sed treating \n in the replacement as a newline).
grep -v '^[[:space:]]*#' v2t.parms.blank | sed 's@[[:space:]]@\n@g' | grep -v '^[[:space:]]*$' > parms.tmp

# in: value of the in= token with single quotes removed, cut at the first "["
set in = `grep in= parms.tmp | sed "s/'//g" | awk -F '[=[]' '{print $2}'`
# trecstep: 5th comma-separated field of the t_rec_step keyword line of show_info -j
set trecstep = `show_info -j $in | grep -i '^keyword:t_rec_step' | cut -d, -f5`
# ndt: TTOTAL converted by durcon, divided by trecstep (bc, default scale)
set ttotal = `grep TTOTAL= parms.tmp | cut -d= -f2`
set totalsecs = `durcon $ttotal`
set ndt = `echo "$totalsecs/$trecstep" | bc`

if (! -es ../ylm.parms.blank) then
    echo parameter file blank missing: ylm.parms.blank is required
    exit 1
endif

# TSTART from the ylm blank; when it is not given there, use the 3rd field of
# the line following "TSTART (time)" in the mkylms -H output
# (presumably the module default -- confirm against mkylms).
set tstart = `grep -v "^[[:space:]]*#" ../ylm.parms.blank | grep TSTART= | cut -d= -f2`
if ($#tstart == 0) then
    @ line = `mkylms -H | grep -n "TSTART (time)" | cut -d: -f1` + 1
    set tstart = `mkylms -H | sed -n ${line}p | awk '{print $3}'`
endif
set startsecs = `time_convert o=jsoc time=$tstart`

# MODELIST names a file relative to the parent directory; it must be non-empty
set file = `grep -v "^[[:space:]]*#" ../ylm.parms.blank | grep MODELIST= | cut -d= -f2`
if (! -es ../$file) then
    echo MODELIST file $file is missing
    exit 1
endif
89 |
|
# Split the nim lines of the MODELIST file into chunks of ndt lines. Each
# chunk i gets its own parameter file v2t.parms.i, and its jv2ts command is
# appended to job script number (i modulo njobs).
set nim=`wc -l < ../$file`
@ nchunks = ( ( $nim - 1 ) / $ndt ) + 1
#not necessary if while is replaced by foreach below
#if ($njobs > $nchunks) set njobs=$nchunks

set i=0
while ($i < $nchunks)
    # start time of this chunk: startsecs advanced by i*ndt steps of trecstep
    set isecs = `echo "$startsecs + ($i * $ndt * $trecstep)" | bc`
    set istart = `time_convert o=cal zone=tai s=$isecs`

    # first and last MODELIST line (1-based) of this chunk; the last chunk may be short
    @ index = ($i * $ndt) + 1
    @ index2 = $index + $ndt - 1
    if ($index2 > $nim) then
        set index2 = $nim
    endif

    # first field of the first line, lowered by deltal but never below 0
    set lmin = `sed -n ${index}p ../$file | awk '{print $1}'`
    @ lmin = $lmin - $deltal
    if ($lmin < 0) then
        set lmin=0
    endif

    # first field of the last line, raised by deltal
    set lmax = `sed -n ${index2}p ../$file | awk '{print $1}'`
    @ lmax = $lmax + $deltal

    # fill in the placeholders of the tokenised blank
    sed -e s@RRRR@$istart/${ndt}m@ \
        -e s/MMMM/$lmax/ \
        -e s/NNNN/$lmin/ \
        -e s/SSSS/$istart/ \
        -e s/GGGG/$segment/g \
        -e s/TTTT/$tag2/g \
        parms.tmp > v2t.parms.$i

    @ r = $i % $njobs
    set subfile = subt.${r}${suff}_${tag2}
    if ($i < $njobs) then
        # first chunk landing in this job script: (re)create it with its header
        echo '#\!/bin/csh' > $subfile
        echo 'setenv PATH' $JSOCROOT'/bin/$JSOC_MACHINE' >> $subfile
        echo 'setenv JSOC_DBUSER' $JSOC_DBUSER >> $subfile
        echo 'cd' $PWD >> $subfile
    endif

    # run the chunk, then record its exit status in a separate file
    echo "(time jv2ts @v2t.parms.$i) >& v2t.log.$i" >> $subfile
    echo 'echo $status >& jv2ts.exitstatus.'$i >> $subfile
    @ i++
end
120 |
|
# Submit every job script matching subt.*_$tag2 once waittosubmit returns,
# then poll qstat once a minute until none of this run's jobs is listed.
waittosubmit $jobthreshold subt
foreach sub (`/bin/ls subt.*_$tag2`)
    qsub -q $q -e $qsubtmp -o $qsubtmp $sub
end

#set i=0
#while ($i < $njobs)
#  qsub -q $q -e $qsubtmp -o $qsubtmp subt.$i$suff
#@ i++
#end

echo jobs submitted, start waiting

# jobs of this run are recognised by "subt" and the suffix/tag in their full job name
while (1)
    set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subt | grep -c ${suff}_${tag2}`
    if ($njobsrunning <= 0) break
    sleep 60
end
140 |
|
# Work out which chunks have to be run again.
#   rerunlist: indices present in only one of "parameter files" and
#              "log files" (uniq -u), i.e. chunks whose command never ran
#   errlist1 : chunks whose recorded exit status is not exactly 0
#   errlist2 : chunks whose log is dropped by neither filter below, i.e. the
#              count of "successful completion" lines does not match ":1"
#   list     : union of the above, each index once; drives the rerun
set expectedlist = `/bin/ls v2t.parms.[0-9]* | cut -d'.' -f 3`
set ranlist = `/bin/ls v2t.log.* | cut -d'.' -f 3`
set rerunlist = `echo $expectedlist $ranlist | sed s/" "/"\n"/g | sort -n | uniq -u`

# -x compares the whole line: without it any status containing the digit 0
# (10, 20, 100, 130, ...) would be filtered out and taken for success
set errlist1 = `grep -Hvx 0 jv2ts.exitstatus.* | awk -F '[.:]' '{print $3}'`
set errlist2 = `grep -Hc "successful completion" v2t.log.* | grep -v :1 | awk -F '[.:]' '{print $3}'`
set errlist = `echo $errlist1 $errlist2 | sed s/" "/"\n"/g | sort -n | uniq`

# uniq keeps an index that is in both lists from being queued twice, which
# would let two jobs write the same log and exit-status files
set list = `echo $rerunlist $errlist | sed s/" "/"\n"/g | sort -n | uniq`
# Second (and last) attempt: when $list is non-empty, record the failures in
# faillog.$segment, move the old job scripts to savesubt, build new job
# scripts for the listed chunks only, submit them, wait, and check again.
# Exits with status 1 if anything is still missing or failed afterwards.
if ($#list) then
    echo some jobs fail, rerunning
    echo rerunlist: $rerunlist > faillog.$segment
    echo errlist1: $errlist1 >> faillog.$segment
    echo errlist2: $errlist2 >> faillog.$segment
    mkdir -p savesubt
    mv subt.*_$tag2 savesubt

    #now have to start with 1 because that is the first index of list
    set i=1
    while ($i <= $#list)
        @ r = $i % $njobs
        set subfile = subt.$r$suff'_'$tag2
        # the first njobs entries each open a new job script (entry njobs maps to script 0)
        if ($i <= $njobs) then
            echo '#\!/bin/csh' > $subfile
            echo 'setenv PATH' $JSOCROOT'/bin/$JSOC_MACHINE' >> $subfile
            echo 'setenv JSOC_DBUSER' $JSOC_DBUSER >> $subfile
            echo 'cd' $PWD >> $subfile
        endif
        echo \(time jv2ts @v2t.parms.$list[$i]\) '>&' v2t.log.$list[$i] >> $subfile
        echo echo \$status '>&' jv2ts.exitstatus.$list[$i] >> $subfile
        @ i++
    end

    waittosubmit $jobthreshold subt
    set sublist=`/bin/ls subt.*_$tag2`
    foreach sub ($sublist)
        qsub -q $q -e $qsubtmp -o $qsubtmp $sub
    end

    # poll once a minute until none of this run's jobs is listed by qstat
    set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subt | grep $suff'_'$tag2 | wc -l`
    while($njobsrunning > 0)
        sleep 60
        set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subt | grep $suff'_'$tag2 | wc -l`
    end

    # Only numbered parameter files count as expected chunks. A plain
    # v2t.parms.* would also list the mandatory v2t.parms.blank, whose
    # "blank" entry has no log and would make every rerun end in failure.
    set expectedlist = `/bin/ls v2t.parms.[0-9]* | cut -d'.' -f 3`
    set ranlist = `/bin/ls v2t.log.* | cut -d'.' -f 3`
    set rerunlist = `echo $expectedlist $ranlist | sed s/" "/"\n"/g | sort -n | uniq -u`

    # -x compares the whole line, so statuses such as 10 or 130 count as errors
    set errlist1 = `grep -Hvx 0 jv2ts.exitstatus.* | awk -F '[.:]' '{print $3}'`
    set errlist2 = `grep -Hc "successful completion" v2t.log.* | grep -v :1 | awk -F '[.:]' '{print $3}'`
    set errlist = `echo $errlist1 $errlist2 | sed s/" "/"\n"/g | sort -n | uniq`

    if ($#rerunlist || $#errlist) then
        echo some jobs still fail, i give up
        exit 1
    endif
endif
199 |
|
# Reached only when no earlier step exited: remove the tokenised parameter
# blank and report success.
rm parms.tmp

echo "successful completion"
exit 0