1 |
#!/home/jsoc/bin/linux_x86_64/activeperl -w |
2 |
|
3 |
# udsumsforarch.pl jsoc_sums hmidb 5434 production many /home/arta/Projects/SUMS/UpdateSUMSArch/testsm.txt |
4 |
|
5 |
use DBI; |
6 |
use DBD::Pg; |
7 |
# ART - This module may not be installed. DO NOT USE CPAN to install modules. Use the ActiveState ppm. Unfortunately, |
8 |
# ActiveState does not provide this module. |
9 |
use Switch; |
10 |
use POSIX qw(strftime); |
11 |
use File::Copy "mv"; |
12 |
|
13 |
use constant kDEBUG => 0; |
14 |
|
15 |
# SUs that are archive pending must be set to DADP |
16 |
use constant kStatDADP => "2"; |
17 |
|
18 |
use constant kTQueryOnePerConn => "conn"; |
19 |
use constant kTQueryOnePerTrans => "xact"; |
20 |
use constant kTQueryManyPerTrans => "many"; |
21 |
|
22 |
use constant kUdListTableTapeFileInfo => "tmp_ulist_tapefiles"; |
23 |
use constant kInTypeList => 0; |
24 |
use constant kInTypeMd5 => 1; |
25 |
use constant kDelim => '|'; |
26 |
|
27 |
use constant kGTarBlock => "256"; |
28 |
|
29 |
use constant kDbTableSumMain => "sum_main"; |
30 |
use constant kDbTableSumPartnAlloc => "sum_partn_alloc"; |
31 |
use constant kDbTableSumFile => "sum_file"; |
32 |
|
33 |
use constant kDbTableSumMainDEBUG => "arta_main"; |
34 |
use constant kDbTableSumPartnAllocDEBUG => "arta_partn_alloc"; |
35 |
use constant kDbTableSumFileDEBUG => "arta_file"; |
36 |
|
37 |
my($err); |
38 |
|
39 |
my($dbname); # name of the db instance to connect to |
40 |
my($dbhost); # name of the db host on which the db instance resides |
41 |
my($dbport); # port on $dbhost through which connections are made |
42 |
my($dbuser); # database user name (to connect with) |
43 |
my($typequery); # one SU per connection, one SU per transaction, or one big query that does all SUs at the same time. |
44 |
my($dfilelist); # list of file paths - each file contains data to update sum_main with. |
45 |
|
46 |
my($dsn); # database connection arguments |
47 |
my($dbh); # database handle |
48 |
my($stmnt); |
49 |
my($row); |
50 |
my($rrows); |
51 |
my($line); |
52 |
my($rv); |
53 |
|
54 |
if ($#ARGV < 5) |
55 |
{ |
56 |
print "Improper argument list.\n"; |
57 |
exit(1); |
58 |
} |
59 |
|
60 |
$dbname = $ARGV[0]; |
61 |
$dbhost = $ARGV[1]; |
62 |
$dbport = $ARGV[2]; |
63 |
$dbuser = $ARGV[3]; |
64 |
$typequery = $ARGV[4]; |
65 |
$dfilelist = $ARGV[5]; |
66 |
|
67 |
$err = 0; |
68 |
|
69 |
if ($typequery eq kTQueryOnePerConn) |
70 |
{ |
71 |
print STDERR "One query per database connection unimplemented.\n"; |
72 |
$err = 1; |
73 |
} |
74 |
elsif ($typequery eq kTQueryOnePerTrans) |
75 |
{ |
76 |
print STDERR "One query per database transaction unimplemented.\n"; |
77 |
$err = 1; |
78 |
} |
79 |
elsif ($typequery eq kTQueryManyPerTrans) |
80 |
{ |
81 |
# batch sunums together |
82 |
my($fpath); |
83 |
my($ftype); |
84 |
my($timenow); |
85 |
my($tapeid); |
86 |
my($ttabrow); |
87 |
my(@md5csums); |
88 |
my($values); |
89 |
my($skip); |
90 |
my($maintable); |
91 |
my($partntable); |
92 |
my($filetable); |
93 |
|
94 |
if (dbconnect($dbname, $dbhost, $dbport, \$dbh)) |
95 |
{ |
96 |
# Open input file for reading - has 2 columns, group and filepath (path to a file provided by Keh-Cheng) |
97 |
if (open(FILELIST, "<$dfilelist")) |
98 |
{ |
99 |
# create temporary table to hold LIST files (series, sunum, sudir, fileid, tapeid) |
100 |
$stmnt = "CREATE TEMPORARY TABLE " . kUdListTableTapeFileInfo . "(sunum bigint, fileid integer, tapeid character varying(20))"; |
101 |
ExecStatement(\$dbh, $stmnt, 1, "Unable to create temporary table " . "'kUdListTableTapeFileInfo'" . ".\n"); |
102 |
|
103 |
$stmnt = "CREATE INDEX " . kUdListTableTapeFileInfo . "_idx on " . kUdListTableTapeFileInfo . "(sunum)"; |
104 |
ExecStatement(\$dbh, $stmnt, 1, "Unable to create index on temporary table " . "'kUdListTableTapeFileInfo'" . ".\n"); |
105 |
|
106 |
CommitTrans(\$dbh); |
107 |
|
108 |
if (kDEBUG) |
109 |
{ |
110 |
$maintable = kDbTableSumMainDEBUG; |
111 |
$partntable = kDbTableSumPartnAllocDEBUG; |
112 |
$filetable = kDbTableSumFileDEBUG; |
113 |
} |
114 |
else |
115 |
{ |
116 |
$maintable = kDbTableSumMain; |
117 |
$partntable = kDbTableSumPartnAlloc; |
118 |
$filetable = kDbTableSumFile; |
119 |
} |
120 |
|
121 |
# loop through filelist |
122 |
while (defined($line = <FILELIST>)) |
123 |
{ |
124 |
chomp($line); |
125 |
|
126 |
if ($line =~ /^\#/) |
127 |
{ |
128 |
next; |
129 |
} |
130 |
|
131 |
if ($line =~ /\s*(\S+)/) |
132 |
{ |
133 |
$fpath = $1; |
134 |
|
135 |
if ($fpath =~ /LIST\.(\w+)/) |
136 |
{ |
137 |
$tapeid = $1; |
138 |
$ftype = kInTypeList; |
139 |
} |
140 |
elsif ($fpath =~ /MD5SUM\.(\w+)/) |
141 |
{ |
142 |
$tapeid = $1; |
143 |
$ftype = kInTypeMd5; |
144 |
} |
145 |
else |
146 |
{ |
147 |
print STDERR "unsupported input file '$fpath'.\n"; |
148 |
next; |
149 |
} |
150 |
} |
151 |
else |
152 |
{ |
153 |
print STDERR "skipping invalid input file $line.\n"; |
154 |
next; |
155 |
} |
156 |
|
157 |
# There are two types of files that Keh-Cheng is producing: |
158 |
# 1. LIST files - cols: series, sunum, sudir, fileid, tapeid |
159 |
# 2. MD5SUM files - cols: fileid, md5sum |
160 |
$skip = 0; |
161 |
if (defined($fpath) && open(DATAFILE, "<$fpath")) |
162 |
{ |
163 |
if ($ftype == kInTypeList) |
164 |
{ |
165 |
$stmnt = "COPY " . kUdListTableTapeFileInfo . " (sunum, fileid, tapeid) FROM stdin WITH DELIMITER '" . kDelim . "'"; |
166 |
ExecStatement(\$dbh, $stmnt, 1, "Troubles copying data to temporary table '" . kUdListTableTapeFileInfo . "'.\n"); |
167 |
} |
168 |
|
169 |
while (defined($line = <DATAFILE>)) |
170 |
{ |
171 |
chomp($line); |
172 |
|
173 |
# The data file might have extra fields not needed for the update - strip those out |
174 |
if ($ftype == kInTypeList) |
175 |
{ |
176 |
if ($line =~ /\S+\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)/) |
177 |
{ |
178 |
$ttabrow = "$1|$2|$3"; |
179 |
} |
180 |
|
181 |
print "Putting row ($ttabrow).\n"; |
182 |
$rv = $dbh->pg_putcopydata("$ttabrow\n"); |
183 |
if (!$rv) |
184 |
{ |
185 |
print STDERR "Failure sending line '$line' to db.\n"; |
186 |
$skip = 1; |
187 |
last; |
188 |
} |
189 |
} |
190 |
else |
191 |
{ |
192 |
# Don't save to a temporary table - this file will be relatively small |
193 |
if ($line =~ /(\S+)\s+(\S+)/) |
194 |
{ |
195 |
# (tapeid, filenum, gtarblock, md5cksum) |
196 |
$values = "('$tapeid', $1, " . kGTarBlock . ", '$2')"; |
197 |
push(@md5csums, $values); |
198 |
} |
199 |
} |
200 |
} |
201 |
|
202 |
if ($ftype == kInTypeList) |
203 |
{ |
204 |
$rv = $dbh->pg_putcopyend(); |
205 |
if (!$rv) |
206 |
{ |
207 |
print STDERR "Failure ending table copy.\n"; |
208 |
$skip = 1; |
209 |
} |
210 |
else |
211 |
{ |
212 |
if (kDEBUG && 0) |
213 |
{ |
214 |
# test to see if this worked. |
215 |
$stmnt = "SELECT * from " . kUdListTableTapeFileInfo; |
216 |
$rrows = $dbh->selectall_arrayref($stmnt, undef); |
217 |
$err = !(NoErr($rrows, \$dbh, $stmnt)); |
218 |
|
219 |
foreach $row (@$rrows) |
220 |
{ |
221 |
print "tmp table row: $row->[0], $row->[1], $row->[2]\n"; |
222 |
} |
223 |
} |
224 |
} |
225 |
} |
226 |
|
227 |
close(DATAFILE); |
228 |
} |
229 |
else |
230 |
{ |
231 |
print "Unable to open '$fpath' for reading.\n"; |
232 |
next; |
233 |
} |
234 |
|
235 |
if (!$skip) |
236 |
{ |
237 |
CommitTrans(\$dbh); # successfully populated temporary table ($ftype == kInTypeList). |
238 |
# when $ftype == kInTypeMd5, this will result in and 'end' being |
239 |
# issued right after a 'begin', but this is harmless. |
240 |
|
241 |
if ($ftype == kInTypeList) |
242 |
{ |
243 |
# Create date string |
244 |
$timenow = strftime("%a %b %e %H:%M:%S %Y", localtime()); |
245 |
|
246 |
# Do a join that will update sum_main with data from temp table |
247 |
# SQL to update sum_main: |
248 |
# archive_status -> character varying(5) |
249 |
# arch_tape -> character varying(20) |
250 |
# arch_tape_fn -> integer |
251 |
# arch_tape_date -> timestamp(0) without time zone |
252 |
$stmnt = "UPDATE $maintable m SET archive_status = 'Y', arch_tape = ul.tapeid, arch_tape_fn = ul.fileid, arch_tape_date = '$timenow' FROM " . kUdListTableTapeFileInfo . " ul WHERE (m.ds_index = ul.sunum)"; |
253 |
ExecStatement(\$dbh, $stmnt, 1, "Troubles updating $maintable.\n"); |
254 |
|
255 |
# SQL to update sum_partn_alloc: |
256 |
# status -> integer |
257 |
# ds_index -> bigint |
258 |
$stmnt = "UPDATE $partntable m SET status = " . kStatDADP . " FROM " . kUdListTableTapeFileInfo . " ul WHERE (m.ds_index = ul.sunum)"; |
259 |
ExecStatement(\$dbh, $stmnt, 1, "Troubles updating $partntable.\n"); |
260 |
|
261 |
# Drop rows from temp table. |
262 |
$stmnt = "DELETE from " . kUdListTableTapeFileInfo; |
263 |
ExecStatement(\$dbh, $stmnt, 1, "Couldn't drop rows from temporary table '" . kUdListTableTapeFileInfo . "'.\n"); |
264 |
} |
265 |
else |
266 |
{ |
267 |
# Insert directly into sum_file (we could also use COPY to copy into a temp table, |
268 |
# then use a query and constant values for tapeid and gtarblock with the INSERT INTO statement): |
269 |
# tapeid -> character varying(20) |
270 |
# filenum -> integer |
271 |
# gtarblock -> integer |
272 |
# md5cksum -> character varying(36) |
273 |
foreach $values (@md5csums) |
274 |
{ |
275 |
$stmnt = "INSERT INTO $filetable (tapeid, filenum, gtarblock, md5cksum) VALUES $values"; |
276 |
ExecStatement(\$dbh, $stmnt, 1, "Troubles updating $filetable.\n"); |
277 |
} |
278 |
|
279 |
@md5csums = (); |
280 |
} |
281 |
|
282 |
# Done updating dbase for $fpath - move to ${fpath}.ingested |
283 |
CommitTrans(\$dbh); |
284 |
|
285 |
if (mv($fpath, "$fpath\.ingested") == 0) |
286 |
{ |
287 |
print STDERR "Failed to rename $fpath to $fpath\.ingested.\n"; |
288 |
} |
289 |
else |
290 |
{ |
291 |
print "Renamed $fpath to $fpath\.ingested\n"; |
292 |
} |
293 |
} |
294 |
else |
295 |
{ |
296 |
RollbackTrans(\$dbh); # problem populating temporary table ($ftype == kInTypeList) |
297 |
} |
298 |
} # loop over data files |
299 |
|
300 |
close(FILELIST); |
301 |
} |
302 |
|
303 |
$dbh->disconnect(); |
304 |
} |
305 |
} |
306 |
|
307 |
# AL FINAL |
308 |
exit($err); |
309 |
|
310 |
# SUBROUTINES |
311 |
sub dbconnect |
312 |
{ |
313 |
my($dbname) = $_[0]; |
314 |
my($dbhost) = $_[1]; |
315 |
my($dbport) = $_[2]; |
316 |
my($dbh) = $_[3]; # returned by reference |
317 |
|
318 |
my($dsn); |
319 |
|
320 |
# connect to the database |
321 |
$dsn = "dbi:Pg:dbname=$dbname;host=$dbhost;port=$dbport"; |
322 |
print "Connection to database with '$dsn' as user '$dbuser' ... "; |
323 |
|
324 |
# Despite ALL documentation saying otherwise, it looks like the error codes/string |
325 |
# provided by DBI are all UNDEFINED, unless there is some kind of failure. So, |
326 |
# never try to look at $dbh->err or $dbh->errstr if the call succeeded. |
327 |
|
328 |
$$dbh = DBI->connect($dsn, $dbuser, '', { AutoCommit => 0 }); # will need to put pass in .pg_pass |
329 |
|
330 |
if (defined($dbh)) |
331 |
{ |
332 |
print "success!\n"; |
333 |
} |
334 |
else |
335 |
{ |
336 |
print "failure!!!!\n"; |
337 |
$err = 1; |
338 |
} |
339 |
} |
340 |
|
341 |
sub ExecStatement |
342 |
{ |
343 |
my($dbh, $stmnt, $doit, $msg) = @_; |
344 |
my($res); |
345 |
|
346 |
print "executing db statement ==> $stmnt\n"; |
347 |
|
348 |
if ($doit) |
349 |
{ |
350 |
$res = $$dbh->do($stmnt); |
351 |
NoErr($res, $dbh, $stmnt) || die $msg; |
352 |
} |
353 |
} |
354 |
|
355 |
sub BeginTrans |
356 |
{ |
357 |
my($dbh) = $_[0]; |
358 |
|
359 |
print "starting transaction\n"; |
360 |
ExecStatement($dbh, "BEGIN", 1, "Couldn't begin transaction.\n"); |
361 |
} |
362 |
|
363 |
sub CommitTrans |
364 |
{ |
365 |
my($dbh) = $_[0]; |
366 |
|
367 |
print "committing transaction\n"; |
368 |
ExecStatement($dbh, "COMMIT", 1, "Couldn't begin transaction.\n"); |
369 |
} |
370 |
|
371 |
sub RollbackTrans |
372 |
{ |
373 |
my($dbh) = $_[0]; |
374 |
|
375 |
print "rolling-back transaction\n"; |
376 |
ExecStatement($dbh, "ROLLBACK", 1, "Couldn't begin transaction.\n"); |
377 |
} |
378 |
|
379 |
sub NoErr |
380 |
{ |
381 |
my($rv) = $_[0]; |
382 |
my($dbh) = $_[1]; |
383 |
my($stmnt) = $_[2]; |
384 |
my($ok) = 1; |
385 |
|
386 |
if (!defined($rv) || !$rv) |
387 |
{ |
388 |
if (defined($$dbh) && defined($$dbh->err)) |
389 |
{ |
390 |
print STDERR "Error " . $$dbh->errstr . ": Statement '$stmnt' failed.\n"; |
391 |
} |
392 |
|
393 |
$ok = 0; |
394 |
} |
395 |
|
396 |
return $ok; |
397 |
} |