#!/usr/bin/perl # Globals # use DBI; use vars qw/ %opt /; use strict vars; my $dbDir = "db"; my $sep = "\t"; my $tkName = "toolkit"; my $tkUser = "root"; my $tkPasswd = ""; my $accessionsTable = "Accessions"; my $namesTable = "Names"; my $linksTable = "Links"; my $accessionsFile; my $namesFile; my $linksFile; my $doTrace = 0; my $doQueryTrace = 0; my @accession_fields = (); my @names_fields = (); my @link_fields = (); sub usage() { print STDERR << "EOF"; usage: $0 [-vVDAd] -A filename : print ingets file for accessions to given file -N filename : print ingets file for names to given file -L filename : print sql import file for accession,name links to given file -a name : use for Accessions table -d dir : directory containing table files -h : print usage message -l name : use for Links table -n name : use for Names table -p paswd : toolkit database password -t name : use as toolkit database name -u user : toolkit database user name -v : verbose -V : verbose + SQL statement trace EOF exit(1); } my ($accDatabase, $toolkitDatabase) = init("A:N:L:a:n:l:t:u:p:hvVd:"); trace("dbDir\t $dbDir\n"); trace("Accessions Table\t $accessionsTable\tlodaded from\t$accessionsFile\n"); trace("Names Table\t $namesTable\tloaded from\t$namesFile\n"); trace("Links Table\t $linksTable\tloaded from\t$linksFile\n"); trace("tkName\t $tkName\n"); my @accession_fields = ( "accessionDate", "accessionNumber", "title", "extentNumber", "extentType", "acquistionType", "agreementReceived", "agreementSent", "inventory", "processingPlan", "accessRestrictions", "restrictionsApply", "rightsTransferred", "cataloged", "catalogedNote", "accessionDispositionNote", "conditionNote", "description" ); @names_fields = ( "Name_corporatePrimaryName", "Name_corporateSubordinate1", "Name_corporateSubordinate2", "Name_contactAddress1", "Name_contactCity", "Name_contactRegion", "Name_contactMailCode", "Name_contactCountry", "Name_contactPhone", "Name_contactEmail", "Name_personalPrimaryName", "Name_personalRestOfName", "Name_contactAddress1", "Name_contactCity", "Name_contactRegion", "Name_contactMailCode", "Name_contactCountry", "Name_contactPhone", "Name_contactEmail" ); my @link_fields = ( "accessionNumber", "tk_nameFunction" ); if (defined $accessionsFile) { doTheAccessions($accessionsFile, $accDatabase, \@accession_fields, $accessionsTable, $sep); } if (defined $namesFile) { doTheNames($namesFile, $accDatabase, \@names_fields, $namesTable, $sep); } if (defined $linksFile) { doTheLinks($linksFile, $accDatabase, $toolkitDatabase, \@link_fields, \@names_fields, $linksTable, $sep); } $accDatabase->disconnect(); if (defined $toolkitDatabase) { $toolkitDatabase->disconnect(); } sub doTheAccessions() { my ($file, $accDB, $accf, $accTable, $sep) = @_; my @acc_fields = @$accf; my (@data, $sth, $i); my ($nok, $nfail) = (0, 0); open(OUTFILE, "> $file") or die "Can't open $file\n"; # print out a header line for toolkit ingest file print OUTFILE join($sep, @acc_fields) . "\n"; my $sth = quDB($accDB, "SELECT " . join(",", @acc_fields) . " FROM $accTable "); while (@data = $sth->fetchrow_array()) { for ($i = 0; $i < @data; $i = $i + 1) { if ($i > 0) { print OUTFILE $sep; } print OUTFILE trim($data[$i]); } print OUTFILE "\n"; trace( "INGEST ACCESSION: " . arrToString(@data) . "\n"); $nok++; } $sth->finish(); print STDOUT "+++ Accessions:\tFile \'$file\'\t$nok OK\t$nfail FAILED\n"; } sub doTheNames() { my ($file, $accDB, $nmsF, $namesTable, $sep) = @_; my @names_fields = @$nmsF; my ($nok, $nfail) = (0, 0); open(OUTFILE, "> $file") or die "Can't open $file\n"; #open OUTFILE, "> $file" or die "Can't open $file\n";` # create fake accessions for each name my $sth = quDB($accDB, "SELECT " . join(",", @names_fields) . " FROM $namesTable "); # print out a header line for toolkit ingest file print OUTFILE join($sep, @names_fields) . $sep . 'Name_nameType' . $sep . 'nameFunction' . $sep . 'accessionNumber' . $sep . "title" . $sep . 'accessionDate' . "\n" ; my($id) = 1; while (my @namesData = $sth->fetchrow_array()) { my(%name) = makeNameHash(\@names_fields, \@namesData); $id ++; if (($name{"Name_nameType"} eq "person") || ($name{"Name_nameType"} eq "corporate")) { trace( "INGEST NAME: " . hashToString(%name)); print OUTFILE prtHash(\@names_fields, \%name) . $sep . $name{"Name_nameType"} . $sep . "Source" . $sep . "9999.000.$id" . $sep . "ingest-name $id" . $sep . "1000-01-1" . "\n"; $nok++; } else { print STDERR "Name record on line $id in $dbDir/$namesTable is kaput, has bad name type:\n" . hashToString(%name) . "\n"; $nfail++; } } $sth->finish(); print STDOUT "+++ Names:\tFile \'$file\'\t$nok OK\t$nfail FAILED\n"; } sub doTheLinks() { my($file, $accDB, $tkDB, $lnkf, $nmf, $linkTable, $sep) = @_; my ($nok, $nfail) = (0, 0); open(OUTFILE, "> $file") or die "Can't open $file\n"; my $insert_link = "INSERT INTO `ArchDescriptionNames` (" . " `archDescriptionNamesId` , `version` , `lastUpdated` , " . " `created` , `lastUpdatedBy` , " . " `createdBy` , `role` , `function` , `form` , ". " `primaryNameId` , `resourceId` , " . " `resourceComponentId` , `accessionId` , `digitalObjectId` " . " ) VALUES ( " . " NULL, '0', NOW() , NOW() , 'ingest', 'ingest', ''," . " 'tk_nameFunction', ''," . " 'tk_nameId', NULL , NULL , 'tk_accessionId', NULL );"; # read names; list and check my ($sth) = quDB($accDB, "SELECT " . join(",", @link_fields, @names_fields) . " FROM $linksTable "); my($line) = 1; while (my @data = $sth->fetchrow_array()) { $line++; my(%link) = makeLinkHash($accDB, $tkDB, $lnkf, $nmf, \@data); if (!defined $link{'tk_nameId'} || !defined $link{'tk_accessionId'} || !defined $link{'tk_nameFunction'}) { print STDERR "Link Record on line $line in $dbDir/$linksTable is kaput\n\n"; $nfail++; } else { trace( "INGEST LINK: " . hashToString(%link)); my $stmt = $insert_link; $stmt =~ s/tk_nameId/$link{'tk_nameId'}/; $stmt =~ s/tk_accessionId/$link{'tk_accessionId'}/; $stmt =~ s/tk_nameFunction/$link{'tk_nameFunction'}/; print OUTFILE "$stmt\n"; $nok++; } } print STDOUT "+++ Links:\tFile \'$file\'\t$nok OK\t$nfail FAILED\n"; $sth->finish(); } sub makeNameHash() { my ($k, $v) = @_; my (@vals) = @$v; my @keys = @$k; my(%hash) = (); for (my $i = 0; $i < @keys; $i = $i + 1) { $hash{$keys[$i]} = cleanUpNameValue($keys[$i], $vals[$i]); } if ($hash{"Name_personalPrimaryName"}) { if ($hash{"Name_corporatePrimaryName"}) { $hash{"Name_nameType"} = "person and corporate"; } else { $hash{"Name_nameType"} = "person"; } } else { if ($hash{"Name_corporatePrimaryName"}) { $hash{"Name_nameType"} = "corporate"; } else { $hash{"Name_nameType"} = "neither person nor corporate"; } } return %hash; } sub makeLinkHash() { my($accDB, $tkDB, $lnkf, $nmf, $dat) = @_; my(@vals) = @$dat; my(@link_fields) = @$lnkf; my(@names_fields) = @$nmf; my(%hash) = (); my($i); for ($i = 0; $i < @link_fields; $i = $i + 1) { $hash{$link_fields[$i]} = $vals[$i]; } # for consistency reasons: # check whether Names table in dbDir/Names contains given name my $stmt = "SELECT $names_fields[0] FROM $namesTable WHERE"; for ($i = 0; $i < @names_fields; $i = $i + 1) { if (defined $vals[$i]) { $hash{$names_fields[$i]} = $vals[$i + @link_fields]; $stmt = $stmt . " AND " . "$names_fields[$i] = \'" . escapeQuotes($hash{$names_fields[$i]}) . "\' "; } } $stmt =~ s/WHERE AND/WHERE/; my $res = quDB($accDB, $stmt); my @data = $res->fetchrow_array(); if (@data != 1) { print STDERR "Can't find Name in $dbDir/$namesTable;\n" . hashToString(%hash); return (); } # get nameId from toolkit database my $stmt = "SELECT nameId FROM Names WHERE"; for ($i = 0; $i < @names_fields; $i = $i + 1) { if (defined $vals[$i]) { my $field = $names_fields[$i]; my $val = cleanUpNameValue($field, $hash{$field}); $val = escapeQuotes($val) ; $field =~ s/Name_//; $stmt = $stmt . " AND " . "$field = \'$val\' "; } } $stmt =~ s/WHERE AND/WHERE/; my $res = quDB($tkDB, $stmt); my @data = $res->fetchrow_array(); if (@data != 1) { print STDERR "Can't find Name in $tkName database\n" . hashToString(%hash); return (); } $hash{'tk_nameId'} = $data[0]; # find accession in toolkit database $hash{'tk_accessionId'} = getTkAccId($tkDB, $hash{'accessionNumber'}); if (!defined $hash{'tk_accessionId'}) { print STDERR "Can't find Accession ($hash{'accessionNumber'}) in toolkit database\n" . hashToString(%hash); return (); } return %hash; } sub getTkAccId() { my ($dbh, $accId) = @_; my ($ac1, $ac2, $ac3, $ac4) = split('\.', $accId); my ($res) = quDB($dbh, "SELECT accessionID FROM Accessions " . "WHERE `accessionNumber1` = \'$ac1\' " . "AND `accessionNumber2` = \'$ac2\' " . "AND `accessionNumber3` = \'$ac3\' " . "AND `accessionNumber4` = \'$ac4\' ") ; my @data = $res->fetchrow_array(); return $data[0]; } sub prtHash() { my ($k, $h) = @_; my @keys = @$k; my(%names) = %$h; my($first) = 1; my($str) = ""; for my $i (0 .. @keys -1) { if ($first) { $first = 0; } else { $str = $str . $sep; } $str = $str . $names{$keys[$i]}; } return $str; } # # Command line options processing # sub init() { my($opt_string) = @_; my($rc) = 0; my($tkDB, $accDB); use Getopt::Std; getopts( "$opt_string", \%opt ) or usage(); usage() if $opt{h}; if ($opt{'v'}) { $doTrace = 1; } if ($opt{'V'}) { $doQueryTrace = 1; } if ($doQueryTrace) { $doTrace = 1; } if ($opt{'A'}) { $accessionsFile = $opt{'A'}; } if ($opt{'N'}) { $namesFile = $opt{'N'}; } if ($opt{'L'}) { $linksFile = $opt{'L'}; } if ($opt{'a'}) { $accessionsTable = $opt{'a'}; } if ($opt{'n'}) { $namesTable = $opt{'n'}; } if ($opt{'l'}) { $linksTable = $opt{'l'}; } if ($opt{'d'}) { $dbDir = $opt{'d'}; } if ($opt{'t'}) { $tkName = $opt{'t'}; } if ($opt{'u'}) { $tkUser = $opt{'u'}; } if ($opt{'p'}) { $tkPasswd = $opt{'p'}; } if ((!defined $namesFile) && (!defined $accessionsFile) && (!defined $linksFile)) { print STDERR "Must give one of -N -L -N options\n"; $rc = 1; } else { trace("connecting to database DBI:CSV:$dbDir\n"); $accDB = DBI->connect("DBI:CSV:f_dir=$dbDir") || die "could not connect to $dbDir"; if (defined $linksFile) { trace("connecting to database DBI:mysql:$tkName\n"); $tkDB = DBI->connect("DBI:mysql:$tkName", $tkUser, $tkPasswd) || die "could not connect to :mysql:$tkName"; } if ((defined $namesFile && !checkTable($accDB, $namesTable)) || (defined $accessionsFile && !checkTable($accDB, $accessionsTable)) || (defined $linksFile && !checkTable($accDB, $linksTable))) { $rc = 1; } } if ($rc != 0) { exit($rc); } return ($accDB, $tkDB); } sub checkTable { my($accDB, $table) = @_; if ($table !~ /^[a-zA-Z0-9_]+$/) { print STDERR "Table names may contain letters, digits, and '_' only: ". " $table\n"; return 0; } my $sth = quDB($accDB, "SELECT * FROM $table"); trace ("#$table: " . $sth->rows . "\n"); if ($sth->rows < 1) { print STDERR "No/Empty table $dbDir" . "/$table\n"; } return $sth->rows; } sub quDB() { my($db, $query) = @_; if ($doQueryTrace != 0) { print STDERR $query . "\n"; } my($sth) = $db->prepare($query); $sth->execute(); return $sth; } sub cleanUpNameValue() { my($field, $val) = @_; my $val = trim($val); if ($field =~ /Name_corporate/) { $val =~ s/\.*$//; } return $val; } sub hashToString() { my(%record) = @_; my($str) = ""; while ( my ($key, $value) = each(%record) ) { if (defined $value && $value ne "") { $str .= "\t($key) => ($value)\n"; } } return $str; } sub arrToString() { my(@fields) = @_; my($str) = ""; for (my $i = 0; $i < @fields; $i++) { $str .= "(" . trim($fields[$i]) . ") "; } return $str; } sub trim() { my $string = shift; $string =~ s/^\s+//; $string =~ s/\s+$//; $string =~ s/\t+/ /; #$string =~ s/\n*$//; return $string; } sub escapeQuotes() { my ($str) = @_; $str =~ s/'/''/g; return $str; } sub trace() { my($txt) = @_; if ($doTrace != 0) { print STDERR $txt; } }