5 use lib '/openils/lib/perl5/';
8 use OpenILS::Utils::Fieldmapper;
9 use Digest::MD5 qw/md5_hex/;
10 use OpenSRF::Utils::JSON;
11 use OpenILS::Application::AppUtils;
12 use OpenSRF::AppSession;
15 use Unicode::Normalize;
19 use Time::HiRes qw/time/;
22 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
27 #MARC::Charset->ignore_errors(1);
29 my ($config, $idlfile, $marctype, $enc) =
30 ('/srv/openils/conf/opensrf_core.xml', '/srv/openils/conf/fm_IDL.xml', 'USMARC', 'utf8');
32 my (@files, @trash_fields, $quiet, $startid, $verbose, $directload, $overwritetcn);
34 my ($baseorg,$overdrive_prefix,$tcn_prefix) = ('SITKA','http\:\/\/downloads\.bclibrary\.ca\/ContentDetails\.htm\?ID\=',"Overdrive_");
36 my @req_fields = ('856');
40 'marctype=s' => \$marctype, # format of MARC files being processed defaults to USMARC, often set to XML
41 'encoding=s' => \$enc, # set assumed MARC encoding for MARC::Charset
42 'config=s' => \$config, # location of OpenSRF core config file, defaults to /openils/conf/opensrf_core.xml
43 'file=s' => \@files, # files to process (or you can simple list the files as unnamed arguments, i.e. @ARGV)
44 'required_fields=s' => \@req_fields, # skip any records missing these fields
45 'trash=s' => \@trash_fields, # fields to remove from all processed records
46 'xml_idl=s' => \$idlfile, # location of XML IDL file, defaults to /openils/conf/fm_IDL.xml
47 'startid=i' => \$startid, #starting ID
48 'direct_load' => \$directload, #starting ID
49 'overwritetcn' => \$overwritetcn, #starting ID
50 'quiet' => \$quiet, # do not output progress count
51 'verbose' => \$verbose # do not output progress count
54 my $U = "OpenILS::Application::AppUtils";
55 OpenSRF::System->bootstrap_client( config_file => $config );
56 Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
57 my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
59 @trash_fields = split(/,/,join(',',@trash_fields));
60 @req_fields = split(/,/,join(',',@req_fields));
63 MARC::Charset->ignore_errors(1);
64 MARC::Charset->assume_encoding($enc);
67 if (uc($marctype) eq 'XML') {
73 @files = @ARGV if (!@files);
75 Fieldmapper->import(IDL => $idlfile);
77 select STDERR; $| = 1;
78 select STDOUT; $| = 1;
80 my $batch = new MARC::Batch ( $marctype, @files );
82 $batch->warnings_off();
88 my $id = $startid || 1;
90 PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
95 # Skip records that don't contain a required field (like '245', for example)
96 foreach my $req_field (@req_fields) {
97 if (!$rec->field("$req_field")) {
98 warn "\n!!! Record $count missing required field $req_field, skipping record.\n";
104 # Overdrive - specific code
108 my $tcn_source = 'Overdrive';
110 my $caption = 'DOWNLOADABLE AUDIOBOOK';
112 # this is the base 856 field we're going to generate separate fields for each org unit we're scoping at
116 URIFIELD: foreach my $uri ($rec->field('856')){
118 # Overdrive uses a $3 for Excerpts, we want to keep this intact so carry on then
119 next URIFIELD if ($uri->subfield('3'));
121 # we need a $u for a URL, if we don't have this it is bad
122 my $url = $uri->subfield('u');
124 warn "856 has no URL in rec $id. Skipping";
127 if($uri->subfield('z')){
128 $caption = 'EBOOK' if ($uri->subfield('z') =~ /Book/);
129 $uri->delete_subfield(code => 'z');
130 $uri->add_subfields('y' => 'Click to access online (library card required)');
132 next unless($url =~ m/($overdrive_prefix)/);
134 # trim out Overdrive's magical GUID-looking ID thingy
135 my $overdrivekey = $url;
136 $overdrivekey =~ s/($overdrive_prefix)//g;
139 $tcn_value = $tcn_prefix . $overdrivekey;
142 $uri->add_subfields('9' => $baseorg);
146 # add some arbitrary stuff as prescribed by our cataloguer overlords
147 $rec = adjust_leader($rec);
148 $rec = process_custom_fields($rec,$caption);
151 # END Overdrive - specific code
155 $rec->delete_field($_) for ($rec->field(@trash_fields));
157 my $field901 = MARC::Field->new(
165 $rec->insert_fields_ordered($field901);
167 #print $rec->as_formatted();
171 (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
172 $xml =~ s/^<\?xml.+\?\s*>//go;
173 $xml =~ s/>\s+</></go;
174 $xml =~ s/\p{Cc}//go;
175 $xml = OpenILS::Application::AppUtils->entityize($xml);
176 $xml =~ s/[\x00-\x1f]//go;
178 my $bib = new Fieldmapper::biblio::record_entry;
184 $bib->create_date('now');
186 $bib->edit_date('now');
187 $bib->tcn_source($tcn_source);
188 $bib->tcn_value($tcn_value);
189 $bib->last_xact_id('IMPORT-'.$starttime);
191 if(my $dupetcn = $U->simplereq("open-ils.search","open-ils.search.biblio.tcn",$tcn_value)){
192 if($dupetcn->{count} > 0){
193 my $newid = $dupetcn->{ids}[0];
196 $cstore->request("open-ils.cstore.transaction.begin")->gather(1);
197 my $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.update",$bib)->gather(1);
198 $cstore->request("open-ils.cstore.transaction.commit")->gather(1);
199 print STDERR "overwriting TCN: $tcn_value (id:$id/newid:$newid)\n" if $verbose;
201 print STDERR "duplicate TCN: $tcn_value - (id: $id) not imported \n" if $verbose;
208 my $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.retrieve",$id)->gather(1);
210 if($req->tcn_value ne $tcn_value){
211 print STDERR "incoming record (id:$id/tcn:$tcn_value) does not match record (id:".$req->id."/tcn:".$req->tcn_value.")!\n" if $verbose;
215 } elsif (($req->tcn_value eq $tcn_value) && $overwritetcn){
216 print STDERR "matching record overlaied with incoming record (id:$id/tcn:$tcn_value)\n" if $verbose;
218 $cstore->request("open-ils.cstore.transaction.begin")->gather(1);
219 $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.update",$bib)->gather(1);
220 $cstore->request("open-ils.cstore.transaction.commit")->gather(1);
223 print STDERR "importing new record (id:$id/tcn:$tcn_value)\n" if $verbose;
224 $bib->id(); # clear the id so it's autogen'd
225 $cstore->request("open-ils.cstore.transaction.begin")->gather(1);
226 $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.create",$bib)->gather(1);
227 $cstore->request("open-ils.cstore.transaction.commit")->gather(1);
231 print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
234 if (!$quiet){# && !($count % 50)) {
235 print STDERR "\r$count\t". $count / (time - $starttime);
239 $cstore->disconnect();
243 my $leader = $rec->leader();
244 #$leader = substr($leader,0,5) . 'm' . substr($leader,6,length($leader));
245 $leader = substr($leader,0,9) . 'a' . substr($leader,10,length($leader));
246 $rec->leader($leader);
250 sub process_custom_fields{
255 push @newfields, MARC::Field->new(
257 a => "Requires OverDrive Media Console"
259 push @newfields, MARC::Field->new(
264 push @newfields, MARC::Field->new(
269 $rec->insert_fields_ordered(@newfields);