5 use lib '/openils/lib/perl5/';
8 use OpenILS::Utils::Fieldmapper;
9 use Digest::MD5 qw/md5_hex/;
10 use OpenSRF::Utils::JSON;
11 use OpenILS::Application::AppUtils;
13 use Unicode::Normalize;
17 use Time::HiRes qw/time/;
20 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
24 #MARC::Charset->ignore_errors(1);
26 my ($config, $idlfile, $marctype, $enc) =
27 ('/srv/openils/conf/opensrf_core.xml', '/srv/openils/conf/fm_IDL.xml', 'USMARC', 'utf8');
29 my (@files, @trash_fields, @req_fields, $quiet, $startid);
31 my @targetorg = ('BPR','BFN','BTE');
33 @req_fields = ('856');
34 my $overdrive_prefix = 'http\:\/\/downloads\.bclibrary\.ca\/ContentDetails\.htm\?ID\=';
35 my $tcn_prefix = "LtG_";
38 'marctype=s' => \$marctype, # format of the MARC files being processed; defaults to USMARC, often set to XML
39 'encoding=s' => \$enc, # assumed MARC encoding handed to MARC::Charset; defaults to utf8
40 'config=s' => \$config, # location of OpenSRF core config file; defaults to /srv/openils/conf/opensrf_core.xml
41 'file=s' => \@files, # files to process (or simply list the files as unnamed arguments, i.e. @ARGV)
42 'required_fields=s' => \@req_fields, # skip any record missing one of these fields; may be repeated or comma-separated; '856' is pre-seeded
43 'trash=s' => \@trash_fields, # fields to remove from all processed records; may be repeated or comma-separated
44 'xml_idl=s' => \$idlfile, # location of XML IDL file; defaults to /srv/openils/conf/fm_IDL.xml
45 'startid=i' => \$startid, # starting ID (integer)
46 'quiet' => \$quiet # do not output progress count
49 @trash_fields = split(/,/,join(',',@trash_fields));
50 @req_fields = split(/,/,join(',',@req_fields));
53 MARC::Charset->ignore_errors(1);
54 MARC::Charset->assume_encoding($enc);
57 if (uc($marctype) eq 'XML') {
63 @files = @ARGV if (!@files);
65 Fieldmapper->import(IDL => $idlfile);
67 select STDERR; $| = 1;
68 select STDOUT; $| = 1;
70 my $batch = MARC::Batch->new( $marctype, @files ); # direct class-method call; avoids ambiguous indirect-object syntax
72 $batch->warnings_off();
78 my $id = $startid || 1; # honour --startid when given; otherwise start at 1 (the old "1 || $startid" was always 1)
80 PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
82 print STDERR "======\n";
86 # Skip records that don't contain a required field (like '245', for example)
87 foreach my $req_field (@req_fields) {
88 if (!$rec->field("$req_field")) {
89 warn "\n!!! Record $count missing required field $req_field, skipping record.\n";
95 # Overdrive - specific code
99 my $tcn_source = 'Library To Go';
101 my $caption = 'DOWNLOADABLE AUDIOBOOK';
103 # this is the base 856 field; a separate copy of it is generated for each org unit we are scoping to
107 URIFIELD: foreach my $uri ($rec->field('856')){
109 # Overdrive uses a $3 for Excerpts, we want to keep this intact so carry on then
110 next URIFIELD if ($uri->subfield('3'));
112 # we need a $u for a URL, if we don't have this it is bad
113 my $url = $uri->subfield('u');
115 warn "856 has no URL in rec $id. Skipping";
118 # this record has been through Evergreen if a $9 exists somewhere
119 # instead, we can presumably pull the tcn from the 901 and generate our base field that way
120 if($uri->subfield('9')){
121 # if this doesn't match our prefix, ignore it, we only care about our current prefix
122 next unless($url =~ m/($overdrive_prefix)/);
125 # delete $9 subfield and use this as a base uri field
126 $baseurifield = $uri->clone;
127 $rec->delete_fields($uri);
132 if($uri->subfield('z')){
133 $caption = 'EBOOK' if ($uri->subfield('z') =~ /Book/);
134 $uri->delete_subfield(code => 'z');
135 $uri->add_subfields('z' => 'Click to access online (library card required)');
137 next unless($url =~ m/($overdrive_prefix)/);
139 # trim out Overdrive's magical GUID-looking ID thingy
140 my $overdrivekey = $url;
141 $overdrivekey =~ s/($overdrive_prefix)//g;
144 $tcn_value = $tcn_prefix . $overdrivekey;
146 # we have a base for our scoped fields
147 $baseurifield = $uri->clone;
149 $rec->delete_fields($uri);
156 # add some arbitrary stuff as prescribed by our cataloguer overlords
157 $rec = adjust_leader($rec);
158 $rec = process_custom_fields($rec);
161 # add a scoped field for each org unit in our array
164 my $newfield = $baseurifield->clone();
165 $newfield->add_subfields('9' => $_);
166 $rec->insert_fields_ordered($newfield);
168 die;# $rec->as_formatted();
173 # END Overdrive - specific code
177 $rec->delete_field($_) for ($rec->field(@trash_fields));
179 my $field901 = MARC::Field->new(
187 $rec->insert_fields_ordered($field901);
189 print $rec->as_formatted();
194 $tcn_value = $rec->subfield('901' => 'a');
195 $tcn_source = $rec->subfield('901' => 'b');
196 $id = $rec->subfield('901' => 'c');
198 (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
199 $xml =~ s/^<\?xml.+\?\s*>//go;
200 $xml =~ s/>\s+</></go;
201 $xml =~ s/\p{Cc}//go;
202 $xml = OpenILS::Application::AppUtils->entityize($xml);
203 $xml =~ s/[\x00-\x1f]//go;
205 my $bib = Fieldmapper::biblio::record_entry->new; # direct class-method call; avoids ambiguous indirect-object syntax
211 $bib->create_date('now');
213 $bib->edit_date('now');
214 $bib->tcn_source($tcn_source);
215 $bib->tcn_value($tcn_value);
216 $bib->last_xact_id('IMPORT-'.$starttime);
218 #print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
220 if (!$quiet){# && !($count % 50)) {
221 print STDERR "\r$count\t". $count / (time - $starttime);
227 my $leader = $rec->leader(); # fetch the record's current leader string
228 $leader = substr($leader,0,5) . 'm' . substr($leader,6,length($leader)); # overwrite the character at zero-based offset 5 with 'm'; NOTE(review): MARC leader/05 is record status and leader/06 is type of record -- confirm offset 5 is the intended position
229 $rec->leader($leader); # store the modified leader back on the record
233 sub process_custom_fields{
239 push @newfields, MARC::Field->new(
241 a => "Requires OverDrive Media Console"
243 push @newfields, MARC::Field->new(
248 push @newfields, MARC::Field->new(
253 $rec->insert_fields_ordered(@newfields);