EG install scripts: typo fix
[sitka/sitka-tools.git] / marc_import_overdrive / overdrive-import.pl
CommitLineData
38891e1e
JF
1#!/usr/bin/perl
2use strict;
3use warnings;
4
5use lib '/openils/lib/perl5/';
6
7use Error qw/:try/;
8use OpenILS::Utils::Fieldmapper;
9use Digest::MD5 qw/md5_hex/;
10use OpenSRF::Utils::JSON;
11use OpenILS::Application::AppUtils;
4856f9b1
JF
12use OpenSRF::AppSession;
13use OpenSRF::System;
38891e1e
JF
14use Data::Dumper;
15use Unicode::Normalize;
16use Encode;
17
18use FileHandle;
19use Time::HiRes qw/time/;
20use Getopt::Long;
21use MARC::Batch;
22use MARC::File::XML ( BinaryEncoding => 'utf-8' );
23use MARC::Charset;
24use DBI;
25
4856f9b1 26
38891e1e
JF
27#MARC::Charset->ignore_errors(1);
28
29my ($config, $idlfile, $marctype, $enc) =
30 ('/srv/openils/conf/opensrf_core.xml', '/srv/openils/conf/fm_IDL.xml', 'USMARC', 'utf8');
31
4856f9b1
JF
32my (@files, @trash_fields, $quiet, $startid, $verbose, $directload, $overwritetcn);
33
34my ($baseorg,$overdrive_prefix,$tcn_prefix) = ('SITKA','http\:\/\/downloads\.bclibrary\.ca\/ContentDetails\.htm\?ID\=',"Overdrive_");
38891e1e 35
4856f9b1 36my @req_fields = ('856');
38891e1e 37
38891e1e
JF
38
39GetOptions(
40 'marctype=s' => \$marctype, # format of MARC files being processed defaults to USMARC, often set to XML
41 'encoding=s' => \$enc, # set assumed MARC encoding for MARC::Charset
42 'config=s' => \$config, # location of OpenSRF core config file, defaults to /openils/conf/opensrf_core.xml
43 'file=s' => \@files, # files to process (or you can simple list the files as unnamed arguments, i.e. @ARGV)
44 'required_fields=s' => \@req_fields, # skip any records missing these fields
45 'trash=s' => \@trash_fields, # fields to remove from all processed records
46 'xml_idl=s' => \$idlfile, # location of XML IDL file, defaults to /openils/conf/fm_IDL.xml
47 'startid=i' => \$startid, #starting ID
4856f9b1
JF
48 'direct_load' => \$directload, #starting ID
49 'overwritetcn' => \$overwritetcn, #starting ID
50 'quiet' => \$quiet, # do not output progress count
51 'verbose' => \$verbose # do not output progress count
38891e1e
JF
52);
53
4856f9b1
JF
54my $U = "OpenILS::Application::AppUtils";
55OpenSRF::System->bootstrap_client( config_file => $config );
56Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
57my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');
58
38891e1e
JF
59@trash_fields = split(/,/,join(',',@trash_fields));
60@req_fields = split(/,/,join(',',@req_fields));
61
62if ($enc) {
63 MARC::Charset->ignore_errors(1);
64 MARC::Charset->assume_encoding($enc);
65}
66
67if (uc($marctype) eq 'XML') {
68 'open'->use(':utf8');
69} else {
70 bytes->use();
71}
72
73@files = @ARGV if (!@files);
74
75Fieldmapper->import(IDL => $idlfile);
76
77select STDERR; $| = 1;
78select STDOUT; $| = 1;
79
80my $batch = new MARC::Batch ( $marctype, @files );
81$batch->strict_off();
82$batch->warnings_off();
83
84my $starttime = time;
85my $rec;
86my $count = 0;
87
4856f9b1 88my $id = $startid || 1;
38891e1e
JF
89
90PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
91 next if ($rec == -1);
38891e1e
JF
92 $id++;
93 $count++;
94
95 # Skip records that don't contain a required field (like '245', for example)
96 foreach my $req_field (@req_fields) {
97 if (!$rec->field("$req_field")) {
98 warn "\n!!! Record $count missing required field $req_field, skipping record.\n";
99 next PROCESS;
100 }
101 }
102
103# -----------------
104# Overdrive - specific code
105# -----------------
106
107 my $tcn_value;
4856f9b1 108 my $tcn_source = 'Overdrive';
38891e1e
JF
109
110 my $caption = 'DOWNLOADABLE AUDIOBOOK';
111
112 # this is the base 856 field we're going to generate separate fields for each org unit we're scoping at
113 my $baseurifield;
114
115 # check all 856s
116 URIFIELD: foreach my $uri ($rec->field('856')){
117
118 # Overdrive uses a $3 for Excerpts, we want to keep this intact so carry on then
119 next URIFIELD if ($uri->subfield('3'));
120
121 # we need a $u for a URL, if we don't have this it is bad
122 my $url = $uri->subfield('u');
123 if(!$url){
124 warn "856 has no URL in rec $id. Skipping";
125 }
126
38891e1e
JF
127 if($uri->subfield('z')){
128 $caption = 'EBOOK' if ($uri->subfield('z') =~ /Book/);
129 $uri->delete_subfield(code => 'z');
1cfb727d 130 $uri->add_subfields('y' => 'Click to access online (library card required)');
38891e1e
JF
131
132 next unless($url =~ m/($overdrive_prefix)/);
133
134 # trim out Overdrive's magical GUID-looking ID thingy
135 my $overdrivekey = $url;
136 $overdrivekey =~ s/($overdrive_prefix)//g;
137
138 # make it TCN-ish
139 $tcn_value = $tcn_prefix . $overdrivekey;
140
4856f9b1
JF
141 # add our base $9
142 $uri->add_subfields('9' => $baseorg);
38891e1e
JF
143 }
144 }
145
38891e1e
JF
146 # add some arbitrary stuff as prescribed by our cataloguer overlords
147 $rec = adjust_leader($rec);
1cfb727d 148 $rec = process_custom_fields($rec,$caption);
38891e1e 149
38891e1e
JF
150# -----------------
151# END Overdrive - specific code
152# -----------------
153
154
155 $rec->delete_field($_) for ($rec->field(@trash_fields));
156
157 my $field901 = MARC::Field->new(
158 '901' => ('', ''),
159 a => $tcn_value,
160 b => $tcn_source,
161 c => $id,
162 );
163
164
165 $rec->insert_fields_ordered($field901);
166
4856f9b1 167 #print $rec->as_formatted();
38891e1e 168
4856f9b1 169 #next PROCESS;
38891e1e
JF
170
171 (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
172 $xml =~ s/^<\?xml.+\?\s*>//go;
173 $xml =~ s/>\s+</></go;
174 $xml =~ s/\p{Cc}//go;
175 $xml = OpenILS::Application::AppUtils->entityize($xml);
176 $xml =~ s/[\x00-\x1f]//go;
177
178 my $bib = new Fieldmapper::biblio::record_entry;
179 $bib->id($id);
180 $bib->active('t');
181 $bib->deleted('f');
182 $bib->marc($xml);
183 $bib->creator(0);
184 $bib->create_date('now');
185 $bib->editor(0);
186 $bib->edit_date('now');
187 $bib->tcn_source($tcn_source);
188 $bib->tcn_value($tcn_value);
189 $bib->last_xact_id('IMPORT-'.$starttime);
190
4856f9b1
JF
191 if(my $dupetcn = $U->simplereq("open-ils.search","open-ils.search.biblio.tcn",$tcn_value)){
192 if($dupetcn->{count} > 0){
193 my $newid = $dupetcn->{ids}[0];
194 if($overwritetcn){
195 $bib->id($newid);
196 $cstore->request("open-ils.cstore.transaction.begin")->gather(1);
197 my $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.update",$bib)->gather(1);
198 $cstore->request("open-ils.cstore.transaction.commit")->gather(1);
199 print STDERR "overwriting TCN: $tcn_value (id:$id/newid:$newid)\n" if $verbose;
200 } else {
201 print STDERR "duplicate TCN: $tcn_value - (id: $id) not imported \n" if $verbose;
202 }
203 next PROCESS;
204 }
205 }
206
207 if($directload){
208 my $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.retrieve",$id)->gather(1);
209 if($req){
210 if($req->tcn_value ne $tcn_value){
1cfb727d 211 print STDERR "incoming record (id:$id/tcn:$tcn_value) does not match record (id:".$req->id."/tcn:".$req->tcn_value.")!\n" if $verbose;
4856f9b1
JF
212 if($overwritetcn){
213
214 }
215 } elsif (($req->tcn_value eq $tcn_value) && $overwritetcn){
216 print STDERR "matching record overlaied with incoming record (id:$id/tcn:$tcn_value)\n" if $verbose;
217
218 $cstore->request("open-ils.cstore.transaction.begin")->gather(1);
219 $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.update",$bib)->gather(1);
220 $cstore->request("open-ils.cstore.transaction.commit")->gather(1);
221 }
222 } else {
223 print STDERR "importing new record (id:$id/tcn:$tcn_value)\n" if $verbose;
1cfb727d 224 $bib->id(); # clear the id so it's autogen'd
4856f9b1
JF
225 $cstore->request("open-ils.cstore.transaction.begin")->gather(1);
226 $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.create",$bib)->gather(1);
227 $cstore->request("open-ils.cstore.transaction.commit")->gather(1);
228 }
229 print Dumper($req);
230 } else {
231 print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
232 }
38891e1e
JF
233
234 if (!$quiet){# && !($count % 50)) {
235 print STDERR "\r$count\t". $count / (time - $starttime);
236 }
237}
238
4856f9b1
JF
239$cstore->disconnect();
240
38891e1e
JF
241sub adjust_leader {
242 my $rec = shift;
243 my $leader = $rec->leader();
4856f9b1
JF
244 #$leader = substr($leader,0,5) . 'm' . substr($leader,6,length($leader));
245 $leader = substr($leader,0,9) . 'a' . substr($leader,10,length($leader));
38891e1e
JF
246 $rec->leader($leader);
247 return $rec;
248}
249
250sub process_custom_fields{
251 my $rec = shift;
252 my $caption = shift;
38891e1e
JF
253 my @newfields;
254
255 push @newfields, MARC::Field->new(
256 '538' => (' ', ' '),
257 a => "Requires OverDrive Media Console"
258 );
259 push @newfields, MARC::Field->new(
260 '594' => (' ', ' '),
261 a => "Library To Go"
262 );
263
264 push @newfields, MARC::Field->new(
265 '655' => (' ', '4'),
266 'a' => $caption
267 );
268
269 $rec->insert_fields_ordered(@newfields);
270 return $rec;
271}
272