Commit | Line | Data |
---|---|---|
38891e1e JF |
1 | #!/usr/bin/perl |
2 | use strict; | |
3 | use warnings; | |
4 | ||
5 | use lib '/openils/lib/perl5/'; | |
6 | ||
7 | use Error qw/:try/; | |
8 | use OpenILS::Utils::Fieldmapper; | |
9 | use Digest::MD5 qw/md5_hex/; | |
10 | use OpenSRF::Utils::JSON; | |
11 | use OpenILS::Application::AppUtils; | |
4856f9b1 JF |
12 | use OpenSRF::AppSession; |
13 | use OpenSRF::System; | |
38891e1e JF |
14 | use Data::Dumper; |
15 | use Unicode::Normalize; | |
16 | use Encode; | |
17 | ||
18 | use FileHandle; | |
19 | use Time::HiRes qw/time/; | |
20 | use Getopt::Long; | |
21 | use MARC::Batch; | |
22 | use MARC::File::XML ( BinaryEncoding => 'utf-8' ); | |
23 | use MARC::Charset; | |
24 | use DBI; | |
25 | ||
4856f9b1 | 26 | |
38891e1e JF |
27 | #MARC::Charset->ignore_errors(1); |
28 | ||
29 | my ($config, $idlfile, $marctype, $enc) = | |
30 | ('/srv/openils/conf/opensrf_core.xml', '/srv/openils/conf/fm_IDL.xml', 'USMARC', 'utf8'); | |
31 | ||
4856f9b1 JF |
32 | my (@files, @trash_fields, $quiet, $startid, $verbose, $directload, $overwritetcn); |
33 | ||
34 | my ($baseorg,$overdrive_prefix,$tcn_prefix) = ('SITKA','http\:\/\/downloads\.bclibrary\.ca\/ContentDetails\.htm\?ID\=',"Overdrive_"); | |
38891e1e | 35 | |
4856f9b1 | 36 | my @req_fields = ('856'); |
38891e1e | 37 | |
38891e1e JF |
38 | |
39 | GetOptions( | |
40 | 'marctype=s' => \$marctype, # format of MARC files being processed defaults to USMARC, often set to XML | |
41 | 'encoding=s' => \$enc, # set assumed MARC encoding for MARC::Charset | |
42 | 'config=s' => \$config, # location of OpenSRF core config file, defaults to /openils/conf/opensrf_core.xml | |
43 | 'file=s' => \@files, # files to process (or you can simple list the files as unnamed arguments, i.e. @ARGV) | |
44 | 'required_fields=s' => \@req_fields, # skip any records missing these fields | |
45 | 'trash=s' => \@trash_fields, # fields to remove from all processed records | |
46 | 'xml_idl=s' => \$idlfile, # location of XML IDL file, defaults to /openils/conf/fm_IDL.xml | |
47 | 'startid=i' => \$startid, #starting ID | |
4856f9b1 JF |
48 | 'direct_load' => \$directload, #starting ID |
49 | 'overwritetcn' => \$overwritetcn, #starting ID | |
50 | 'quiet' => \$quiet, # do not output progress count | |
51 | 'verbose' => \$verbose # do not output progress count | |
38891e1e JF |
52 | ); |
53 | ||
4856f9b1 JF |
54 | my $U = "OpenILS::Application::AppUtils"; |
55 | OpenSRF::System->bootstrap_client( config_file => $config ); | |
56 | Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL")); | |
57 | my $cstore = OpenSRF::AppSession->connect('open-ils.cstore'); | |
58 | ||
38891e1e JF |
59 | @trash_fields = split(/,/,join(',',@trash_fields)); |
60 | @req_fields = split(/,/,join(',',@req_fields)); | |
61 | ||
62 | if ($enc) { | |
63 | MARC::Charset->ignore_errors(1); | |
64 | MARC::Charset->assume_encoding($enc); | |
65 | } | |
66 | ||
67 | if (uc($marctype) eq 'XML') { | |
68 | 'open'->use(':utf8'); | |
69 | } else { | |
70 | bytes->use(); | |
71 | } | |
72 | ||
73 | @files = @ARGV if (!@files); | |
74 | ||
75 | Fieldmapper->import(IDL => $idlfile); | |
76 | ||
77 | select STDERR; $| = 1; | |
78 | select STDOUT; $| = 1; | |
79 | ||
80 | my $batch = new MARC::Batch ( $marctype, @files ); | |
81 | $batch->strict_off(); | |
82 | $batch->warnings_off(); | |
83 | ||
84 | my $starttime = time; | |
85 | my $rec; | |
86 | my $count = 0; | |
87 | ||
4856f9b1 | 88 | my $id = $startid || 1; |
38891e1e JF |
89 | |
90 | PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) { | |
91 | next if ($rec == -1); | |
38891e1e JF |
92 | $id++; |
93 | $count++; | |
94 | ||
95 | # Skip records that don't contain a required field (like '245', for example) | |
96 | foreach my $req_field (@req_fields) { | |
97 | if (!$rec->field("$req_field")) { | |
98 | warn "\n!!! Record $count missing required field $req_field, skipping record.\n"; | |
99 | next PROCESS; | |
100 | } | |
101 | } | |
102 | ||
103 | # ----------------- | |
104 | # Overdrive - specific code | |
105 | # ----------------- | |
106 | ||
107 | my $tcn_value; | |
4856f9b1 | 108 | my $tcn_source = 'Overdrive'; |
38891e1e JF |
109 | |
110 | my $caption = 'DOWNLOADABLE AUDIOBOOK'; | |
111 | ||
112 | # this is the base 856 field we're going to generate separate fields for each org unit we're scoping at | |
113 | my $baseurifield; | |
114 | ||
115 | # check all 856s | |
116 | URIFIELD: foreach my $uri ($rec->field('856')){ | |
117 | ||
118 | # Overdrive uses a $3 for Excerpts, we want to keep this intact so carry on then | |
119 | next URIFIELD if ($uri->subfield('3')); | |
120 | ||
121 | # we need a $u for a URL, if we don't have this it is bad | |
122 | my $url = $uri->subfield('u'); | |
123 | if(!$url){ | |
124 | warn "856 has no URL in rec $id. Skipping"; | |
125 | } | |
126 | ||
38891e1e JF |
127 | if($uri->subfield('z')){ |
128 | $caption = 'EBOOK' if ($uri->subfield('z') =~ /Book/); | |
129 | $uri->delete_subfield(code => 'z'); | |
130 | $uri->add_subfields('z' => 'Click to access online (library card required)'); | |
131 | ||
132 | next unless($url =~ m/($overdrive_prefix)/); | |
133 | ||
134 | # trim out Overdrive's magical GUID-looking ID thingy | |
135 | my $overdrivekey = $url; | |
136 | $overdrivekey =~ s/($overdrive_prefix)//g; | |
137 | ||
138 | # make it TCN-ish | |
139 | $tcn_value = $tcn_prefix . $overdrivekey; | |
140 | ||
4856f9b1 JF |
141 | # add our base $9 |
142 | $uri->add_subfields('9' => $baseorg); | |
38891e1e JF |
143 | } |
144 | } | |
145 | ||
38891e1e JF |
146 | # add some arbitrary stuff as prescribed by our cataloguer overlords |
147 | $rec = adjust_leader($rec); | |
148 | $rec = process_custom_fields($rec); | |
149 | ||
38891e1e JF |
150 | # ----------------- |
151 | # END Overdrive - specific code | |
152 | # ----------------- | |
153 | ||
154 | ||
155 | $rec->delete_field($_) for ($rec->field(@trash_fields)); | |
156 | ||
157 | my $field901 = MARC::Field->new( | |
158 | '901' => ('', ''), | |
159 | a => $tcn_value, | |
160 | b => $tcn_source, | |
161 | c => $id, | |
162 | ); | |
163 | ||
164 | ||
165 | $rec->insert_fields_ordered($field901); | |
166 | ||
4856f9b1 | 167 | #print $rec->as_formatted(); |
38891e1e | 168 | |
4856f9b1 | 169 | #next PROCESS; |
38891e1e JF |
170 | |
171 | (my $xml = $rec->as_xml_record()) =~ s/\n//sog; | |
172 | $xml =~ s/^<\?xml.+\?\s*>//go; | |
173 | $xml =~ s/>\s+</></go; | |
174 | $xml =~ s/\p{Cc}//go; | |
175 | $xml = OpenILS::Application::AppUtils->entityize($xml); | |
176 | $xml =~ s/[\x00-\x1f]//go; | |
177 | ||
178 | my $bib = new Fieldmapper::biblio::record_entry; | |
179 | $bib->id($id); | |
180 | $bib->active('t'); | |
181 | $bib->deleted('f'); | |
182 | $bib->marc($xml); | |
183 | $bib->creator(0); | |
184 | $bib->create_date('now'); | |
185 | $bib->editor(0); | |
186 | $bib->edit_date('now'); | |
187 | $bib->tcn_source($tcn_source); | |
188 | $bib->tcn_value($tcn_value); | |
189 | $bib->last_xact_id('IMPORT-'.$starttime); | |
190 | ||
4856f9b1 JF |
191 | if(my $dupetcn = $U->simplereq("open-ils.search","open-ils.search.biblio.tcn",$tcn_value)){ |
192 | if($dupetcn->{count} > 0){ | |
193 | my $newid = $dupetcn->{ids}[0]; | |
194 | if($overwritetcn){ | |
195 | $bib->id($newid); | |
196 | $cstore->request("open-ils.cstore.transaction.begin")->gather(1); | |
197 | my $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.update",$bib)->gather(1); | |
198 | $cstore->request("open-ils.cstore.transaction.commit")->gather(1); | |
199 | print STDERR "overwriting TCN: $tcn_value (id:$id/newid:$newid)\n" if $verbose; | |
200 | } else { | |
201 | print STDERR "duplicate TCN: $tcn_value - (id: $id) not imported \n" if $verbose; | |
202 | } | |
203 | next PROCESS; | |
204 | } | |
205 | } | |
206 | ||
207 | if($directload){ | |
208 | my $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.retrieve",$id)->gather(1); | |
209 | if($req){ | |
210 | if($req->tcn_value ne $tcn_value){ | |
211 | print STDERR "incoming record (id:$id/tcn:$tcn_value) does not match record (id:".$req->id."/tcn:".$rec->tcn_value.")!\n" if $verbose; | |
212 | if($overwritetcn){ | |
213 | ||
214 | } | |
215 | } elsif (($req->tcn_value eq $tcn_value) && $overwritetcn){ | |
216 | print STDERR "matching record overlaied with incoming record (id:$id/tcn:$tcn_value)\n" if $verbose; | |
217 | ||
218 | $cstore->request("open-ils.cstore.transaction.begin")->gather(1); | |
219 | $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.update",$bib)->gather(1); | |
220 | $cstore->request("open-ils.cstore.transaction.commit")->gather(1); | |
221 | } | |
222 | } else { | |
223 | print STDERR "importing new record (id:$id/tcn:$tcn_value)\n" if $verbose; | |
224 | ||
225 | $cstore->request("open-ils.cstore.transaction.begin")->gather(1); | |
226 | $req = $cstore->request("open-ils.cstore.direct.biblio.record_entry.create",$bib)->gather(1); | |
227 | $cstore->request("open-ils.cstore.transaction.commit")->gather(1); | |
228 | } | |
229 | print Dumper($req); | |
230 | } else { | |
231 | print OpenSRF::Utils::JSON->perl2JSON($bib)."\n"; | |
232 | } | |
38891e1e JF |
233 | |
234 | if (!$quiet){# && !($count % 50)) { | |
235 | print STDERR "\r$count\t". $count / (time - $starttime); | |
236 | } | |
237 | } | |
238 | ||
4856f9b1 JF |
239 | $cstore->disconnect(); |
240 | ||
38891e1e JF |
241 | sub adjust_leader { |
242 | my $rec = shift; | |
243 | my $leader = $rec->leader(); | |
4856f9b1 JF |
244 | #$leader = substr($leader,0,5) . 'm' . substr($leader,6,length($leader)); |
245 | $leader = substr($leader,0,9) . 'a' . substr($leader,10,length($leader)); | |
38891e1e JF |
246 | $rec->leader($leader); |
247 | return $rec; | |
248 | } | |
249 | ||
250 | sub process_custom_fields{ | |
251 | my $rec = shift; | |
252 | my $caption = shift; | |
253 | ||
254 | my @newfields; | |
255 | ||
256 | push @newfields, MARC::Field->new( | |
257 | '538' => (' ', ' '), | |
258 | a => "Requires OverDrive Media Console" | |
259 | ); | |
260 | push @newfields, MARC::Field->new( | |
261 | '594' => (' ', ' '), | |
262 | a => "Library To Go" | |
263 | ); | |
264 | ||
265 | push @newfields, MARC::Field->new( | |
266 | '655' => (' ', '4'), | |
267 | 'a' => $caption | |
268 | ); | |
269 | ||
270 | $rec->insert_fields_ordered(@newfields); | |
271 | return $rec; | |
272 | } | |
273 |