Commit | Line | Data |
---|---|---|
38891e1e JF |
1 | #!/usr/bin/perl |
2 | use strict; | |
3 | use warnings; | |
4 | ||
5 | use lib '/openils/lib/perl5/'; | |
6 | ||
7 | use Error qw/:try/; | |
8 | use OpenILS::Utils::Fieldmapper; | |
9 | use Digest::MD5 qw/md5_hex/; | |
10 | use OpenSRF::Utils::JSON; | |
11 | use OpenILS::Application::AppUtils; | |
12 | use Data::Dumper; | |
13 | use Unicode::Normalize; | |
14 | use Encode; | |
15 | ||
16 | use FileHandle; | |
17 | use Time::HiRes qw/time/; | |
18 | use Getopt::Long; | |
19 | use MARC::Batch; | |
20 | use MARC::File::XML ( BinaryEncoding => 'utf-8' ); | |
21 | use MARC::Charset; | |
22 | use DBI; | |
23 | ||
24 | #MARC::Charset->ignore_errors(1); | |
25 | ||
# Defaults: OpenSRF core config, fieldmapper IDL, input MARC format and the
# encoding MARC::Charset should assume.  All four can be overridden below.
my ($config, $idlfile, $marctype, $enc) =
    ('/srv/openils/conf/opensrf_core.xml', '/srv/openils/conf/fm_IDL.xml', 'USMARC', 'utf8');

my (@files, @trash_fields, @req_fields, $quiet, $startid);

# One scoped 856 ($9) is generated per entry in this list.
my @targetorg = ('BPR','BFN','BTE');

# 856 is always required; --required_fields appends to this list.
@req_fields = ('856');

# Regex source (already escaped) matching the Overdrive content URL prefix.
my $overdrive_prefix = 'http\:\/\/downloads\.bclibrary\.ca\/ContentDetails\.htm\?ID\=';
my $tcn_prefix = "LtG_";

# Stop on unparseable options: carrying on after a mistyped --startid or
# --file would silently process the batch with the wrong settings.
GetOptions(
    'marctype=s'        => \$marctype,     # format of MARC files being processed, defaults to USMARC, often set to XML
    'encoding=s'        => \$enc,          # set assumed MARC encoding for MARC::Charset
    'config=s'          => \$config,       # location of OpenSRF core config file, defaults to /srv/openils/conf/opensrf_core.xml
    'file=s'            => \@files,        # files to process (or you can simply list the files as unnamed arguments, i.e. @ARGV)
    'required_fields=s' => \@req_fields,   # skip any records missing these fields
    'trash=s'           => \@trash_fields, # fields to remove from all processed records
    'xml_idl=s'         => \$idlfile,      # location of XML IDL file, defaults to /srv/openils/conf/fm_IDL.xml
    'startid=i'         => \$startid,      # starting ID
    'quiet'             => \$quiet         # do not output progress count
) or die "Invalid command-line options\n";

# Allow both repeated options and comma-separated lists (--trash 035,999).
@trash_fields = split(/,/,join(',',@trash_fields));
@req_fields = split(/,/,join(',',@req_fields));

if ($enc) {
    MARC::Charset->ignore_errors(1);
    MARC::Charset->assume_encoding($enc);
}

# NOTE(review): these are runtime method calls named "use", not the `use`
# pragma; they only work if something loaded elsewhere provides such a method
# (UNIVERSAL::require does) -- confirm this is still wanted.
if (uc($marctype) eq 'XML') {
    'open'->use(':utf8');
} else {
    bytes->use();
}

# Fall back to bare command-line arguments when no --file was given.
@files = @ARGV if (!@files);

Fieldmapper->import(IDL => $idlfile);

# Unbuffer both STDERR and STDOUT, leaving STDOUT selected.
select STDERR; $| = 1;
select STDOUT; $| = 1;

my $batch = MARC::Batch->new( $marctype, @files );
$batch->strict_off();
$batch->warnings_off();

my $starttime = time;
my $rec;
my $count = 0;

# Honour --startid (the original `1 || $startid` always evaluated to 1).
# The processing loop increments $id before using it, so the first record
# receives this value plus one.
my $id = defined $startid ? $startid : 1;
79 | ||
# Main loop: for every MARC record in the batch, turn its Overdrive 856 into
# one scoped 856 ($9) per target org unit, add the local note/genre fields and
# a 901 carrying the TCN, then print the formatted record on STDOUT.
# A read error sets $rec to -1 so that record is skipped rather than ending the
# loop; the loop ends when $batch->next returns a false value.
PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
    next if ($rec == -1);
    print STDERR "======\n";
    $id++;
    $count++;

    # Skip records that don't contain a required field (like '245', for example)
    foreach my $req_field (@req_fields) {
        if (!$rec->field("$req_field")) {
            warn "\n!!! Record $count missing required field $req_field, skipping record.\n";
            next PROCESS;
        }
    }

    # -----------------
    # Overdrive - specific code
    # -----------------

    my $tcn_value;
    my $tcn_source = 'Library To Go';

    my $caption = 'DOWNLOADABLE AUDIOBOOK';

    # this is the base 856 field we're going to generate separate fields for each org unit we're scoping at
    my $baseurifield;

    # check all 856s
    URIFIELD: foreach my $uri ($rec->field('856')){

        # Overdrive uses a $3 for Excerpts, we want to keep this intact so carry on then
        next URIFIELD if ($uri->subfield('3'));

        # we need a $u for a URL; without one there is nothing to match or
        # scope, so really skip the field instead of matching against undef
        my $url = $uri->subfield('u');
        if(!$url){
            warn "856 has no URL in rec $id. Skipping";
            next URIFIELD;
        }

        # this record has been through Evergreen if a $9 exists somewhere
        if($uri->subfield('9')){
            # if this doesn't match our prefix, ignore it, we only care about our current prefix
            next URIFIELD unless($url =~ m/($overdrive_prefix)/);

            # Use a copy as the base field, dropping the stale $9 so the
            # per-org fields generated below carry exactly one $9 each.
            $baseurifield = $uri->clone;
            $baseurifield->delete_subfield(code => '9');

            # NOTE(review): the original left the TCN undefined on this path
            # (its comment suggested reading the existing 901); deriving it
            # from the URL mirrors the $z path below -- confirm.
            (my $overdrivekey = $url) =~ s/($overdrive_prefix)//g;
            $tcn_value = $tcn_prefix . $overdrivekey;

            $rec->delete_fields($uri);
            next URIFIELD;
        }

        if($uri->subfield('z')){
            $caption = 'EBOOK' if ($uri->subfield('z') =~ /Book/);
            $uri->delete_subfield(code => 'z');
            $uri->add_subfields('z' => 'Click to access online (library card required)');

            next URIFIELD unless($url =~ m/($overdrive_prefix)/);

            # trim out Overdrive's magical GUID-looking ID thingy
            (my $overdrivekey = $url) =~ s/($overdrive_prefix)//g;

            # make it TCN-ish
            $tcn_value = $tcn_prefix . $overdrivekey;

            # we have a base for our scoped fields
            $baseurifield = $uri->clone;

            $rec->delete_fields($uri);
        }
    }

    # Without a base 856 there is nothing to scope; abort the whole run.
    if(!$baseurifield){
        die "the horror! no usable Overdrive 856 field in record $count";
    }

    # add some arbitrary stuff as prescribed by our cataloguer overlords
    $rec = adjust_leader($rec);
    # pass the caption along; it was previously omitted, leaving 655 $a undefined
    $rec = process_custom_fields($rec, $caption);

    # add a scoped field for each org unit in our array
    foreach my $org (@targetorg){
        my $newfield = $baseurifield->clone();
        $newfield->add_subfields('9' => $org);
        $rec->insert_fields_ordered($newfield);
    }

    # -----------------
    # END Overdrive - specific code
    # -----------------

    # drop every field whose tag was listed with --trash
    foreach my $trash ($rec->field(@trash_fields)) {
        $rec->delete_fields($trash);
    }

    my $field901 = MARC::Field->new(
        '901' => ('', ''),
        a => $tcn_value,
        b => $tcn_source,
        c => $id,
    );

    $rec->insert_fields_ordered($field901);

    print $rec->as_formatted();

    next PROCESS;

    # NOTE(review): everything from here to the end of the loop is unreachable
    # because of the `next PROCESS` above (so --quiet currently has no effect).
    # It is kept unchanged in behaviour; decide whether to re-enable or drop it.

    $tcn_value = $rec->subfield('901' => 'a');
    $tcn_source = $rec->subfield('901' => 'b');
    $id = $rec->subfield('901' => 'c');

    (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
    $xml =~ s/^<\?xml.+\?\s*>//go;
    $xml =~ s/>\s+</></go;
    $xml =~ s/\p{Cc}//go;
    $xml = OpenILS::Application::AppUtils->entityize($xml);
    $xml =~ s/[\x00-\x1f]//go;

    my $bib = Fieldmapper::biblio::record_entry->new;
    $bib->id($id);
    $bib->active('t');
    $bib->deleted('f');
    $bib->marc($xml);
    $bib->creator(0);
    $bib->create_date('now');
    $bib->editor(0);
    $bib->edit_date('now');
    $bib->tcn_source($tcn_source);
    $bib->tcn_value($tcn_value);
    $bib->last_xact_id('IMPORT-'.$starttime);

    #print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";

    if (!$quiet){# && !($count % 50)) {
        print STDERR "\r$count\t". $count / (time - $starttime);
    }
}
224 | ||
# Replace the character at offset 5 of the record's leader with 'm' and
# return the same record object.
# NOTE(review): offset 5 is leader/05; if leader/06 was the intended target
# this is off by one -- confirm with the cataloguers.
sub adjust_leader {
    my ($record) = @_;

    my $old_leader = $record->leader();
    my $head = substr($old_leader, 0, 5);
    my $tail = substr($old_leader, 6, length($old_leader));

    $record->leader($head . 'm' . $tail);
    return $record;
}
232 | ||
# Add the fixed local fields to a record and return it:
#   538 $a  "Requires OverDrive Media Console"
#   594 $a  "Library To Go"
#   655 _4 $a  the caption passed as the second argument
# Arguments: the record object, then the caption string.
sub process_custom_fields{
    my ($rec, $caption) = @_;

    my @additions = (
        MARC::Field->new('538', ' ', ' ', a => "Requires OverDrive Media Console"),
        MARC::Field->new('594', ' ', ' ', a => "Library To Go"),
        MARC::Field->new('655', ' ', '4', 'a' => $caption),
    );

    $rec->insert_fields_ordered(@additions);
    return $rec;
}
256 |