[rt18107] Cleaned up access_pathmap.pl
[sitka/sitka-tools.git] / deployment / integrity-checker.pl
CommitLineData
089f11fb 1#!/usr/bin/perl
bcd49ef3
LW
2use strict;
3use warnings;
089f11fb
JD
4# On Ubuntu, you'll want the following packages:
5# - libconfig-simple-perl
6# - libgit-repository-perl
7# - libdate-manip-perl
8use Config::Simple;
9use File::Find;
10use File::stat;
11use Date::Manip qw/ParseDate UnixDate/;
12use Time::localtime;
13use Git::Repository;
14use Git::Repository::Command;
15use Getopt::Long;
16use Data::Dumper;
17
# Command-line state. The scalars stay undef unless the matching option is
# given; $branch and $remote carry defaults that the options may override.
my ($help, $config_file, $repo_path, $all, $check_files, $hash_file, $since, $git_output, $deployed_output);
my $branch = 'HEAD';
my $remote = 'origin';
my @components;

# --print-git-hashes and --print-deployed-hashes are accepted as aliases of
# --git-output and --deployed-output, so invocations written with the
# --print-* spellings are not rejected as unknown options.
# GetOptions returns false when it hits an unknown or malformed option; stop
# right away instead of running with a half-parsed command line.
GetOptions(
    'help'        => \$help,        # show help message and exit
    'config=s'    => \$config_file, # INI file for path mappings
    'repo=s'      => \$repo_path,   # location of git repo
    'branch=s'    => \$branch,      # git branch/head to check against (defaults to 'HEAD')
    'remote=s'    => \$remote,      # git remote to pull from (defaults to 'origin')
    'component=s' => \@components,  # parts of EG to be checked (each component is a block in the INI file)
    'all'         => \$all,         # check all components specified in config (overrides --component)
    'check-files' => \$check_files, # check deployed files
    'hash-file=s' => \$hash_file,   # file containing git hashes (overrides --repo)
    'since=s'     => \$since,       # check only files modified since this time
    'git-output|print-git-hashes=s'           => \$git_output,     # output file for git hashes (optional)
    'deployed-output|print-deployed-hashes=s' => \$deployed_output # output file for hashes of deployed files (optional)
) or die "Invalid command-line options; run $0 --help for usage.\n";
37
# --help: print the usage summary and stop before doing any work.
# The output options are documented under the names the option parser
# defines (--git-output and --deployed-output).
if ($help) {
    print <<"HELP";
USAGE:
  $0 --config pathmap.ini --repo /path/to/evergreen.git [ --component perl [ --component tt2 ] | --all ] --check-files [ --since <date> ]
  $0 --config pathmap.ini --repo /path/to/evergreen.git [ --component perl [ --component tt2 ... ] | --all ] --git-output <git-hashes.txt>
  $0 --config pathmap.ini --repo /path/to/evergreen.git [ --component perl [ --component tt2 ... ] | --all ] --deployed-output <deployed-hashes.txt>

OPTIONS:
  --help
    Show help message and exit.
  --config
    Location of INI file for path mappings.
  --repo
    Location of git repo (overridden by --hash-file).
  --branch
    Git branch (head) to check against (defaults to HEAD).
  --remote
    Git remote to pull from (defaults to origin).
  --component
    Parts of EG to be checked (each component is a block in the config file).
    You can use this option multiple times: --component perl --component web
  --all
    Check all components specified in config file (overrides --component).
  --check-files
    Get file hashes from git, then check deployed files to see if they match.
  --hash-file
    File containing file hashes from git. If you use this option, you don't
    need to specify a git repo using the --repo option.
    Use case: pull hashes from git once, copy the resulting file to multiple
    servers, then check the deployed code against the file instead of pulling
    hashes from git individually on each server.
  --since
    Only calculate hashes if file has been modified since the specified time.
  --git-output
    Get file hashes from git repo and print/append to the specified file.
    Can be used with --check-files and --deployed-output.
  --deployed-output
    Print/append git-like hashes for deployed files to the specified file.
    Can be used with --check-files and --git-output.

HELP
    exit;
}
81
# specify all possible components (--all option);
# access_pathmap.pl prints the component names, one per line
if ($all) {
    die "No config file specified\n" unless defined $config_file;

    # List-form pipe open: the helper is exec'ed directly, so the config
    # path never passes through a shell (spaces or metacharacters are safe).
    open(my $pathmap_fh, '-|', './access_pathmap.pl', '--config', $config_file)
        or die "Could not run ./access_pathmap.pl: $!\n";
    chomp(my @names = <$pathmap_fh>);
    close($pathmap_fh);

    # --all overrides any --component values; drop empty lines
    @components = grep { length } @names;
}
089f11fb
JD
84
# Open the optional report files for appending. The bareword handles
# GITOUTPUT and DEPLOYEDOUTPUT are written to and closed further down the
# script, so their names must stay as they are.
if ($git_output && !open(GITOUTPUT, '>>', $git_output)) {
    die "Could not open $git_output: $!\n";
}
if ($deployed_output && !open(DEPLOYEDOUTPUT, '>>', $deployed_output)) {
    die "Could not open $deployed_output: $!\n";
}
91
# git hashes, keyed by file name
my %git_hashes;

# optionally read in git hashes from file;
# each line holds a hash, whitespace, then the file name
if ($hash_file) {
    open(my $hash_fh, '<', $hash_file) or die "Could not open $hash_file: $!\n";
    while (my $line = <$hash_fh>) {
        # Strip the line terminator first: without this the file name (the
        # last field) keeps its "\n" and can never match a lookup key.
        chomp $line;
        next unless $line =~ /\S/;    # ignore blank lines

        # limit of 2 keeps any whitespace inside the file name intact
        my ($hash, $file) = split(/\s+/, $line, 2);
        next unless defined $file && length $file;

        $git_hashes{$file} = $hash;
    }
    close($hash_fh);
}
103
# Run ./access_pathmap.pl with the given arguments and return its output as
# a list of chomped, non-empty lines. The list-form pipe open exec's the
# helper directly, so no argument is ever interpreted by a shell.
sub _pathmap_lines {
    my (@args) = @_;
    open(my $fh, '-|', './access_pathmap.pl', @args)
        or die "Could not run ./access_pathmap.pl: $!\n";
    chomp(my @lines = <$fh>);
    close($fh);    # exit status deliberately ignored, as with backticks
    return grep { length } @lines;
}

die "No config file specified\n" if @components && !defined $config_file;

# if no hash file was supplied, git hashes are grabbed from the repo;
# load (and update) the repo once instead of once per component
my $repo;
if (@components && !$hash_file) {
    die "No repo specified\n" unless ($repo_path);
    $repo_path =~ s|/$||;
    $repo_path = "$repo_path/.git" unless ($repo_path =~ /\.git$/);
    $repo = Git::Repository->new( git_dir => $repo_path ) or die "Could not load git repo $repo_path: $!\n";

    # ensure git repo is up-to-date
    if ($branch ne 'HEAD') {
        $repo->run( 'pull' => $remote );
        $repo->run( 'checkout' => $branch ); # TODO: is this necessary?
    }
}

# convert --since to seconds since epoch once, not once per file
my $since_ts;
if ($since && ($check_files || $deployed_output)) {
    $since_ts = UnixDate($since, '%s');
    die "Could not parse --since value: $since\n"
        unless defined $since_ts && length $since_ts;
}

foreach my $component (@components) {
    # source paths that make up this component
    my @paths = _pathmap_lines('--config', $config_file, '--component', $component);

    # get hashes from git
    if ($repo) {
        foreach my $srcpath (@paths) {
            # use git-ls-tree to traverse the file tree starting at $srcpath
            # e.g. `git ls-tree -r HEAD Open-ILS/src/perlmods/lib`
            my @tree = $repo->run( 'ls-tree' => '-r', $branch, $srcpath );
            foreach my $entry (@tree) {
                my ($mode, $type, $hash, $filename) = split(/\s+/, $entry, 4);
                $git_hashes{$filename} = $hash;
                print GITOUTPUT $hash, "\t", $filename, "\n" if ($git_output);
            }
        }
    }

    # check deployed files
    next unless ($check_files || $deployed_output);

    foreach my $srcpath (@paths) {
        # deployed location for this source path; taking the first chomped
        # line guarantees no trailing newline ends up in the path
        my ($destpath) = _pathmap_lines(
            '--config', $config_file, '--component', $component, '--srcpath', $srcpath
        );
        if (!defined $destpath) {
            warn "No deployed path found for $srcpath (component $component)\n";
            next;
        }

        # for each file in the destination path, push the file's path to @files;
        # output will include symlinked files, but will not include directories
        my @files;
        find( { wanted => sub { push @files, $_ if -f }, follow => 1, no_chdir => 1 }, $destpath );

        foreach my $file (@files) {
            if (defined $since_ts) {
                # compare raw epoch seconds; a formatted date string would
                # evaluate to 0 in a numeric comparison
                my $st = stat($file) or next;    # file vanished since find()
                next unless $st->mtime > $since_ts;
            }

            # you can calculate what the git hash would be
            # for any file using `git hash-object <file>`;
            # you don't even need to be in a git repo to run it!
            my $hash = Git::Repository->run( 'hash-object', $file );

            if ($check_files) {
                # map the deployed path back to its path in the repo;
                # \Q..\E makes $destpath match literally, not as a regex
                my $srcfile = $file;
                $srcfile =~ s|^\Q$destpath\E|$srcpath|;

                if (!$git_hashes{$srcfile}) {
                    print "untracked\t$file\n";
                } elsif ($git_hashes{$srcfile} ne $hash) {
                    print "modified\t$file\n";
                }
            }
            print DEPLOYEDOUTPUT $hash, "\t", $file, "\n" if ($deployed_output);
        }
    }
}
177
# Close the report files and check the result: buffered write errors are
# only reported when the handle is closed, so an unchecked close could
# leave a truncated report unnoticed.
if ($git_output) {
    close(GITOUTPUT) or die "Could not close $git_output: $!\n";
}
if ($deployed_output) {
    close(DEPLOYEDOUTPUT) or die "Could not close $deployed_output: $!\n";
}
180