| Line | |
|---|
| 1 | #!/usr/bin/perl
|
|---|
| 2 |
|
|---|
| 3 | use strict;
|
|---|
| 4 | use warnings 'all';
|
|---|
| 5 |
|
|---|
| 6 | use LWP::Simple qw /$ua getstore/;
|
|---|
| 7 |
|
|---|
| 8 | my %urls;
|
|---|
| 9 |
|
|---|
| 10 | my @dummy = qw(
|
|---|
| 11 | http://something.here
|
|---|
| 12 | http://www.pvhp.com
|
|---|
| 13 | );
|
|---|
| 14 | my %dummy;
|
|---|
| 15 |
|
|---|
| 16 | @dummy{@dummy} = ();
|
|---|
| 17 |
|
|---|
| 18 | foreach my $file (<*/*.pod */*/*.pod */*/*/*.pod README README.* INSTALL>) {
|
|---|
| 19 | open my $fh => $file or die "Failed to open $file: $!\n";
|
|---|
| 20 | while (<$fh>) {
|
|---|
| 21 | if (m{(?:http|ftp)://(?:(?!\w<)[-\w~?@=.])+} && !exists $dummy{$&}) {
|
|---|
| 22 | my $url = $&;
|
|---|
| 23 | $url =~ s/\.$//;
|
|---|
| 24 | $urls {$url} ||= { };
|
|---|
| 25 | $urls {$url} {$file} = 1;
|
|---|
| 26 | }
|
|---|
| 27 | }
|
|---|
| 28 | close $fh;
|
|---|
| 29 | }
|
|---|
| 30 |
|
|---|
| 31 | sub fisher_yates_shuffle {
|
|---|
| 32 | my $deck = shift; # $deck is a reference to an array
|
|---|
| 33 | my $i = @$deck;
|
|---|
| 34 | while (--$i) {
|
|---|
| 35 | my $j = int rand ($i+1);
|
|---|
| 36 | @$deck[$i,$j] = @$deck[$j,$i];
|
|---|
| 37 | }
|
|---|
| 38 | }
|
|---|
| 39 |
|
|---|
| 40 | my @urls = keys %urls;
|
|---|
| 41 |
|
|---|
| 42 | fisher_yates_shuffle(\@urls);
|
|---|
| 43 |
|
|---|
| 44 | sub todo {
|
|---|
| 45 | warn "(", scalar @urls, " URLs)\n";
|
|---|
| 46 | }
|
|---|
| 47 |
|
|---|
| 48 | my $MAXPROC = 40;
|
|---|
| 49 | my $MAXURL = 10;
|
|---|
| 50 | my $MAXFORK = $MAXPROC < $MAXURL ? 1 : $MAXPROC / $MAXURL;
|
|---|
| 51 |
|
|---|
| 52 | select(STDERR); $| = 1;
|
|---|
| 53 | select(STDOUT); $| = 1;
|
|---|
| 54 |
|
|---|
| 55 | while (@urls) {
|
|---|
| 56 | my @list;
|
|---|
| 57 | my $pid;
|
|---|
| 58 | my $i;
|
|---|
| 59 |
|
|---|
| 60 | todo();
|
|---|
| 61 |
|
|---|
| 62 | for ($i = 0; $i < $MAXFORK; $i++) {
|
|---|
| 63 | $list[$i] = [ splice @urls, 0, $MAXURL ];
|
|---|
| 64 | $pid = fork;
|
|---|
| 65 | die "Failed to fork: $!\n" unless defined $pid;
|
|---|
| 66 | last unless $pid; # Child.
|
|---|
| 67 | }
|
|---|
| 68 |
|
|---|
| 69 | if ($pid) {
|
|---|
| 70 | # Parent.
|
|---|
| 71 | warn "(waiting)\n";
|
|---|
| 72 | 1 until -1 == wait; # Reap.
|
|---|
| 73 | } else {
|
|---|
| 74 | # Child.
|
|---|
| 75 | foreach my $url (@{$list[$i]}) {
|
|---|
| 76 | my $code = getstore $url, "/dev/null";
|
|---|
| 77 | next if $code == 200;
|
|---|
| 78 | my $f = join ", " => keys %{$urls {$url}};
|
|---|
| 79 | printf "%03d %s: %s\n" => $code, $url, $f;
|
|---|
| 80 | }
|
|---|
| 81 |
|
|---|
| 82 | exit;
|
|---|
| 83 | }
|
|---|
| 84 | }
|
|---|
| 85 |
|
|---|
| 86 | __END__
|
|---|
Note:
See
TracBrowser
for help on using the repository browser.