package Algorithm::Diff::XS; use 5.006; use strict; use warnings; use vars '$VERSION'; use Algorithm::Diff; BEGIN { $VERSION = '0.04'; require XSLoader; XSLoader::load( __PACKAGE__, $VERSION ); my $code = do { open my $fh, '<', $INC{'Algorithm/Diff.pm'} or die "Cannot read $INC{'Algorithm/Diff.pm'}: $!"; local $/; <$fh>; }; { no warnings; local $@; $code =~ s/Algorithm::Diff/Algorithm::Diff::XS/g; $code =~ s/sub LCSidx/sub LCSidx_old/g; $code = "#line 1 " . __FILE__ . "\n$code"; eval $code; die $@ if $@; } no warnings 'redefine'; sub LCSidx { my $lcs = Algorithm::Diff::XS->_CREATE_; my ( @l, @r ); for my $chunk ( $lcs->_LCS_(@_) ) { push @l, $chunk->[0]; push @r, $chunk->[1]; } return ( \@l, \@r ); } } sub _line_map_ { my $ctx = shift; my %lines; push @{ $lines{ $_[$_] } }, $_ for 0 .. $#_; # values MUST be SvIOK \%lines; } sub _LCS_ { my ( $ctx, $a, $b ) = @_; my ( $amin, $amax, $bmin, $bmax ) = ( 0, $#$a, 0, $#$b ); while ( $amin <= $amax and $bmin <= $bmax and $a->[$amin] eq $b->[$bmin] ) { $amin++; $bmin++; } while ( $amin <= $amax and $bmin <= $bmax and $a->[$amax] eq $b->[$bmax] ) { $amax--; $bmax--; } my $h = $ctx->_line_map_( @$b[ $bmin .. $bmax ] ); # line numbers are off by $bmin return $amin + _core_loop_( $ctx, $a, $amin, $amax, $h ) + ( $#$a - $amax ) unless wantarray; my @lcs = _core_loop_( $ctx, $a, $amin, $amax, $h ); if ( $bmin > 0 ) { $_->[1] += $bmin for @lcs; # correct line numbers } map( [ $_ => $_ ], 0 .. ( $amin - 1 ) ), @lcs, map( [ $_ => ++$bmax ], ( $amax + 1 ) .. $#$a ); } 1; __END__ =head1 NAME Algorithm::Diff::XS - Algorithm::Diff with XS core loop =head1 SYNOPSIS # Drop-in replacement to Algorithm::Diff, but "compact_diff" # and C will run much faster for large data sets. use Algorithm::Diff::XS qw( compact_diff LCSidx ); =head1 DESCRIPTION This module is a simple re-packaging of Joe Schaefer's excellent but not very well-known L with a drop-in interface that simply re-uses the installed version of the L module. Note that only the C function is optimized in XS at the moment, which means only C will get significantly faster for large data sets, while C and C will run in identical speed as C. =head1 BENCHMARK Rate Algorithm::Diff Algorithm::Diff::XS Algorithm::Diff 14.7/s -- -98% Algorithm::Diff::XS 806/s 5402% -- The benchmarking script is as below: my @data = ([qw/a b d/ x 50], [qw/b a d c/ x 50]); cmpthese( 500, { 'Algorithm::Diff' => sub { Algorithm::Diff::compact_diff(@data) }, 'Algorithm::Diff::XS' => sub { Algorithm::Diff::XS::compact_diff(@data) }, }); =head1 SEE ALSO L, L. =head1 AUTHORS Audrey Tang Ecpan@audreyt.orgE =head1 COPYRIGHT Copyright 2008 by Audrey Tang Ecpan@audreyt.orgE. Contains derived code copyrighted 2003 by Joe Schaefer, Ejoe+cpan@sunstarsys.comE. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut