#!/usr/bin/perl # This script computes substitution rates for a set of given alignments that # maybe used to generate a mutation rate matrix for use with SCONE. It will # search the alignment for all blocks containing three given species - a # reference species, a sister species, and an outgroup species. This triple # should be selected so that the reference and sister are sufficiently close to # ensure the chance of double-hits is negligible. Some code that is commented # out allows the use of Perl Bit-Vectors to mask out non-neutral sequences # (however you choose to define neutral). # The output of this script must be fed to compute-matrix.pl to actually generate # the matrix file. #use Bit::Vector; if (!@ARGV) { print STDERR "Usage: subst.pl \n"; } # load neutral vectors, one per chromosome, into %vector $reference = shift @ARGV; $sister = shift @ARGV; $outgroup = shift @ARGV; open ALIAS, shift @ARGV; while () { chomp; @a = split; $alias{$a[1]} = $a[0]; $use{$a[0]} = 1; } close ALIAS; @let = ('a','t','g','c'); for ($i1=0;$i1<4;$i1++) { for ($i2=0;$i2<4;$i2++) { for ($i3=0;$i3<4;$i3++) { for ($j1=0;$j1<4;$j1++) { for ($j2=0;$j2<4;$j2++) { for ($j3=0;$j3<4;$j3++) { for ($k1=0;$k1<4;$k1++) { for ($k2=0;$k2<4;$k2++) { for ($k3=0;$k3<4;$k3++) { $freq{ $let[$i1].$let[$i2].$let[$i3]."_". $let[$j1].$let[$j2].$let[$j3]."_". $let[$k1].$let[$k2].$let[$k3] } = 0; } } } } } } } } } while (<>) { if (/^#/) {next} if (/a /) { if (++$count%1000==0) { print STDERR "."; } undef %seq; undef %useq; $abort = 0; while (<>) { chomp; if (/^$/) { last } if (/s /) { @a = split; $a[6] =~ tr/ATGC/atgc/; @enc = split(/\./,$a[1]); $sname = $enc[0]; if ($alias{$sname} eq $reference) { $chr = $enc[1]; $start = $a[2]; $len = $a[3]; $end = $start + $len; } if (defined $alias{$sname}) { @{$seq{$alias{$sname}}} = split(//,$a[6]); } } } $x = scalar keys %seq; if ($x < 3) { next } if ($abort) { next } $r = $seq{$reference}; $c = $seq{$sister}; $b = $seq{$outgroup}; $nuc = $start-1; for $pos (0..@{$r}-1) { if (${$r}[$pos] ne '-') { $nuc++; if ($pos && $pos < @{$r}-1) { if (${$r}[$pos-1] ne '-' && ${$r}[$pos+1] ne '-') { #&& $vector{$chr}->bit_test($nuc) #&& $vector{$chr}->bit_test($nuc-1) #&& $vector{$chr}->bit_test($nuc+1)) { # it's ALL good. $key = ${$r}[$pos-1].${$r}[$pos].${$r}[$pos+1]."_". ${$c}[$pos-1].${$c}[$pos].${$c}[$pos+1]."_". ${$b}[$pos-1].${$b}[$pos].${$b}[$pos+1]; if ($key !~ /-/ && $key !~ /n/) { $freq{$key}++; } } } } } } } for $key (keys %freq) { print "$key $freq{$key}\n"; }