Hi all!
I have two separate codes and want to integrate
them into one.
(1) The 1st script uses a short sequence of width 8 or 16
as a template to generate 100 short sequences of the same
width and stores them in a text file A.
(2) The 2nd script reads 100 short sequences from text file A and 100 long sequences of width 200 from a text file B, and then replaces a substring of each long sequence with the corresponding short sequence. This script produces two text files, C and D: file C holds the 100 modified long sequences, while file D holds the starting site of each replacement.
Since I need to generate 200 or more versions of file C, it would be tedious for me to enter the short template sequence manually each time.
I hope to integrate these two scripts into one that handles a file E holding 200 short template sequences. The output should be 200 versions of file C.
Can anybody suggest how to do this? I am appending my two scripts here.
Thanks, buddies,
1st code
*******************************************************
#!/usr/bin/perl
use strict;
use warnings;
# ====== Configuration ====================
my $N_Sequences = 100;                    # number of sequences (output lines) to generate
my @Motif       = split(//, 'TTTATAAT');  # This is the template short sequence
my @Alphabet    = split(//, 'ACGT');
my $P_Consensus = 0.85;                   # This is the probability of dominant letter
# ====== Globals ==========================
my @Probabilities;    # Stores the probability of each character
# ====== Program ==========================
# Writes $N_Sequences lines; each line has one randomly drawn letter per
# motif position, drawn from the distribution built for that position.
# A lexical handle with three-argument open keeps the file name from being
# parsed for a mode, and the checked close reports buffered write failures
# that would otherwise go unnoticed.
my $out_file = 'short_sequences16_1.txt';
open(my $out_norm, '>', $out_file) or die "Unable to open file :$!";
for my $sequence_number (1 .. $N_Sequences) {
    for my $consensus_char (@Motif) {
        # Rebuild the cumulative distribution for this motif position,
        # then draw one letter from it.
        loadConsensusCharacter($consensus_char);
        addNoiseToDistribution();
        convertToIntervals();
        print {$out_norm} getRandomCharacter(rand(1.0));
    }
    print {$out_norm} "\n";
}
close($out_norm) or die "Unable to close file :$!";
exit();
# ====== Subroutines =======================
#
# loadConsensusCharacter($char)
# Overwrites @Probabilities with one entry per letter of @Alphabet:
# 1.0 where the letter equals $char, 0.0 everywhere else.
# Dies if $char matches no letter of @Alphabet.
sub loadConsensusCharacter {
    my ($char) = @_;
    my $matches = 0;
    foreach my $slot (0 .. $#Alphabet) {
        if ($Alphabet[$slot] eq $char) {
            $Probabilities[$slot] = 1.0;
            $matches++;
            next;
        }
        $Probabilities[$slot] = 0.0;
    }
    # The message deliberately keeps its embedded line break.
    $matches
        or die("Panic: Motif-Character\"$char\" was not found in Alphabet.
Aborting.\n");
    return();
}
# ==========================================
# addNoiseToDistribution()
# Rewrites @Probabilities in place: every entry equal to 1.0 becomes
# $P_Consensus, and every other entry becomes the remaining probability
# (1 - $P_Consensus) divided by the number of other letters in @Alphabet.
sub addNoiseToDistribution {
    my $other_letters  = scalar(@Alphabet) - 1;
    my $P_NonConsensus = (1.0 - $P_Consensus) / $other_letters;
    # The loop variable aliases each array element, so assigning to it
    # updates @Probabilities directly.
    foreach my $probability (@Probabilities) {
        $probability = ($probability == 1.0) ? $P_Consensus : $P_NonConsensus;
    }
    return();
}
# ==========================================
# convertToIntervals()
# Turns @Probabilities into running (cumulative) sums in place: each entry
# from index 1 onward has the already-accumulated previous entry added to it.
# The unused local accumulator from the earlier version has been removed.
sub convertToIntervals {
    for my $i (1 .. $#Probabilities) {
        $Probabilities[$i] += $Probabilities[$i - 1];
    }
    return();
}
# ==========================================
# getRandomCharacter($RandomNumber)
# Returns the letter of @Alphabet at the first index whose entry in
# @Probabilities (treated as a cumulative upper bound) is greater than
# $RandomNumber.
#
# The search never moves past the last entry. The cumulative bounds are sums
# of floating-point values, so the final one can end up marginally below 1.0.
# Without the clamp, a random number at or above that final bound would index
# one past the end of @Alphabet and return undef.
sub getRandomCharacter {
    my ($RandomNumber) = @_;
    my $last_index = $#Probabilities;
    my $i = 0;
    $i++ while $i < $last_index && !($Probabilities[$i] > $RandomNumber);
    return($Alphabet[$i]);
}
*******************************************************
2nd code
********************************************************
#!/usr/bin/perl
use strict;
use warnings;
# Reads the short sequences and the long sequences (one per line each). For
# every short sequence it picks a random start site in the long sequence with
# the same index and overwrites that window with the short sequence.
# output_1.txt receives the modified long sequences and output_2.txt receives
# the start sites.
my (@short, @long);

# Every open is checked: a missing input file would otherwise silently yield
# empty output files.
open(my $short_fh, '<', 'short_sequences16_1.txt')
    or die "Unable to open short_sequences16_1.txt: $!";
chomp(@short = <$short_fh>);
close($short_fh);

open(my $long_fh, '<', 'long_sequences.txt')
    or die "Unable to open long_sequences.txt: $!";
chomp(@long = <$long_fh>);
close($long_fh);

open(my $out_initial, '>', 'output_1.txt')
    or die "Unable to open output_1.txt: $!";
open(my $out_replaced, '>', 'output_2.txt')
    or die "Unable to open output_2.txt: $!";

for my $x (0 .. $#short) {
    my $short_len = length($short[$x]);

    # Without a long sequence at least as long as the short one there is no
    # valid start site, and rand() would be handed a non-positive range.
    if (!defined $long[$x] || length($long[$x]) < $short_len) {
        warn "SeqName$x: no long sequence of at least $short_len characters; skipped\n";
        next;
    }

    # Start site (0-based) chosen so that the short sequence fits entirely.
    my $r = int(rand(length($long[$x]) - $short_len + 1));

    # Perform the replacement itself; previously the site was chosen but the
    # long sequence was written out unchanged.
    substr($long[$x], $r, $short_len, $short[$x]);

    print {$out_initial} ">SeqName$x\n$long[$x]\n";
    print {$out_replaced} "SeqName$x\n" . $r . "\n";
}

# Buffered write errors only surface when the handle is closed.
close($out_initial)  or die "Unable to close output_1.txt: $!";
close($out_replaced) or die "Unable to close output_2.txt: $!";
********************************************************
I have two separate codes and want to integrate
them into one.
(1) The 1st script uses a short sequence of width 8 or 16
as a template to generate 100 short sequences of the same
width and stores them in a text file A.
(2) The 2nd script reads 100 short sequences from text file A and 100 long sequences of width 200 from a text file B, and then replaces a substring of each long sequence with the corresponding short sequence. This script produces two text files, C and D: file C holds the 100 modified long sequences, while file D holds the starting site of each replacement.
Since I need to generate 200 or more versions of file C, it would be tedious for me to enter the short template sequence manually each time.
I hope to integrate these two scripts into one that handles a file E holding 200 short template sequences. The output should be 200 versions of file C.
Can anybody suggest how to do this? I am appending my two scripts here.
Thanks, buddies,
1st code
*******************************************************
#!/usr/bin/perl
use strict;
use warnings;
# ====== Configuration ====================
my $N_Sequences = 100;                    # number of sequences (output lines) to generate
my @Motif       = split(//, 'TTTATAAT');  # This is the template short sequence
my @Alphabet    = split(//, 'ACGT');
my $P_Consensus = 0.85;                   # This is the probability of dominant letter
# ====== Globals ==========================
my @Probabilities;    # Stores the probability of each character
# ====== Program ==========================
# Writes $N_Sequences lines; each line has one randomly drawn letter per
# motif position, drawn from the distribution built for that position.
# A lexical handle with three-argument open keeps the file name from being
# parsed for a mode, and the checked close reports buffered write failures
# that would otherwise go unnoticed.
my $out_file = 'short_sequences16_1.txt';
open(my $out_norm, '>', $out_file) or die "Unable to open file :$!";
for my $sequence_number (1 .. $N_Sequences) {
    for my $consensus_char (@Motif) {
        # Rebuild the cumulative distribution for this motif position,
        # then draw one letter from it.
        loadConsensusCharacter($consensus_char);
        addNoiseToDistribution();
        convertToIntervals();
        print {$out_norm} getRandomCharacter(rand(1.0));
    }
    print {$out_norm} "\n";
}
close($out_norm) or die "Unable to close file :$!";
exit();
# ====== Subroutines =======================
#
# loadConsensusCharacter($char)
# Overwrites @Probabilities with one entry per letter of @Alphabet:
# 1.0 where the letter equals $char, 0.0 everywhere else.
# Dies if $char matches no letter of @Alphabet.
sub loadConsensusCharacter {
    my ($char) = @_;
    my $matches = 0;
    foreach my $slot (0 .. $#Alphabet) {
        if ($Alphabet[$slot] eq $char) {
            $Probabilities[$slot] = 1.0;
            $matches++;
            next;
        }
        $Probabilities[$slot] = 0.0;
    }
    # The message deliberately keeps its embedded line break.
    $matches
        or die("Panic: Motif-Character\"$char\" was not found in Alphabet.
Aborting.\n");
    return();
}
# ==========================================
# addNoiseToDistribution()
# Rewrites @Probabilities in place: every entry equal to 1.0 becomes
# $P_Consensus, and every other entry becomes the remaining probability
# (1 - $P_Consensus) divided by the number of other letters in @Alphabet.
sub addNoiseToDistribution {
    my $other_letters  = scalar(@Alphabet) - 1;
    my $P_NonConsensus = (1.0 - $P_Consensus) / $other_letters;
    # The loop variable aliases each array element, so assigning to it
    # updates @Probabilities directly.
    foreach my $probability (@Probabilities) {
        $probability = ($probability == 1.0) ? $P_Consensus : $P_NonConsensus;
    }
    return();
}
# ==========================================
# convertToIntervals()
# Turns @Probabilities into running (cumulative) sums in place: each entry
# from index 1 onward has the already-accumulated previous entry added to it.
# The unused local accumulator from the earlier version has been removed.
sub convertToIntervals {
    for my $i (1 .. $#Probabilities) {
        $Probabilities[$i] += $Probabilities[$i - 1];
    }
    return();
}
# ==========================================
# getRandomCharacter($RandomNumber)
# Returns the letter of @Alphabet at the first index whose entry in
# @Probabilities (treated as a cumulative upper bound) is greater than
# $RandomNumber.
#
# The search never moves past the last entry. The cumulative bounds are sums
# of floating-point values, so the final one can end up marginally below 1.0.
# Without the clamp, a random number at or above that final bound would index
# one past the end of @Alphabet and return undef.
sub getRandomCharacter {
    my ($RandomNumber) = @_;
    my $last_index = $#Probabilities;
    my $i = 0;
    $i++ while $i < $last_index && !($Probabilities[$i] > $RandomNumber);
    return($Alphabet[$i]);
}
*******************************************************
2nd code
********************************************************
#!/usr/bin/perl
use strict;
use warnings;
# Reads the short sequences and the long sequences (one per line each). For
# every short sequence it picks a random start site in the long sequence with
# the same index and overwrites that window with the short sequence.
# output_1.txt receives the modified long sequences and output_2.txt receives
# the start sites.
my (@short, @long);

# Every open is checked: a missing input file would otherwise silently yield
# empty output files.
open(my $short_fh, '<', 'short_sequences16_1.txt')
    or die "Unable to open short_sequences16_1.txt: $!";
chomp(@short = <$short_fh>);
close($short_fh);

open(my $long_fh, '<', 'long_sequences.txt')
    or die "Unable to open long_sequences.txt: $!";
chomp(@long = <$long_fh>);
close($long_fh);

open(my $out_initial, '>', 'output_1.txt')
    or die "Unable to open output_1.txt: $!";
open(my $out_replaced, '>', 'output_2.txt')
    or die "Unable to open output_2.txt: $!";

for my $x (0 .. $#short) {
    my $short_len = length($short[$x]);

    # Without a long sequence at least as long as the short one there is no
    # valid start site, and rand() would be handed a non-positive range.
    if (!defined $long[$x] || length($long[$x]) < $short_len) {
        warn "SeqName$x: no long sequence of at least $short_len characters; skipped\n";
        next;
    }

    # Start site (0-based) chosen so that the short sequence fits entirely.
    my $r = int(rand(length($long[$x]) - $short_len + 1));

    # Perform the replacement itself; previously the site was chosen but the
    # long sequence was written out unchanged.
    substr($long[$x], $r, $short_len, $short[$x]);

    print {$out_initial} ">SeqName$x\n$long[$x]\n";
    print {$out_replaced} "SeqName$x\n" . $r . "\n";
}

# Buffered write errors only surface when the handle is closed.
close($out_initial)  or die "Unable to close output_1.txt: $!";
close($out_replaced) or die "Unable to close output_2.txt: $!";
********************************************************