Difference between revisions of "Exercise 5.5"
Jump to navigation
Jump to search
(New page: Return to Week 2 <br> Exercise 5.5 in <i>Beginning Perl for Bioinformatics</i></b> <pre> </pre>) |
|||
Line 4: | Line 4: | ||
<pre> | <pre> | ||
#!/usr/bin/perl
use warnings;
use strict;
#Erika Phelps
#28 Sept 2009
#Exercise 5.5

#Pseudocode:

#Ask for a filename, read that file into a string, and close it again.
#Strip whitespace so that only the amino acid letters remain.
#Count the hydrophobic residues (A, I, L, V) and all the other residues.
#Calculate percentage of hydrophobic out of all a.a's read.
#Exit program.

#read_sequence( $filename )
#Returns the contents of the file as one string with all whitespace
#removed, or undef when the file cannot be opened for reading.
sub read_sequence {
    my ($filename) = @_;

    #Three-argument open: the typed name is always treated as a plain
    #file to read, never as a mode or a command.
    open( my $fh, '<', $filename ) or return;
    my $sequence = join( '', <$fh> );
    close $fh;

    #Remove whitespace (including the newlines between lines).
    $sequence =~ s/\s//g;

    return $sequence;
}

#count_hydrophobic( $sequence )
#Returns a two-element list: the number of A, I, L and V characters in
#the sequence (upper or lower case), and the number of all other
#characters.
sub count_hydrophobic {
    my ($sequence) = @_;

    #tr/// with an empty replacement only counts; it changes nothing.
    my $hydrophobic = ( $sequence =~ tr/AILVailv// );
    my $other       = length($sequence) - $hydrophobic;

    return ( $hydrophobic, $other );
}

#percent_hydrophobic( $count_hydrophobic, $count_other )
#Returns the hydrophobic count as a percentage of the combined total.
#Returns 0 when both counts are 0, instead of dividing by zero.
sub percent_hydrophobic {
    my ( $count_hydrophobic, $count_other ) = @_;

    my $count_total = $count_hydrophobic + $count_other;
    return 0 if $count_total == 0;

    return $count_hydrophobic / $count_total * 100;
}

#main()
#Prompts for a filename and prints the percentage of hydrophobic amino
#acids in that file. Each problem is reported with a message and the
#sub returns early; the script then ends with exit status 0.
sub main {

    #Get the polypeptide information

    print "Please type the filename of the protein data: ";

    my $protein_filename = <STDIN>;

    #<STDIN> gives undef when the input ends before anything is typed.
    unless ( defined $protein_filename ) {
        print "\nNo filename was given.\n";
        return;
    }

    chomp $protein_filename;

    #Check to see if file exists.

    unless ( -e $protein_filename ) {
        print "File \"$protein_filename\" does not seem to exist.\n";
        return;
    }

    #Check to see if file can be opened, and read it if so.

    my $protein_sequence = read_sequence($protein_filename);

    unless ( defined $protein_sequence ) {
        print "Cannot open file \"$protein_filename\"\n\n";
        return;
    }

    #A file holding only whitespace (or nothing) has no percentage.

    if ( $protein_sequence eq '' ) {
        print "File \"$protein_filename\" contains no amino acids.\n";
        return;
    }

    #Count the hydrophobic and other amino acids in the chain

    my ( $count_hydrophobic, $count_other ) =
        count_hydrophobic($protein_sequence);

    #Calculate the percentage of hydrophobic a.a's in the chain

    my $percentage_hydrophobic =
        percent_hydrophobic( $count_hydrophobic, $count_other );

    #Print out the results

    print "In the file \"$protein_filename\", $percentage_hydrophobic % are hydrophobic.\n";

    return;
}

#Run the program only when this file is executed directly, so that the
#subroutines above can also be loaded and called from another script.
main() unless caller;

#Could easily make more specific by reading AILV separately then adding
#together for final calc, so that we can print out how many of each
#specific hydrophobic a.a. there are.
</pre> | </pre> |
Revision as of 20:38, 28 September 2009
Return to Week 2
Exercise 5.5 in Beginning Perl for Bioinformatics
#!/usr/bin/perl
use warnings;
use strict;
#Erika Phelps
#28 Sept 2009
#Exercise 5.5

#Pseudocode:

#Ask for a filename, read that file into a string, and close it again.
#Strip whitespace so that only the amino acid letters remain.
#Count the hydrophobic residues (A, I, L, V) and all the other residues.
#Calculate percentage of hydrophobic out of all a.a's read.
#Exit program.

#read_sequence( $filename )
#Returns the contents of the file as one string with all whitespace
#removed, or undef when the file cannot be opened for reading.
sub read_sequence {
    my ($filename) = @_;

    #Three-argument open: the typed name is always treated as a plain
    #file to read, never as a mode or a command.
    open( my $fh, '<', $filename ) or return;
    my $sequence = join( '', <$fh> );
    close $fh;

    #Remove whitespace (including the newlines between lines).
    $sequence =~ s/\s//g;

    return $sequence;
}

#count_hydrophobic( $sequence )
#Returns a two-element list: the number of A, I, L and V characters in
#the sequence (upper or lower case), and the number of all other
#characters.
sub count_hydrophobic {
    my ($sequence) = @_;

    #tr/// with an empty replacement only counts; it changes nothing.
    my $hydrophobic = ( $sequence =~ tr/AILVailv// );
    my $other       = length($sequence) - $hydrophobic;

    return ( $hydrophobic, $other );
}

#percent_hydrophobic( $count_hydrophobic, $count_other )
#Returns the hydrophobic count as a percentage of the combined total.
#Returns 0 when both counts are 0, instead of dividing by zero.
sub percent_hydrophobic {
    my ( $count_hydrophobic, $count_other ) = @_;

    my $count_total = $count_hydrophobic + $count_other;
    return 0 if $count_total == 0;

    return $count_hydrophobic / $count_total * 100;
}

#main()
#Prompts for a filename and prints the percentage of hydrophobic amino
#acids in that file. Each problem is reported with a message and the
#sub returns early; the script then ends with exit status 0.
sub main {

    #Get the polypeptide information

    print "Please type the filename of the protein data: ";

    my $protein_filename = <STDIN>;

    #<STDIN> gives undef when the input ends before anything is typed.
    unless ( defined $protein_filename ) {
        print "\nNo filename was given.\n";
        return;
    }

    chomp $protein_filename;

    #Check to see if file exists.

    unless ( -e $protein_filename ) {
        print "File \"$protein_filename\" does not seem to exist.\n";
        return;
    }

    #Check to see if file can be opened, and read it if so.

    my $protein_sequence = read_sequence($protein_filename);

    unless ( defined $protein_sequence ) {
        print "Cannot open file \"$protein_filename\"\n\n";
        return;
    }

    #A file holding only whitespace (or nothing) has no percentage.

    if ( $protein_sequence eq '' ) {
        print "File \"$protein_filename\" contains no amino acids.\n";
        return;
    }

    #Count the hydrophobic and other amino acids in the chain

    my ( $count_hydrophobic, $count_other ) =
        count_hydrophobic($protein_sequence);

    #Calculate the percentage of hydrophobic a.a's in the chain

    my $percentage_hydrophobic =
        percent_hydrophobic( $count_hydrophobic, $count_other );

    #Print out the results

    print "In the file \"$protein_filename\", $percentage_hydrophobic % are hydrophobic.\n";

    return;
}

#Run the program only when this file is executed directly, so that the
#subroutines above can also be loaded and called from another script.
main() unless caller;

#Could easily make more specific by reading AILV separately then adding
#together for final calc, so that we can print out how many of each
#specific hydrophobic a.a. there are.