Exercise 5.5

From Earlham CS Department
Revision as of 21:24, 28 September 2009 by Erika (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Return to Week 2
Exercise 5.5 in Beginning Perl for Bioinformatics

use warnings;
use strict;

#Erika Phelps
#28 Sept 2009
#Exercise 5.5


#Open a file, read it into a string, and close it again.
#For each amino acid, check whether hydrophobic.
#Keep a count of whether hydrophobic is true or false.
#Calculate percentage of hydrophobic out of all a.a's read.
#Exit program.

#Get the polypeptide information
#
#Variables are declared with "my" at the point of first use rather than
#being pre-declared with dummy values at the top of the script.

print "Please type the filename of the protein data: ";

my $protein_filename = <STDIN>;

#<STDIN> returns undef at end-of-input (e.g. Ctrl-D or an empty pipe);
#stop here rather than calling chomp on an undefined value.
die "No filename was given.\n" unless defined $protein_filename;

chomp $protein_filename;

#Check to see if file exists.

unless ( -e $protein_filename ) {
    die "File \"$protein_filename\" does not seem to exist.\n";
}

#Check to see if file can be opened.
#Three-argument open with a lexical filehandle is used so that a filename
#starting with a character such as '>' or '|' cannot change the open mode.
#$! is included so the user can see why the open failed.

open( my $protein_fh, '<', $protein_filename )
    or die "Cannot open file \"$protein_filename\": $!\n";

#Read the whole file into memory, then close it again.

my @protein_lines = <$protein_fh>;

close $protein_fh;

my $protein_sequence = join( '', @protein_lines );

#Remove whitespace (including the newline at the end of every line)
$protein_sequence =~ s/\s//g;

#Count the amino acids.
#Every remaining character is treated as one amino acid, so the total is
#simply the length of the string. tr/// with an empty replacement list
#changes nothing and returns the number of matching characters; both cases
#are listed because tr/// has no case-insensitive modifier.
#The hydrophobic set (A, I, L, V) is the one this exercise solution uses.

my $count_total       = length $protein_sequence;
my $count_hydrophobic = ( $protein_sequence =~ tr/AILVailv// );

#An empty file would otherwise cause a division by zero below.
if ( $count_total == 0 ) {
    die "File \"$protein_filename\" does not contain any amino acids.\n";
}

#Calculate the percentage of hydrophobic a.a's in the chain.
#The divisor is the total number of amino acids read, not the number of
#non-hydrophobic ones, so the result is always between 0 and 100.

my $percentage_hydrophobic
    = sprintf( "%.1f", $count_hydrophobic / $count_total * 100 );

#Print out the results

print "In the file \"$protein_filename\", $percentage_hydrophobic % are hydrophobic.\n";


#Possible extension: keep a separate count for each of A, I, L and V and
#add them together for the final calculation, so that the number of each
#specific hydrophobic amino acid can be printed out as well.