#!/usr/local/bin/perl -w
###############################################################################
#                                                                             #
#            MEDLINE FIELD EXTRACTION AND TABLE SUMMARY                       #
#                                                                             #
#                             ALL FIELDS                                      #
#                                                                             #
#            PROGRAM 4: File opening and processing                           #
#                                                                             #
#            By Mike Galsworthy                                               #
#                                                                             #
###############################################################################
#
# Usage:  <this script> --web NAME [--showtop N]
#
# Reads NAME.txt line by line and tallies the tagged fields it recognises
# (QRY, CNT, STAT-, AU, TA, MH, DP).  It then prints a summary report to
# STDOUT, deletes NAME.txt and appends a completion line to log.txt.
#
# --showtop N limits the "top N" tables; it defaults to 10.
#
# NOTE(review): the <p>/<table>/<tr>/<td> markup printed below is a minimal
# reconstruction of the report layout -- confirm it against the page that
# consumes this output before deploying.

use strict;
use warnings;
use Getopt::Long;

my $DEFAULT_SHOWTOP = 10;    # arbitrary default value
my $VALUE_OFFSET    = 6;     # field values start at column 6 of each line

# MeSH headings that are counted separately and kept out of the general
# MeSH table.  Each age group is paired with the label used in the report.
my @RATIO_HEADINGS = ( 'Humans', 'Animals', 'Male', 'Female' );
my @AGE_GROUPS     = (
    [ 'Infant, Premature' => 'Infant, premature' ],
    [ 'Infant, Newborn'   => 'Infant, newborn' ],
    [ 'Infant'            => 'Infant' ],
    [ 'Child, Preschool'  => 'Child, preschool' ],
    [ 'Child'             => 'Child' ],
    [ 'Adolescent'        => 'Adolescent' ],
    [ 'Adult'             => 'Adult' ],
    [ 'Middle Aged'       => 'Middle Aged' ],
    [ 'Aged'              => 'Aged' ],
    [ 'Aged, 80 and over' => 'Aged, 80 and over' ],
);
my %demographic_count =
    map { $_ => 0 } @RATIO_HEADINGS, map { $_->[0] } @AGE_GROUPS;

# ---------------------------------------------------------------------------
# Options
# ---------------------------------------------------------------------------
my ( $web, $showtop );
GetOptions(
    'web:s'     => \$web,
    'showtop=i' => \$showtop,
);
die "Usage: $0 --web NAME [--showtop N]\n"
    unless defined $web && length $web;
$showtop = $DEFAULT_SHOWTOP unless defined $showtop && $showtop > 0;

my $filename = $web . '.txt';

# ---------------------------------------------------------------------------
# Pass over the input file, tallying each recognised field
# ---------------------------------------------------------------------------
my $term          = '';    # query text from the QRY line
my $record_count  = 0;     # total from the CNT line
my $medline_count = 0;     # records whose STAT- value mentions MEDLINE
my ( %authors, %journals, %mesh, %mesh_starred, %years );

# Three-argument open: $filename comes from the command line, so it must
# never be able to smuggle in an open mode.
open my $in, '<', $filename or die "Cannot open $filename: $!\n";

LINE:
while ( my $line = <$in> ) {

    # Strip LF or CRLF here so no stored value carries a stray "\r".
    $line =~ s/\r?\n\z//;

    my ($tag) = $line =~ m/\A(QRY|CNT|STAT-|AU|TA|MH|DP) /
        or next LINE;
    next LINE if length $line <= $VALUE_OFFSET;
    my $value = substr $line, $VALUE_OFFSET;

    if ( $tag eq 'QRY' ) {
        $term = $value;
    }
    elsif ( $tag eq 'CNT' ) {

        # Keep only the leading digits so the later arithmetic is clean.
        ($record_count) = $value =~ m/\A\s*(\d+)/;
        $record_count = 0 unless defined $record_count;
    }
    elsif ( $tag eq 'STAT-' ) {
        $medline_count++ if $value =~ /MEDLINE/;
    }
    elsif ( $tag eq 'AU' ) {
        $authors{ mark_if_in_query( $value, $term ) }++;
    }
    elsif ( $tag eq 'TA' ) {
        $journals{ mark_if_in_query( $value, $term ) }++;
    }
    elsif ( $tag eq 'MH' ) {

        # Any heading containing "*" is tallied verbatim as a starred term.
        $mesh_starred{$value}++ if $value =~ /\*/;

        my $heading = $value;
        $heading =~ s/\A\*//;    # take the "*" off Major MeSH Headings
        my $slash = index $heading, '/';
        $heading = substr $heading, 0, $slash if $slash > 0;

        if ( exists $demographic_count{$heading} ) {
            $demographic_count{$heading}++;
        }
        else {
            $mesh{$heading}++;
        }
    }
    elsif ( $tag eq 'DP' ) {
        $years{ substr $value, 0, 4 }++;    # first 4 characters: the year
    }
}
close $in or warn "Cannot close $filename: $!\n";

# ---------------------------------------------------------------------------
# Report
# ---------------------------------------------------------------------------
print "There were $medline_count records with MEDLINE entries (i.e. with data for research ratios, age profiles and MeSH tables)";

print "<p>Here are the top $showtop Authors:</p>\n";
print_count_table( \%authors, top_keys( \%authors, $showtop ) );

print "<p>Here are the top $showtop Journals:</p>\n";
print_count_table( \%journals, top_keys( \%journals, $showtop ) );

print "<p>Research ratios:</p>\n";
print_ratio_table( 'Humans', 'Animals' );
print_ratio_table( 'Male',   'Female' );

print "<p>Age profile of (Human) subject populations:</p>\n";
print '<table>';
for my $group (@AGE_GROUPS) {
    my ( $heading, $label ) = @{$group};
    print '<tr><td>', $label, ': </td><td>', $demographic_count{$heading},
        '</td></tr>';
}
print "</table>\n";

print "<p>Here are the top $showtop Major MeSH Headings:</p>\n";
print_count_table( \%mesh, top_keys( \%mesh, $showtop ) );

print "<p>Here are the top $showtop starred MeSH terms:</p>\n";
print_count_table( \%mesh_starred, top_keys( \%mesh_starred, $showtop ) );

print "<p>Here are the results (number of papers) sorted by year:</p>\n";
my @years_desc  = reverse sort keys %years;
my $years_total = 0;
$years_total += $years{$_} for @years_desc;
print_count_table( \%years, @years_desc );

# The percentage is only meaningful (and only safe to compute) when the
# query total is larger than the number of dated records seen, which also
# guarantees a non-zero divisor.
if ( $record_count > $years_total ) {
    my $percent_done = sprintf '%.1f', ( $years_total / $record_count ) * 100;
    print "<p>NOTE: These dates tail off abruptly because I only processed ";
    print "the first $years_total records ($percent_done%) out of the $record_count associated with that query.\n";
    print "Use the Timeline button to quickly break down all $record_count records into their year of publication.</p>\n";
}

# ---------------------------------------------------------------------------
# Clean up the input file and record completion in the log
# ---------------------------------------------------------------------------
unlink $filename or warn "Cannot delete $filename: $!\n";

open my $log, '>>', 'log.txt' or die "Cannot open log.txt: $!\n";
print {$log} " [file complete: $term ($years_total)]\r\n";
close $log or die "Cannot close log.txt: $!\n";

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Return $value wrapped in [brackets] when it occurs (case-insensitively)
# inside the query $term, otherwise return it unchanged.  The value is
# quoted with \Q..\E so regex metacharacters in names are matched literally.
sub mark_if_in_query {
    my ( $value, $query ) = @_;
    return $value unless defined $query && length $query;
    return $query =~ m/\Q$value\E/i ? "[$value]" : $value;
}

# Return at most $limit keys of the hash referenced by $count_of, ordered by
# descending count; ties are broken by descending string order of the key.
sub top_keys {
    my ( $count_of, $limit ) = @_;
    my @ranked =
        sort { $count_of->{$b} <=> $count_of->{$a} || $b cmp $a }
        keys %{$count_of};
    $#ranked = $limit - 1 if @ranked > $limit;
    return @ranked;
}

# Print one table with a "count | key" row for each of @keys, in the order
# given, looking the counts up in the hash referenced by $count_of.
sub print_count_table {
    my ( $count_of, @keys ) = @_;
    print '<table>';
    for my $key (@keys) {
        print '<tr><td>', $count_of->{$key}, '</td><td>', escape_html($key),
            '</td></tr>';
    }
    print "</table>\n";
    return;
}

# Print a one-row table "First:Second | n:m" for two demographic headings.
sub print_ratio_table {
    my ( $first, $second ) = @_;
    print '<table><tr><td>', "$first:$second", '</td><td>',
        $demographic_count{$first}, ':', $demographic_count{$second},
        "</td></tr></table>\n";
    return;
}

# Escape the characters that are special in HTML text and attribute values,
# so record text and query terms cannot inject markup into the report.
sub escape_html {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}