#!/usr/local/bin/perl -w
###############################################################################
# Mike Galsworthy's "Free Text to MeSH" converter #
# #
# #
# PROGRAM 1 : Retrieving PubMed articles in Medline format #
# #
# (credit to Oleg Khovayko for key Medline download parts of script) #
# #
# #
###############################################################################
#### PART 1. SETTING COMMAND-LINE TERMS #######################################
# Options read from the command line:
#   --query    the PubMed search term (required)
#   --records  how many records to keep (integer; defaulted later if omitted)
#   --batch    how many records to request per download (integer; defaulted
#              later if omitted)
use Getopt::Long;
my ($term, $keep, $retmax);
GetOptions ("query:s" => \$term, "records=i" => \$keep, "batch=i" => \$retmax);
# Check for a missing/empty query BEFORE touching $term: running the
# substitution on an undefined value would emit an "uninitialized value"
# warning under -w ahead of the usage message.
if (!($term)) {
    die "No query term given. Use the --query 'term here' option.\n";
}
# The caller encodes a single quote as the placeholder ~q~q~; restore it.
$term =~ s/~q~q~/\'/g;
#### PART 2. E-SEARCH ON THE QUERY TERM (USING "HISTORY") #####################
# Sends the query to ESearch with usehistory=y and extracts three values from
# the XML reply:
#   $Count    - total number of records matching the query
#   $QueryKey - key of this query on the NCBI history server
#   $WebEnv   - history-server session identifier
# $QueryKey and $WebEnv are what the later efetch calls use to retrieve the
# stored result set.
use LWP::Simple;
# NCBI serves the E-utilities over HTTPS from eutils.ncbi.nlm.nih.gov.
# (LWP needs the LWP::Protocol::https module installed to fetch https URLs.)
$base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?".
        "db=Pubmed".     "&".
        "retmax=100".    "&".
        "usehistory=y".  "&".
        "term=";
$signature = '&tool=medsum&email=mike_galsworthy@yahoo.co.uk';
# NOTE(review): $term is sent as given (not URL-encoded here); callers may
# already pass an encoded term, so this is left unchanged - confirm.
$esearch = get($base . $term . $signature);
if (!defined $esearch) {
    die "ESearch request failed: no response received for query '$term'.\n";
}
# Pull each value out of its own XML element. An un-anchored pattern such as
# (\d+).*(\d+).*(\S+) lets the greedy .* pick up arbitrary digits/text, so
# every capture is tied to its tag. The first <Count> in the reply is taken
# as the overall hit count.
my ($Count)    = $esearch =~ m|<Count>(\d+)</Count>|;
my ($QueryKey) = $esearch =~ m|<QueryKey>(\d+)</QueryKey>|;
my ($WebEnv)   = $esearch =~ m|<WebEnv>([^<\s]+)</WebEnv>|;
if (!defined $Count) {
    die "Could not find a <Count> element in the ESearch response.\n";
}
# History details are only needed when there is something to fetch.
if ($Count > 0 && !(defined $QueryKey && defined $WebEnv)) {
    die "ESearch response is missing <QueryKey>/<WebEnv>; cannot fetch records.\n";
}
# --records may not have been given; print an empty value rather than
# interpolating undef (same output, no warning under -w).
my $keep_as_given = defined $keep ? $keep : "";
print "QRY - $term\n";          # For info at end of output
print "CNT - $Count\n";         # For info at end of output
print "KP - $keep_as_given\n";  # For info at end of output
#### PART 3. E-FETCH THE MATCHING RECORDS IN BATCHES ##########################
# Works out how many records to keep and the batch size, downloads the records
# in Medline text format from the history server, prints them to STDOUT, and
# finally appends a one-line summary of the query to log.txt.
if (!($keep)) {$keep = 1000;}            # default value for records to keep
if ($Count < $keep) {$keep = $Count;}    # in case keeping more than query count
# Default batch value (recommended max). A negative batch size is also
# replaced by the default: it would make the loop below step backwards forever.
if (!($retmax) || $retmax < 1) {$retmax = 500;}
if ($keep < $retmax) {$retmax = $keep;}  # in case batch larger than records
####### LOOP to print data in batches (of $retmax records max) ################
my $retstart;
for ($retstart = 0; $retstart < $keep; $retstart += $retmax) {
    # 3 second time delay between batch calls: pause before every call except
    # the first, so consecutive requests are always separated and no time is
    # wasted sleeping after the final batch.
    if ($retstart > 0) {sleep 3;}

    # Never ask for more than is still wanted, so the final batch does not
    # push the total past $keep.
    my $remaining  = $keep - $retstart;
    my $batch_size = $remaining < $retmax ? $remaining : $retmax;

    my $efetch = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?".
                 "rettype=medline".        "&".
                 "retmode=text".           "&".
                 "retstart=$retstart".     "&".
                 "retmax=$batch_size".     "&".
                 "db=Pubmed".              "&".
                 "query_key=$QueryKey".    "&".
                 "WebEnv=$WebEnv".         "$signature";

    my $efetch_result = get($efetch);
    if (!defined $efetch_result) {
        # Report the gap on STDERR and carry on with the remaining batches
        # instead of printing an undefined value.
        warn "EFetch request failed for records starting at $retstart.\n";
        next;
    }

    print "$efetch_result\n";
}
########################## END OF LOOP ########################################
print "KP - $keep\n"; # For info at end of output
# Append a summary line (query term and total hit count) to the log file.
open(my $log_fh, '>>', 'log.txt') or die "Cannot open log.txt for appending: $!\n";
print {$log_fh} "MI - $term ($Count) \r\n";
close($log_fh) or die "Cannot close log.txt: $!\n";