#!/usr/local/bin/perl -w
###############################################################################
#            Mike Galsworthy's "Free Text to MeSH" converter                  #
#                                                                             #
#      PROGRAM 1 : Retrieving PubMed articles in Medline format               #
#                                                                             #
#   (credit to Oleg Khovayko for key Medline download parts of script)        #
#                                                                             #
# Usage:                                                                      #
#   --query   'term'  search term sent to E-Search (required)                 #
#   --records N       number of records to keep (default 1000)                #
#   --batch   N       records per e-fetch batch (default 500)                 #
#                                                                             #
# Prints QRY/CNT/KP summary lines on STDOUT and appends one line per run to   #
# log.txt in the current directory.                                           #
###############################################################################

use strict;
use warnings;

#### PART 1. SETTING COMMAND-LINE TERMS #######################################

use Getopt::Long;

my ($term, $keep, $retmax);
GetOptions(
    "query:s"   => \$term,
    "records=i" => \$keep,
    "batch=i"   => \$retmax,
);

# Validate before touching $term so a missing --query does not trigger an
# "uninitialized value" warning in the substitution below.
if (!($term)) {
    die "No query term given. Use the --query 'term here' option.\n";
}

# A literal ~q~q~ placeholder in the query is turned into a single quote.
# NOTE(review): presumably the caller uses this to smuggle quotes through a
# shell command line -- confirm against the calling program.
$term =~ s/~q~q~/\'/g;

#### PART 2. E-SEARCH ON THE QUERY TERM (USING "HISTORY") #####################

use LWP::Simple;

# NOTE(review): NCBI may now require https and the eutils.ncbi.nlm.nih.gov
# host; URL left unchanged here -- confirm before deploying.
my $base = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?".
           "db=Pubmed".     "&".
           "retmax=100".    "&".
           "usehistory=y".  "&".
           "term=";

my $signature = '&tool=medsum&email=mike_galsworthy@yahoo.co.uk';

# NOTE(review): $term is placed in the URL exactly as given (no URL-encoding);
# callers appear to be expected to pass an already URL-safe term -- confirm.
my $esearch = get($base . $term . $signature);

# LWP::Simple::get returns undef on any failure; stop rather than parse undef.
if (!defined $esearch) {
    die "E-Search request failed (no response received).\n";
}

# Pull the hit count and the history handles out of the XML reply. The
# patterns are anchored on their element tags: bare (\d+)/(\S+) groups would
# capture unrelated text (e.g. the "1" of the XML version declaration).
my ($Count, $QueryKey, $WebEnv) =
    $esearch =~ m|<Count>(\d+)</Count>.*<QueryKey>(\d+)</QueryKey>.*<WebEnv>(\S+)</WebEnv>|s
    or die "Could not find Count/QueryKey/WebEnv in the E-Search response.\n";

print "QRY - $term\n";       # For info at end of output
print "CNT - $Count\n";      # For info at end of output

# Requested value as given on the command line (empty when --records was
# omitted); the effective value is printed again once defaults are applied.
print "KP - " . (defined $keep ? $keep : '') . "\n";

#### PART 3. BATCH SIZING AND E-FETCH (FETCH LOOP CURRENTLY DISABLED) #########

if (!($keep))         {$keep = 1000;}      # default value for records to keep
if ($Count < $keep)   {$keep = $Count;}    # in case keeping more than query count
if (!($retmax))       {$retmax = 500;}     # default batch value (recommended max)
if ($keep < $retmax)  {$retmax = $keep;}   # in case batch larger than records

####### LOOP to fetch and print records in batches of $retmax (disabled) ######
#
# my $retstart;
# for($retstart = 0; $retstart < $keep; $retstart += $retmax) {
#   my $efetch = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?".
#                "rettype=medline".       "&".
#                "retmode=text".          "&".
#                "retstart=$retstart".    "&".
#                "retmax=$retmax".        "&".
#                "db=Pubmed".             "&".
#                "query_key=$QueryKey".   "&".
#                "WebEnv=$WebEnv".        "$signature";
#
#   my $efetch_result = get($efetch);
#
#   print "$efetch_result\n";
#   if ($retstart > 0) {sleep 3;}   #3 second time delay between batch calls
# }
#
########################## END OF LOOP ########################################

print "KP - $keep\n";        # For info at end of output

# Append a one-line record of this query (term and hit count) to the run log.
open my $log_fh, '>>', 'log.txt' or die "Cannot open log.txt for appending: $!\n";
print {$log_fh} "MI - $term ($Count) \r\n";
# Buffered write errors only surface at close, so check it.
close $log_fh or die "Cannot close log.txt: $!\n";