2019-02-05 19:30:40 -08:00
|
|
|
#!/bin/bash
# sdoi.sh — look up a doi/pmid, append a bibtex entry to a bibtex db,
# and optionally import an associated downloaded pdf.

# Print usage text and exit when invoked with -h.
if [ "$1" == "-h" ] ; then
echo "
sdoi - search for a unique identifier (doi or pmid) on doi.org and/or pubmed and append bibtex entry to bibtex db. Optionally import a downloaded pdf.

usage:

sdoi.sh 'doi'

sdoi.sh 'doi' download.pdf

depends:

xsltproc, xmllint - xml processing programs from libxml

pubmed2bibtex.xsl - xml processor stylesheet

defaults:

Set the three required default file locations (xsl file, bib file, pdf directory)

"
exit 0
fi
|
2019-02-05 19:30:40 -08:00
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
set -e #exit if an error

# set -v -x -e #debugging

#Setup defaults
# $1 = doi or pmid; $2 = optional pdf to import.
doi=$1
fn=$2
# Each location can be overridden via the environment variable of the same name.
styleSheet=${pubmedStyleSheet:-$HOME/bin/pubmed2bibtex.xsl}
bibdFileOut=${bibdFileOut:-$HOME/projects/learn/bibd/OMEGA.bib}
pdfPathOut=${pdfPathOut:-$HOME/projects/learn/bibd/papers}
# Quoted: the pdf path may contain spaces (was unquoted, which would
# word-split and break basename).
relPath=$(basename "$pdfPathOut")
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
#define functions
|
|
|
|
|
import_bib() {
  # Import the fetched bibtex entry; when a pdf was supplied, rename and
  # move it into the papers repository first.
  if [ -n "$fn" ]; then
    extract_name
    append_pdf
  fi
  append_bibfile
  clean_up
}
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
fetchBib_pubmed() {
  #request pubmed xml and transform into bibtex
  # Uses the pubmed uid found by the esearch step; the raw XML is kept in
  # $tmpBib.xml because extract_name later reads it with xmllint.
  # All expansions quoted — the temp path or stylesheet path may contain
  # spaces (they were unquoted before).
  curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > "$tmpBib.xml"
  xsltproc --novalid "$styleSheet" "$tmpBib.xml" >> "$tmpBib"
}
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
fetchBib_doiDotOrg() {
  #request bibtex from doi.org
  # Fallback when the esearch step returned no pubmed uid.
  echo "pubmed id not found, trying doi.org.."
  # BUG FIX: the URL scheme was written "https//" (missing colon), so curl
  # could never resolve the host and this fallback always failed.
  curl -LH 'Accept: application/x-bibtex' "https://dx.doi.org/$doi" >> "$tmpBib"
  # Blank line after the entry (equivalent to the previous echo -e "\n":
  # the escape plus echo's own newline = two newlines).
  printf '\n\n' >> "$tmpBib"
}
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
extract_name() {
  #extract some strings to make a nice filename for the pdf
  # Reads the raw PubMed XML saved by fetchBib_pubmed; the resulting
  # author/journal/year globals are consumed by append_pdf.
  # $tmpBib.xml quoted everywhere — it was unquoted before and would
  # word-split on paths containing spaces.
  key="LastName"
  author=$(xmllint --xpath "string(//$key)" "$tmpBib.xml" | tr -d ' ')

  key="MedlineTA"
  journal=$(xmllint --xpath "string(//$key)" "$tmpBib.xml" | tr -d ' ')

  key="Year"
  year=$(xmllint --xpath "string(//$key)" "$tmpBib.xml")
}
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
append_bibfile() {
  #import bibtex
  # Normalizes the fetched entry, then appends it to the bibtex db unless
  # the identifier is already present.

  #replace pubmed field with pmid
  sed -i -E "s|(\W*)pubmed = |\1pmid = |" "$tmpBib"

  #grep for a uid (doi) in case its already in db
  # FIXES: the previous 'rg' call was a non-standard dependency and treated
  # the doi as a regex (the '.' in every doi matched any character).
  # grep -F matches the doi literally; -q avoids output; -s silences the
  # error if the db file does not exist yet; -- protects a leading '-'.
  if grep -qsF -- "$doi" "$bibdFileOut"; then
    echo "$doi already found in $bibdFileOut, exiting"
  else
    echo "importing $tmpBib"
    cat "$tmpBib" >> "$bibdFileOut"
  fi
}
|
|
|
|
|
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
append_pdf() {
  #move pdf file to papers repository, add file name to bibtex url field
  # Target name is built from the globals set by extract_name plus the
  # pubmed uid, e.g. Smith_Nature2020-12345.pdf.
  fn2=${author}_${journal}$year-$uid.pdf
  echo "moving $fn to $pdfPathOut/$fn2"
  # Quoted + '--': the source pdf path may contain spaces or start with '-'
  # (the old unquoted mv broke on both).
  mv -- "$fn" "$pdfPathOut/$fn2"

  #insert local path to pdf into the retrieved bibtex url field
  # NOTE(review): $relPath/$fn2 are interpolated into the sed program; a
  # '|' or '&' in author/journal values would corrupt the substitution —
  # assumed absent in PubMed metadata, verify.
  sed -i -E "s|(\W*url = \{).*(\}.*)|\1$relPath/$fn2\2|" "$tmpBib"
}
|
2019-02-05 19:30:40 -08:00
|
|
|
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
clean_up() {
  #clean up
  # Remove the temp bibtex and xml files; called by import_bib at the end
  # of a SUCCESSFUL import.
  rm -f "$tmpBib" "$tmpBib.xml"
  # BUG FIX: this was 'exit 1', which reported failure to the caller even
  # after a successful import. A clean run must exit 0.
  exit 0
}
|
|
|
|
|
|
2021-05-03 21:03:40 -07:00
|
|
|
|
2021-10-19 00:34:23 -07:00
|
|
|
#main function

##test whether the given unique identifier (doi) is an actual doi, else assume its a pmid
# DOIs always begin with the literal prefix "10." — the previous pattern
# grep "^10." left the dot unescaped, so a bare pmid like "105..." was
# misclassified as a doi. The glob below matches the dot literally and
# avoids the useless echo|grep pipeline.
if [[ "$doi" == 10.* ]]; then
  searchField="doi"
else
  searchField="pmid"
fi
|
|
|
|
|
|
|
|
|
|
# Ask PubMed esearch for the record's uid; empty when PubMed has no match.
# NOTE(review): $doi is embedded raw in the query string — a doi containing
# '&' or '#' would corrupt the request; assumed absent, verify. If esearch
# ever returns multiple <Id> elements, uid becomes multi-line — presumably
# the identifier is unique enough that this does not occur; confirm.
uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=$searchField&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")

# Temp file for the bibtex entry, created in the current directory
# (GNU mktemp options -p/--suffix).
tmpBib=$(mktemp -p ./ --suffix=.bib)
|
|
|
|
|
|
|
|
|
|
# Fetch the bibtex entry: prefer PubMed when a uid was found, otherwise
# fall back to doi.org content negotiation.
if [ -n "$uid" ]; then
  fetchBib_pubmed
else
  fetchBib_doiDotOrg
fi

# A non-empty temp file means a record was retrieved; import it.
if [ -s "$tmpBib" ]; then
  import_bib
else
  echo "sorry, doi not found.."
  exit 1
fi
|
|
|
|
|
|