2019-02-05 19:30:40 -08:00
|
|
|
#!/bin/bash
# sdoi.sh — look up a doi/pmid, append a bibtex entry to a bibtex db,
# and optionally import an associated downloaded pdf.

# Print usage text and exit when invoked with -h.
if [ "$1" == "-h" ] ; then
echo "
sdoi - search for a unique identifier (doi or pmid) on doi.org and/or pubmed and append bibtex entry to bibtex db. Optionally import a downloaded pdf.

usage:

sdoi.sh 'doi'

sdoi.sh 'doi' download.pdf

depends:

xsltproc, xmllint - xml processing programs from libxml

pubmed2bibtex.xsl - xml processor stylesheet

defaults:

Set the three required default file locations (xsl file, bib file, pdf directory)

"
exit 0
fi
|
2019-02-05 19:30:40 -08:00
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
set -e #exit if an error

# set -v -x -e #debugging

#Setup defaults
# $1 = doi or pmid; $2 = optional pdf to import.
doi=$1
fn=$2
# Each location can be overridden via the environment variable of the same name.
styleSheet=${pubmedStyleSheet:-$HOME/bin/pubmed2bibtex.xsl}
bibdFileOut=${bibdFileOut:-$HOME/projects/learn/bibd/OMEGA.bib}
pdfPathOut=${pdfPathOut:-$HOME/projects/learn/bibd/papers}
# Quoted: the pdf path may contain spaces (was unquoted, which would
# word-split and break basename).
relPath=$(basename "$pdfPathOut")
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
#define functions
|
|
|
|
|
import_bib() {
  # Import the fetched bibtex entry; when a pdf was supplied, rename and
  # move it into the papers repository first.
  if [ -n "$fn" ]; then
    extract_name
    append_pdf
  fi
  append_bibfile
  clean_up
}
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
fetchBib_pubmed() {
  #request pubmed xml and transform into bibtex
  # Uses the pubmed uid found by the esearch step; the raw XML is kept in
  # $tmpBib.xml because extract_name later reads it with xmllint.
  # All expansions quoted — the temp path or stylesheet path may contain
  # spaces (they were unquoted before).
  curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > "$tmpBib.xml"
  xsltproc --novalid "$styleSheet" "$tmpBib.xml" >> "$tmpBib"
}
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
fetchBib_doiDotOrg() {
  #request bibtex from doi.org
  # Fallback when the esearch step returned no pubmed uid.
  echo "pubmed id not found, trying doi.org.."
  # BUG FIX: the URL scheme was written "https//" (missing colon), so curl
  # could never resolve the host and this fallback always failed.
  curl -LH 'Accept: application/x-bibtex' "https://dx.doi.org/$doi" >> "$tmpBib"
  # Blank line after the entry (equivalent to the previous echo -e "\n":
  # the escape plus echo's own newline = two newlines).
  printf '\n\n' >> "$tmpBib"
}
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
extract_name() {
  #extract some strings to make a nice filename for the pdf
  # Reads the raw PubMed XML saved by fetchBib_pubmed; the resulting
  # author/journal/year globals are consumed by append_pdf.
  # $tmpBib.xml quoted everywhere — it was unquoted before and would
  # word-split on paths containing spaces.
  key="LastName"
  author=$(xmllint --xpath "string(//$key)" "$tmpBib.xml" | tr -d ' ')

  key="MedlineTA"
  journal=$(xmllint --xpath "string(//$key)" "$tmpBib.xml" | tr -d ' ')

  key="Year"
  year=$(xmllint --xpath "string(//$key)" "$tmpBib.xml")
}
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
append_bibfile() {
  #import bibtex
  # Normalizes the fetched entry, then appends it to the bibtex db unless
  # the identifier is already present.

  #replace pubmed field with pmid
  sed -i -E "s|(\W*)pubmed = |\1pmid = |" "$tmpBib"

  #grep for a uid (doi) in case its already in db
  # FIXES: the previous 'rg' call was a non-standard dependency and treated
  # the doi as a regex (the '.' in every doi matched any character).
  # grep -F matches the doi literally; -q avoids output; -s silences the
  # error if the db file does not exist yet; -- protects a leading '-'.
  if grep -qsF -- "$doi" "$bibdFileOut"; then
    echo "$doi already found in $bibdFileOut, exiting"
  else
    echo "importing $tmpBib"
    cat "$tmpBib" >> "$bibdFileOut"
  fi
}
|
|
|
|
|
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
append_pdf() {
  #move pdf file to papers repository, add file name to bibtex url field
  # Target name is built from the globals set by extract_name plus the
  # pubmed uid, e.g. Smith_Nature2020-12345.pdf.
  fn2=${author}_${journal}$year-$uid.pdf
  echo "moving $fn to $pdfPathOut/$fn2"
  # Quoted + '--': the source pdf path may contain spaces or start with '-'
  # (the old unquoted mv broke on both).
  mv -- "$fn" "$pdfPathOut/$fn2"

  #insert local path to pdf into the retrieved bibtex url field
  # NOTE(review): $relPath/$fn2 are interpolated into the sed program; a
  # '|' or '&' in author/journal values would corrupt the substitution —
  # assumed absent in PubMed metadata, verify.
  sed -i -E "s|(\W*url = \{).*(\}.*)|\1$relPath/$fn2\2|" "$tmpBib"
}
|
2019-02-05 19:30:40 -08:00
|
|
|
|
|
|
|
|
|
2021-05-04 19:23:54 -07:00
|
|
|
clean_up() {
  #clean up
  # Remove the temp bibtex and xml files; called by import_bib at the end
  # of a SUCCESSFUL import.
  rm -f "$tmpBib" "$tmpBib.xml"
  # BUG FIX: this was 'exit 1', which reported failure to the caller even
  # after a successful import. A clean run must exit 0.
  exit 0
}
|
|
|
|
|
|
2021-05-03 21:03:40 -07:00
|
|
|
|
2021-10-19 00:34:23 -07:00
|
|
|
#main function

##test whether the given unique identifier (doi) is an actual doi, else assume its a pmid
# DOIs always begin with the literal prefix "10." — the previous pattern
# grep "^10." left the dot unescaped, so a bare pmid like "105..." was
# misclassified as a doi. The glob below matches the dot literally and
# avoids the useless echo|grep pipeline.
if [[ "$doi" == 10.* ]]; then
  searchField="doi"
else
  searchField="pmid"
fi
|
|
|
|
|
|
|
|
|
|
# Ask PubMed esearch for the record's uid; empty when PubMed has no match.
# NOTE(review): $doi is embedded raw in the query string — a doi containing
# '&' or '#' would corrupt the request; assumed absent, verify. If esearch
# ever returns multiple <Id> elements, uid becomes multi-line — presumably
# the identifier is unique enough that this does not occur; confirm.
uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=$searchField&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")

# Temp file for the bibtex entry, created in the current directory
# (GNU mktemp options -p/--suffix).
tmpBib=$(mktemp -p ./ --suffix=.bib)
|
|
|
|
|
|
|
|
|
|
# Fetch the bibtex entry: prefer PubMed when a uid was found, otherwise
# fall back to doi.org content negotiation.
if [ -n "$uid" ]; then
  fetchBib_pubmed
else
  fetchBib_doiDotOrg
fi

# A non-empty temp file means a record was retrieved; import it.
if [ -s "$tmpBib" ]; then
  import_bib
else
  echo "sorry, doi not found.."
  exit 1
fi
|
|
|
|
|
|