simplify sbib, sdoi

2021-05-04 19:23:54 -07:00
parent 29956101f2
commit 61cc931e94
7 changed files with 133 additions and 156 deletions
--- a/spmid.sh
+++ b/spmid.sh
@@ -7,61 +7,101 @@ if [ "$1" == "-h" ] ; then
          spmid.sh '12345678' download.pdf

         depends: 
-          xsltproc - xml processor, from GNOME project
-          pubmed2bibtex.xsl - xml processor stylesheet
-
-         defaults:
-          Set the three required default file locations (xsl file, bib file, pdf directory)
+          sdoi.sh
 
    "
    exit 0
 fi

+#TODO: deprecate this function, add pmid logic into sdoi
+
+set -e #exit if an error
+# set -v -x -e #debugging
+
 #Setup defaults
+uid=$1
+fn=$2
 styleSheet=${pubmedStyleSheet:-$HOME/bin/pubmed2bibtex.xsl}
 bibdFileOut=${bibdFileOut:-$HOME/projects/bibd/OMEGA.bib}
 pdfPathOut=${pdfPathOut:-$HOME/projects/bibd/papers}
 relPath=$(basename $pdfPathOut)
-uid=$1
-fn=$2

-set -e #exit if an error
+#define functions
+import_bib() {
+  #decide whether to process and move an associated pdf or just exit
+  if [ -z "$fn" ]; then
+    append_bibfile
+    clean_up
+  else
+    extract_name
+    append_pdf
+    append_bibfile
+    clean_up
+  fi
+}

-#request pubmed xml and transform into bibtex
-curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > $uid.xml
-xsltproc --novalid $styleSheet $uid.xml > $uid.bib
+fetchBib_pubmed() {
+  #request pubmed xml and transform into bibtex
+  curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > $tmpBib.xml
+  xsltproc --novalid $styleSheet $tmpBib.xml > $tmpBib
+}

-#decide whether to process and move an associated pdf or just exit
-if [ -z "$fn" ]; then
-  #clean up
-  rm $uid.xml $uid.bib
-  exit 1
-else
+fetchBib_doiDotOrg() {
+  echo "pubmed id not found, trying doi.org.."
+  curl -LH 'Accept: application/x-bibtex' "http//dx.doi.org/"$doi >> $tmpBib
+  echo -e "\n" >> $tmpBib
+}
+
+extract_name() {
  #extract some strings to make a nice filename for the pdf
  key="LastName"; 
-  author=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+  author=$(grep $key --max-count=1 $tmpBib.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")

  key="MedlineTA"; 
-  journal=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+  journal=$(grep $key --max-count=1 $tmpBib.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")

  key1="PubDate"; 
-  key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s#\W*<$key2>(.+)</$key2>\W*#\1#")
+  key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $tmpBib.xml | grep $key2 | sed -E "s|\W*<$key2>(.+)</$key2>\W*|\1|")

+}
+
+append_bibfile() {
+  #import bibtex
+  #first grep for a uid (doi) in case its already in db
+  if [[ -z $(rg $doi $bibdFileOut) ]]; then
+    echo "importing $tmpBib"
+    cat $tmpBib >> $bibdFileOut
+  else
+    echo "$doi already found in $bibdFileOut, exiting"
+  fi
+}
+
+
+append_pdf() {
  fn2=${author}_${journal}$year-$uid.pdf
-
-  #move pdf file to papers repository, add file name to bibtex file field
+  #move pdf file to papers repository, add file name to bibtex url field
  mv $fn $pdfPathOut/$fn2
  echo "moved to $pdfPathOut/$fn2"
-  sed -i -E "s|(\W*file = \{).*(\}.*)|\1$relPath/$fn2\2|" $uid.bib
+  sed -i -E "s|(\W*url = \{).*(\}.*)|\1$relPath/$fn2\2|" $tmpBib
+}

-  if [[ -z $(rg $uid $bibdFileOut) ]]; then
-    #import bibtex
-    echo "importing $uid.bib"
-    cat $uid.bib >> $bibdFileOut
-  else
-    echo "$uid already found in $bibdFileOut, exiting"
-  fi
-  
+
+clean_up() {
  #clean up
-  rm $uid.xml $uid.bib
+  rm -f $tmpBib $tmpBib.xml
+  exit 1
+}
+
+
+#main
+tmpBib=$(mktemp -p ./ --suffix=.bib)
+
+fetchBib_pubmed
+
+if [ -s "$tmpBib" ]; then
+  import_bib
+else
+  echo "sorry, doi not found.."
+  clean_up
 fi
+