simplify sbib, sdoi

2021-05-04 19:23:54 -07:00
parent 29956101f2
commit 61cc931e94
7 changed files with 133 additions and 156 deletions
--- a/30
+++ b/30
@@ -1,30 +0,0 @@
-#!/bin/bash
-if [ "$1" == "-h" ] ; then
-    echo "
-    bibview - search for citekey in a bibjson.json and preview pdf
-         usage:
-          bibview
-
-         depends: 
-          fzf
-          zathura (or other fast pdf viewer)
-
-         defaults:
-          Set the required default file locations (csl file, bib file)
- 
-    "
-    exit 0
-fi
-#Setup defaults
-bibdFile=${2:-$HOME/projects/bibd/OMEGA.json}
-#pandoc-citeproc --bib2json ~/projects/bibd/OMEGA.bib > OMEGA.json
-cd $(dirname $bibdFile)
-set -e #exit if an error
-
-# export citeKey=$1
-# doiStr=$(jq -r '.[] | select(.id==env.citeKey).DOI' $bibdFile)
-# urlStr=$(jq -r '.[] | select(.id==env.citeKey).URL' $bibdFile)
-
-#actually this is the good one, opens pdfs quickly
-jq -r '.[] | [.id, .title, .abstract, .keyword, .DOI, .PMID, .author[]?.family, .issued[]?[0]?[0], .["container-title"], .URL] | join("  ")' $bibdFile | fzf --preview 'echo {}' --preview-window=:up:70%:wrap --bind "enter:execute-silent(zathura {-1} &)"
-
--- a/8
+++ b/8
@@ -15,19 +15,13 @@ if [ "$1" == "-h" ] ; then
    exit 0
 fi

-#Setup defaults
-# cslFile=${2:-$HOME/projects/bibd/bibd-md.csl}
-# bibdFile=${3:-$HOME/projects/bibd/OMEGA.bib}
-# cd $(dirname $bibdFile)
 set -e #exit if an error
-inFlag=${1:-"-i --files"}
-

 if [ "$1" == "p" ]; then

    ls *.pdf | fzf --preview 'pdftotext -l 2 -nopgbrk -q {1} -' \
        --preview-window=up:70% --bind "enter:execute-silent(zathura {} &)" \
-        --bind "ctrl-s:execute(pdf2bib.sh {})+reload(ls *.pdf)"
+        --bind "ctrl-s:execute(spdf.sh {})+reload(ls *.pdf)"
    exit 1
 fi

--- a/41
+++ b/41
@@ -1,11 +1,14 @@
 #!/bin/bash
 if [ "$1" == "-h" ] ; then
    echo "
-    sbib - search for citekey in a bibtex.bib (or maybe eventually bibjson.json) file and return a bibliography entry in markdown or possibly other format
+    sbib - search for citekey in a bibtex.bib or bibjson.json file, preview pdf, or return a bibliography entry in markdown or other format
+
         usage:
          sbib
          # use 'a' flag to search all 
          sbib a
+          # use 'j' flag for bibjson quick search and preview
+          sbib j
          # pass a custom citation style and database
          sbib a citeprocStyle.csl bibdFile.json

@@ -16,6 +19,7 @@ if [ "$1" == "-h" ] ; then
          bat or less
          pandoc
          pandoc-citeproc
+          zathura (or other fast pdf viewer)
          echo
          wl-copy

@@ -29,16 +33,12 @@ fi
 #Setup defaults
 cslFile=${2:-$HOME/projects/bibd/bibd-md.csl}
 bibdFile=${3:-$HOME/projects/bibd/OMEGA.bib}
+# bibdFile=${2:-$HOME/projects/bibd/OMEGA.json}
 cd $(dirname $bibdFile)
 set -e #exit if an error

-#set pattern to the null character '\0' for search all, else search only cite keys
-if [ "$1" == "a" ]; then
-    sPattern='\0'
-else
-    sPattern="@[a-zA-Z_-]+\{"
-fi

+view_bib() {
 # Use fzf to search citation
 # Initial scroll offset is set to the line number of each line
 # of git grep output *minus* 5 lines (-5)
@@ -70,3 +70,30 @@ rm $tmpName
 # fzf --preview="file {}" --preview-window=down:1
 echo $outCitation
 echo $outCitation | wl-copy
+}
+
+
+view_json() {
+# Browse the bibjson database ($bibdFile) with fzf and open the chosen entry in zathura.
+# jq flattens every record into a single line (id, title, abstract, keyword, DOI, PMID,
+# author family names, first issued date part - presumably the year, container-title, URL)
+# so that fzf can search across all of those fields; the preview pane echoes the whole line, wrapped.
+#
+# Per-citekey lookups, kept for reference:
+# export citeKey=$1
+# doiStr=$(jq -r '.[] | select(.id==env.citeKey).DOI' $bibdFile)
+# urlStr=$(jq -r '.[] | select(.id==env.citeKey).URL' $bibdFile)
+
+#actually this is the good one, opens pdfs quickly
+# .URL is joined last, so fzf's {-1} (the last field of the selected line) is what zathura receives on enter
+# "$bibdFile" is quoted so a database path containing spaces reaches jq as one argument
+jq -r '.[] | [.id, .title, .abstract, .keyword, .DOI, .PMID, .author[]?.family, .issued[]?[0]?[0], .["container-title"], .URL] | join("  ")' "$bibdFile" \
+    | fzf --preview 'echo {}' --preview-window=:up:70%:wrap --bind "enter:execute-silent(zathura {-1} &)"
+}
+
+
+
+#choose the search pattern: the null character '\0' searches everything ('a' flag),
+#otherwise only cite keys are matched
+case "$1" in
+    a) sPattern='\0' ;;
+    *) sPattern="@[a-zA-Z_-]+\{" ;;
+esac
+
+#'j' switches to the bibjson database and its quick preview; anything else uses the bibtex search
+case "$1" in
+    j)
+        bibdFile=${3:-$HOME/projects/bibd/OMEGA.json}
+        view_json
+        ;;
+    *)
+        view_bib
+        ;;
+esac
--- a/sdoi.sh
+++ b/sdoi.sh
@@ -17,18 +17,19 @@ if [ "$1" == "-h" ] ; then
    exit 0
 fi 

+set -e #exit if an error
+# set -v -x -e #debugging
+
 #Setup defaults
+doi=$1
+fn=$2
 styleSheet=${pubmedStyleSheet:-$HOME/bin/pubmed2bibtex.xsl}
 bibdFileOut=${bibdFileOut:-$HOME/projects/bibd/OMEGA.bib}
 pdfPathOut=${pdfPathOut:-$HOME/projects/bibd/papers}
 relPath=$(basename $pdfPathOut)
-doi=$1
-fn=$2

-set -e #exit if an error
-# set -v -x -e #debugging
-
-function import_bib {
+#define functions
+import_bib() {
  #decide whether to process and move an associated pdf or just exit
  if [ -z "$fn" ]; then
    append_bibfile
@@ -41,19 +42,19 @@ function import_bib {
  fi
 }

-function fetchBib_pubmed {
+fetchBib_pubmed() {
  #request pubmed xml and transform into bibtex
  curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > $tmpBib.xml
  xsltproc --novalid $styleSheet $tmpBib.xml > $tmpBib
 }

-function fetchBib_doiDotOrg {
+fetchBib_doiDotOrg() {
+  #fallback for when pubmed has no record: request bibtex for $doi from the doi
+  #resolver (Accept header, following redirects) and append it to $tmpBib
   echo "pubmed id not found, trying doi.org.."
-  curl -LH 'Accept: application/x-bibtex' "http//dx.doi.org/"$doi >> $tmpBib
+  #the scheme needs its colon: without "://" curl guesses the protocol and takes "http" as the host name
+  curl -LH 'Accept: application/x-bibtex' "http://dx.doi.org/$doi" >> $tmpBib
   echo -e "\n" >> $tmpBib
 }

-function extract_name {
+extract_name() {
  #extract some strings to make a nice filename for the pdf
  key="LastName"; 
  author=$(grep $key --max-count=1 $tmpBib.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")
@@ -66,7 +67,7 @@ function extract_name {

 }

-function append_bibfile {
+append_bibfile() {
  #import bibtex
  #first grep for a uid (doi) in case its already in db
  if [[ -z $(rg $doi $bibdFileOut) ]]; then
@@ -78,22 +79,23 @@ function append_bibfile {
 }


-function append_pdf {
+append_pdf() {
  fn2=${author}_${journal}$year-$uid.pdf
-  #move pdf file to papers repository, add file name to bibtex file field
+  #move pdf file to papers repository, add file name to bibtex url field
  mv $fn $pdfPathOut/$fn2
  echo "moved to $pdfPathOut/$fn2"
-  sed -i -E "s|(\W*file = \{).*(\}.*)|\1$relPath/$fn2\2|" $tmpBib
+  sed -i -E "s|(\W*url = \{).*(\}.*)|\1$relPath/$fn2\2|" $tmpBib
 }


-function clean_up {
+clean_up() {
  #clean up
  rm -f $tmpBib $tmpBib.xml
  exit 1
 }


+#main
 uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")

 tmpBib=$(mktemp -p ./ --suffix=.bib)
--- a/pdf2bib.sh
+++ b/pdf2bib.sh
@@ -17,17 +17,12 @@ if [ "$1" == "-h" ] ; then
    exit 0
 fi

+set -e #exit if an error
+# set -v -x -e #debugging
+
 #Setup defaults
-styleSheet=${pubmedStyleSheet:-$HOME/bin/pubmed2bibtex.xsl}
-bibdFileOut=${bibdFileOut:-$HOME/projects/bibd/OMEGA.bib}
-pdfPathOut=${pdfPathOut:-$HOME/projects/bibd/papers}
-relPath=$(basename $pdfPathOut)
 fn=$1

-set -e #exit if an error
-
-echo "using $pdfPathOut"
-echo "using $bibdFileOut"

 #try to extract doi from pdf and retrieve a pubmed id
 #for 'DOI:' syntax
@@ -56,43 +51,5 @@ if [ -z "$doi" ]; then
  exit 1
 fi

+sdoi.sh $doi $fn

-## TODO: dedupe this with sdoi.sh
-uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")
-
-if [ -z "$uid" ]; then
-  echo "pubmed id not found"
-  exit 1
-fi
-
-#request pubmed xml and transform into bibtex
-curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > $uid.xml
-xsltproc --novalid $styleSheet $uid.xml > $uid.bib
-
-#extract some strings to make a nice filename for the pdf
-key="LastName"; 
-author=$(grep $key --max-count=1 $uid.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")
-
-key="MedlineTA"; 
-journal=$(grep $key --max-count=1 $uid.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")
-
-key1="PubDate"; 
-key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s|\W*<$key2>(.+)</$key2>\W*|\1|")
-
-fn2=${author}_${journal}$year-$uid.pdf
-
-#move pdf file to papers repository, add file name to bibtex file field
-mv $fn $pdfPathOut/$fn2
-echo "moved to $pdfPathOut/$fn2"
-sed -i -E "s|(\W*file = \{).*(\}.*)|\1$relPath/$fn2\2|" $uid.bib
-
-if [[ -z $(rg $uid $bibdFileOut) ]]; then
-  #import bibtex
-  echo "importing $uid.bib"
-  cat $uid.bib >> $bibdFileOut
-else
-  echo "$uid already found in $bibdFileOut, exiting"
-fi
-
-#clean up
-rm $uid.xml $uid.bib
--- a/spmid.sh
+++ b/spmid.sh
@@ -7,61 +7,101 @@ if [ "$1" == "-h" ] ; then
          spmid.sh '12345678' download.pdf

         depends: 
-          xsltproc - xml processor, from GNOME project
-          pubmed2bibtex.xsl - xml processor stylesheet
-
-         defaults:
-          Set the three required default file locations (xsl file, bib file, pdf directory)
+          sdoi.sh
 
    "
    exit 0
 fi

+#TODO: deprecate this script once the pmid lookup has been added to sdoi.sh
+
+set -e #exit if an error
+# set -v -x -e #debugging
+
 #Setup defaults
+uid=$1
+fn=$2
 styleSheet=${pubmedStyleSheet:-$HOME/bin/pubmed2bibtex.xsl}
 bibdFileOut=${bibdFileOut:-$HOME/projects/bibd/OMEGA.bib}
 pdfPathOut=${pdfPathOut:-$HOME/projects/bibd/papers}
 relPath=$(basename $pdfPathOut)
-uid=$1
-fn=$2

-set -e #exit if an error
+#define functions
+import_bib() {
+  #the pdf argument ($fn) is optional: when one was given, build a file name
+  #for it and move it into place before the bibtex entry is imported
+  if [ -n "$fn" ]; then
+    extract_name
+    append_pdf
+  fi
+
+  #with or without a pdf, the run ends by importing the entry and cleaning up
+  append_bibfile
+  clean_up
+}

-#request pubmed xml and transform into bibtex
-curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > $uid.xml
-xsltproc --novalid $styleSheet $uid.xml > $uid.bib
+fetchBib_pubmed() {
+  #request the pubmed xml for $uid into $tmpBib.xml, then transform it into
+  #bibtex in $tmpBib using the $styleSheet xslt
+  #expansions are quoted so that paths containing spaces (the stylesheet lives
+  #under $HOME by default) are passed as single arguments
+  curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > "$tmpBib.xml"
+  xsltproc --novalid "$styleSheet" "$tmpBib.xml" > "$tmpBib"
+}

-#decide whether to process and move an associated pdf or just exit
-if [ -z "$fn" ]; then
-  #clean up
-  rm $uid.xml $uid.bib
-  exit 1
-else
+fetchBib_doiDotOrg() {
+  #request bibtex for $doi from the doi resolver (Accept header, following
+  #redirects) and append it, plus a blank separator, to $tmpBib
+  #NOTE(review): $doi is not assigned in the visible part of this script and
+  #nothing visible calls this function - confirm before relying on it
+  echo "pubmed id not found, trying doi.org.."
+  #the scheme needs its colon: without "://" curl guesses the protocol and takes "http" as the host name
+  curl -LH 'Accept: application/x-bibtex' "http://dx.doi.org/$doi" >> "$tmpBib"
+  echo -e "\n" >> "$tmpBib"
+}
+
+extract_name() {
  #extract some strings to make a nice filename for the pdf
  key="LastName"; 
-  author=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+  author=$(grep $key --max-count=1 $tmpBib.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")

  key="MedlineTA"; 
-  journal=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+  journal=$(grep $key --max-count=1 $tmpBib.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")

  key1="PubDate"; 
-  key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s#\W*<$key2>(.+)</$key2>\W*#\1#")
+  key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $tmpBib.xml | grep $key2 | sed -E "s|\W*<$key2>(.+)</$key2>\W*|\1|")

+}
+
+append_bibfile() {
+  #append the fetched bibtex entry ($tmpBib) to the database ($bibdFileOut)
+  #unless the database already mentions this record
+  #this script is keyed by pubmed id ($uid); $doi is never set here, and an
+  #empty unquoted pattern would make rg treat the database path as the pattern
+  #assumes the stylesheet writes the pmid into each entry, as the earlier
+  #version of this check did - TODO confirm
+  if rg -q -- "$uid" "$bibdFileOut"; then
+    echo "$uid already found in $bibdFileOut, exiting"
+  else
+    echo "importing $tmpBib"
+    cat "$tmpBib" >> "$bibdFileOut"
+  fi
+}
+
+
+append_pdf() {
  fn2=${author}_${journal}$year-$uid.pdf
-
-  #move pdf file to papers repository, add file name to bibtex file field
+  #move pdf file to papers repository, add file name to bibtex url field
  mv $fn $pdfPathOut/$fn2
  echo "moved to $pdfPathOut/$fn2"
-  sed -i -E "s|(\W*file = \{).*(\}.*)|\1$relPath/$fn2\2|" $uid.bib
+  sed -i -E "s|(\W*url = \{).*(\}.*)|\1$relPath/$fn2\2|" $tmpBib
+}

-  if [[ -z $(rg $uid $bibdFileOut) ]]; then
-    #import bibtex
-    echo "importing $uid.bib"
-    cat $uid.bib >> $bibdFileOut
-  else
-    echo "$uid already found in $bibdFileOut, exiting"
-  fi
-  
+
+clean_up() {
  #clean up
-  rm $uid.xml $uid.bib
+  rm -f $tmpBib $tmpBib.xml
+  exit 1
+}
+
+
+#main
+#fetch the pubmed record for the given pmid into a temporary bibtex file
+#created in the current directory
+tmpBib=$(mktemp -p ./ --suffix=.bib)
+
+fetchBib_pubmed
+
+#only import when the transform actually produced a non-empty bibtex file
+if [ -s "$tmpBib" ]; then
+  import_bib
+else
+  #this script looks records up by pmid, so report the pmid rather than a doi
+  echo "sorry, pmid not found.."
+  clean_up
 fi
+
--- a/spubmed.sh
+++ b/spubmed.sh
@@ -18,20 +18,16 @@ if [ "$1" == "-h" ] ; then
    exit 0
 fi

-#Setup defaults
-styleSheet=${pubmedStyleSheet:-$HOME/bin/pubmed2bibtex.xsl}
-bibdFileOut=${bibdFileOut:-$HOME/projects/bibd/OMEGA.bib}
-pdfPathOut=${pdfPathOut:-$HOME/projects/bibd/papers}
-relPath=$(basename $pdfPathOut)
+set -e #exit if an error
+# set -v -x -e #debugging

+#Setup defaults
 author=$1
 journal=$2
 year=$3

-set -e #exit if an error
-
 #curl's option globoff needed for using brackets in a uri
-uid=$(curl -s --globoff "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$author[au]+AND+$journal[ta]+AND+$year[dp]&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s#<Id>([0-9]+)</Id>#\1#")
+uid=$(curl -s --globoff "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$author[au]+AND+$journal[ta]+AND+$year[dp]&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")

 if [ -z "$uid" ]; then
  echo "pubmed id not found"
@@ -45,16 +41,7 @@ if [[ $(echo $uid | wc -w) -gt 1 ]]; then
  exit 1
 fi

-echo $uid | xclip -selection clipboard
 echo $uid
+# echo $uid | xclip -selection clipboard
+echo $uid | wl-copy

-# #request pubmed xml and transform into bibtex
-# curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > $uid.xml
-# xsltproc --novalid $styleSheet $uid.xml > $uid.bib
-# 
-# #import bibtex
-# echo "importing $uid.bib"
-# cat $uid.bib >> $bibdFileOut
-# 
-# #clean up
-# rm $uid.xml $uid.bib