spring library
This commit is contained in:
19
pdf2bib.sh
19
pdf2bib.sh
@@ -31,23 +31,24 @@ echo "using $bibdFileOut"
|
||||
|
||||
#try to extract doi from pdf and retrieve a pubmed id
|
||||
#for 'DOI:' syntax
|
||||
# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi:" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#doi:(.+)#\1#")
|
||||
# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi:" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|doi:(.+)|\1|")
|
||||
|
||||
doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi:? ?/?10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.*doi:? ?/?(10.+)#\1#")
|
||||
# search for a doi string from the first page through page 10
|
||||
# Take the first "doi: 10.…" style match in pages 1-10, lower-cased, and strip
# everything up to the "10." prefix. "$fn" is quoted so paths containing spaces
# stay one argument; the tr classes are quoted so the shell cannot glob them
# against one-letter filenames in the working directory.
doi=$(pdftotext -q -f 1 -l 10 "$fn" - | grep -iE "doi:? ?/?10\." --max-count=1 | tr '[:upper:]' '[:lower:]' | sed -E "s|.*doi:? ?/?(10.+)|\1|")
|
||||
|
||||
|
||||
#for 'https://doi.org' syntax
|
||||
if [ -z "$doi" ]; then
|
||||
doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi\.org/10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.+doi\.org/(10.+)#\1#")
|
||||
# Fallback for the "doi.org/10.…" URL form on page 1. The sed prefix is ".*"
# (not ".+") so a line that begins directly with "doi.org/" is stripped too
# instead of being passed through unchanged. "$fn" and the tr classes are
# quoted to avoid word splitting and pathname expansion.
doi=$(pdftotext -q -f 1 -l 1 "$fn" - | grep -iE "doi\.org/10\." --max-count=1 | tr '[:upper:]' '[:lower:]' | sed -E "s|.*doi\.org/(10.+)|\1|")
|
||||
fi
|
||||
|
||||
# for 'https://doi.org' syntax
|
||||
# if [ -z "$doi" ]; then
|
||||
# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi.org/" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.+doi\.org\/(.+)#\1#")
|
||||
# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi.org/" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|.+doi\.org\/(.+)|\1|")
|
||||
# fi
|
||||
#
|
||||
# if [ -z "$doi" ]; then
|
||||
# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi ?" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#doi ?(.+)#\1#")
|
||||
# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi ?" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|doi ?(.+)|\1|")
|
||||
# fi
|
||||
|
||||
if [ -z "$doi" ]; then
|
||||
@@ -57,7 +58,7 @@ fi
|
||||
|
||||
|
||||
## TODO: dedupe this with sdoi.sh
|
||||
uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s#<Id>([0-9]+)</Id>#\1#")
|
||||
# Ask NCBI esearch for the PubMed ID matching the DOI and pull the number out
# of the <Id> element. -G sends the --data items as a GET query string, and
# --data-urlencode percent-encodes $doi so spaces, '#', '&' or '+' in the
# extracted text cannot corrupt the request.
uid=$(curl -s -G "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" --data "db=pubmed" --data-urlencode "term=$doi" --data "field=doi" --data "retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")
|
||||
|
||||
if [ -z "$uid" ]; then
|
||||
echo "pubmed id not found"
|
||||
@@ -70,13 +71,13 @@ xsltproc --novalid $styleSheet $uid.xml > $uid.bib
|
||||
|
||||
#extract some strings to make a nice filename for the pdf
|
||||
key="LastName";
|
||||
author=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
|
||||
author=$(grep $key --max-count=1 $uid.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")
|
||||
|
||||
key="MedlineTA";
|
||||
journal=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
|
||||
journal=$(grep $key --max-count=1 $uid.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")
|
||||
|
||||
key1="PubDate";
|
||||
key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s#\W*<$key2>(.+)</$key2>\W*#\1#")
|
||||
key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s|\W*<$key2>(.+)</$key2>\W*|\1|")
|
||||
|
||||
fn2=${author}_${journal}$year-$uid.pdf
|
||||
|
||||
|
||||
Reference in New Issue
Block a user