Scholar Code Samples

R

library(httr)
getScholarlyData <- function(token, query){
	url <- 'https://api.lens.org/scholarly/search'
	headers <- c('Authorization' = paste('Bearer', token), 'Content-Type' = 'application/json')
	httr::POST(url = url, add_headers(.headers=headers), body = query)
}
token <- 'your-access-token'
request <- '{
	"query": {
		"match_phrase": {
			"author.affiliation.name": "Harvard University"
		}
	},
	"size": 1,
	"sort": [{
		"year_published": "desc"
	}]
}'
data <- getScholarlyData(token, request)
content(data, "text")

R - Cursor Based Pagination

Packages <- c("dplyr", "httr", "jsonlite")
lapply(Packages, library, character.only = TRUE)

token <- 'your-access-token'

# this sets the maximum number of records returned per query
max_results <- 100 

getLensData <- function(token, query){
  url <- 'https://api.lens.org/scholarly/search'
  headers <- c('Authorization' = paste('Bearer', token), 'Content-Type' = 'application/json')
  httr::POST(url = url, add_headers(.headers=headers), body = query)
}

request <- paste0('{
	"query": "malaria",
	"size": ', max_results, ',
	"scroll": "1m",
	"include": ["lens_id", "authors", "publication_type", "title"]
}')


data <- getLensData(token, request)

record_json <- content(data, "text")

# convert json output from article search to list
record_list <- fromJSON(record_json) 

# convert it to a data frame
record_df <- data.frame(record_list)
total <- record_list[["total"]]


# if the result set contains more than the max number of records per request, use cursor-based pagination
if(total > max_results) {
  
  # calculate the number of queries needed to retrieve all the records
  sets <- ceiling(record_list[["total"]] / max_results) 
  
  # extract the scroll id from the query to go back to the same search
  scroll_id <- record_list[["scroll_id"]] 
  
  # loop through the sets of results needed to bring back all records into a data frame
  for (i in 2:sets){ 
    # extract the latest scroll_id from the last query
    scroll_id <- record_list[["scroll_id"]] 
    
    # new query based on scroll_id and including 'include' for efficiency
    request <- paste0('{"scroll_id": "', 
                      scroll_id,
                      '", "include": ["lens_id", "authors", "publication_type", "title"]
                      }')
    
    # perform article search and extract text results
    data <- getLensData(token, request)
    record_json <- httr::content(data, "text")
    
    # convert json output from article search to list
    record_list <- jsonlite::fromJSON(record_json) 
    new_df <- data.frame(record_list)
    
    # bind the latest search data frame to the previous data frame
    record_df <- dplyr::bind_rows(record_df, new_df)
  } 
}
Credit: Neal Haddaway

Python

import requests
url = 'https://api.lens.org/scholarly/search'
data = '''{
     "query": {
           "match_phrase":{
                "author.affiliation.name": "Harvard University"
           }
     },
     "size": 1,
     "sort": [
           {
                "year_published": "desc"
           }
     ]
}'''
headers = {'Authorization': 'Bearer your-access-token', 'Content-Type': 'application/json'}
response = requests.post(url, data=data, headers=headers)
if response.status_code != requests.codes.ok:
  print(response.status_code)
else:
  print(response.text)
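
The response body is JSON, so once the status check passes you will usually want to parse it rather than print the raw text. A minimal sketch, reusing the response object from the request above; the 'total' and 'data' field names are the same ones the pagination samples below rely on:

results = response.json()
print(results['total'])  # total number of matching records
for record in results['data']:  # the records returned in this page
  print(record['lens_id'])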

Python - Cursor Based Pagination

import requests
import time
url = 'https://api.lens.org/scholarly/search'

# include fields
include = '''["patent_citations", "lens_id"]'''
# request body with scroll time of 1 minute
request_body = '''{
     "query": "Malaria",
     "size": 100,
     "scroll":"1m",
     "include": %s
}''' % include
headers = {'Authorization': 'Bearer YOUR-TOKEN', 'Content-Type': 'application/json'}

# Recursive function to scroll through paginated results
def scroll(scroll_id):
  # Change the request_body to prepare for the next scroll api call
  # Make sure to append the include fields for a faster response
  if scroll_id is not None:
    global request_body
    request_body = '''{"scroll_id": "%s", "include": %s, "scroll": "1m"}''' % (scroll_id, include)

  # make api request
  response = requests.post(url, data=request_body, headers=headers) 

  # If rate-limited, wait a few seconds and retry with the same scroll id
  # Since the scroll time is 1 minute, this gives sufficient time to wait and continue
  if response.status_code == requests.codes.too_many_requests:
    time.sleep(8)
    scroll(scroll_id)
  # If the response is not ok here, better to stop here and debug it
  elif response.status_code != requests.codes.ok:
    print(response.json())
  # If the response is ok, do something with the response, take the new scroll id and iterate
  else:
    json = response.json()
    if json.get('results') is not None and json['results'] > 0:
        scroll_id = json['scroll_id'] # Extract the new scroll id from response
        print(json['data']) # do something with your data
        scroll(scroll_id)

# start recursive scrolling
scroll(scroll_id=None)
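
Each page of results adds a frame to the call stack in the recursive version above, so a very large result set can hit Python's default recursion limit. A loop-based sketch of the same scroll logic, with the same request body and headers, avoids that:

import requests
import time

url = 'https://api.lens.org/scholarly/search'
headers = {'Authorization': 'Bearer YOUR-TOKEN', 'Content-Type': 'application/json'}
include = '''["patent_citations", "lens_id"]'''
request_body = '''{"query": "Malaria", "size": 100, "scroll": "1m", "include": %s}''' % include

scroll_id = None
while True:
  # Subsequent calls only need the scroll id, the scroll time and the include fields
  if scroll_id is not None:
    request_body = '''{"scroll_id": "%s", "include": %s, "scroll": "1m"}''' % (scroll_id, include)
  response = requests.post(url, data=request_body, headers=headers)
  # If rate-limited, wait and retry the same request
  if response.status_code == requests.codes.too_many_requests:
    time.sleep(8)
    continue
  # Any other error: stop and debug
  if response.status_code != requests.codes.ok:
    print(response.json())
    break
  page = response.json()
  # Stop when a page comes back empty
  if not page.get('results'):
    break
  scroll_id = page['scroll_id']  # extract the new scroll id from the response
  print(page['data'])  # do something with your data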

Python - Cursor Based Pagination - List of Identifiers

Use this script if you have a large list of Lens identifiers to send in the request. It sends 5,000 identifiers per request.

import json
import requests
import time

url = 'https://api.lens.org/scholarly/search'
headers = {'Authorization': 'Bearer YOUR-TOKEN', 'Content-Type': 'application/json'}

# Recursive function to scroll through paginated results
def scroll(scroll_id, request_body):
  # Change the request_body to prepare for the next scroll api call
  # Make sure to append the include fields for a faster response
  if scroll_id is not None:
    request_body = '''{"scroll_id": "%s", "include": %s, "scroll": "1m"}''' % (scroll_id, include)

  # make api request
  response = requests.post(url, data=request_body, headers=headers) 

  # If rate-limited, wait a few seconds and retry with the same scroll id
  # Since the scroll time is 1 minute, this gives sufficient time to wait and continue
  if response.status_code == requests.codes.too_many_requests:
    time.sleep(8)
    scroll(scroll_id, request_body)
  # If the response is not ok here, better to stop here and debug it
  elif response.status_code != requests.codes.ok:
    print(response.json())
  # If the response is ok, do something with the response, take the new scroll id and iterate
  else:
    json_body = response.json()
    # Stop when a page comes back empty; otherwise take the new scroll id and iterate
    if json_body.get('results'):
      scroll_id = json_body['scroll_id'] # Extract the new scroll id from the response
      print(json_body['data']) # do something with your data
      scroll(scroll_id, request_body)

include = '''["lens_id", "patent_citations"]'''
identifiers = []  # your list of Lens identifiers, which can be more than 10K
# take 5,000 identifiers at a time from the list and scroll through each batch
for i in range(0, len(identifiers), 5000):
  ids = identifiers[i:i + 5000]
  request_body = '''{
    "query": {
      "terms": {
        "lens_id": ''' + json.dumps(ids) + '''
      }
    },
    "size": 100,
    "scroll": "1m",
    "include": %s
  }''' % include

  # start recursive scrolling
  scroll(scroll_id=None, request_body=request_body)
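
Building JSON request bodies by string formatting is easy to get wrong; an alternative sketch of the same batching step builds each body from a dict with json.dumps (the 5,000-identifier slicing matches the loop above):

import json
import requests

url = 'https://api.lens.org/scholarly/search'
headers = {'Authorization': 'Bearer YOUR-TOKEN', 'Content-Type': 'application/json'}
identifiers = []  # your list of Lens identifiers

for i in range(0, len(identifiers), 5000):
  body = {
    'query': {'terms': {'lens_id': identifiers[i:i + 5000]}},
    'include': ['lens_id', 'patent_citations'],
    'size': 100,
    'scroll': '1m',
  }
  # first request for this batch; continue with the returned scroll id as in scroll() above
  response = requests.post(url, data=json.dumps(body), headers=headers)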

Java

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class JavaSample {

    public static void main(String[] args) {
        try {
            URL url = new URL("https://api.lens.org/scholarly/search");
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setConnectTimeout(30000);
            conn.setDoOutput(true);
            conn.setRequestMethod("POST");
            conn.setRequestProperty("Content-Type", "application/json");
            conn.setRequestProperty("Authorization", "Bearer your-access-token");

            String request = "{\"query\":{\"match_phrase\":{\"author.affiliation.name\":\"Harvard University\"}},\"size\":10,\"sort\":[{\"year_published\":\"desc\"}]}";
            conn.getOutputStream().write(request.getBytes(StandardCharsets.UTF_8));

            if (conn.getResponseCode() != 200) {
                throw new RuntimeException("Request failed with HTTP code: " + conn.getResponseCode());
            }

            BufferedReader br = new BufferedReader(new InputStreamReader((conn.getInputStream())));
            String output;
            while ((output = br.readLine()) != null) {
                System.out.println(output);
            }
            conn.disconnect();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Node.js

var request = require('request');

var endPoint = 'https://api.lens.org/scholarly/search';
var token = 'your-access-token';
var query = {
     "query": {
           "match_phrase":{
                "author.affiliation.name": "Harvard University"
           }
     },
     "size": 10,
     "sort": [
           {
                "year_published": "desc"
           }
     ]
};

var options = {
     url: endPoint,
     body: query,
     json: true,
     headers: {
           "Authorization": "Bearer " + token
     }
}

request.post(options, function(err, res, data) {
     if (err) {
           console.log(err);
           return;
     }

     console.log(data);
});

cURL

curl -X POST \
  https://api.lens.org/scholarly/search \
  -H 'Authorization: Bearer your-access-token' \
  -H 'Content-Type: application/json' \
  -d '{
	"query": {
		"bool": {
			"must": {
				"match_phrase": {
					"author.affiliation.name": "Harvard University"
				}
			},
			"filter": {
				"range": {
					"year_published": {
						"gte": "1999",
						"lte": "2000"
					}
				}
			}
		}
	},
	"size": 50
}'