Calcbench has created a couple of API endpoints. The API serves normalized and as-reported financial information in JSON.
Calcbench uses cookie-based authentication, so you will need to use a client that handles cookies. An example using Python's requests package is below.
import requests
import json
import pandas as pd
# ---------------------------------------------------------------------------
# Log on.
# ---------------------------------------------------------------------------
cb_email = "your_calcbench_username"  # put your calcbench username here
cb_password = "your_calcbench_password"  # put your calcbench password here

# The log-on call authenticates by setting a cookie. Module-level
# requests.get()/requests.post() each run in their own throwaway session and
# therefore drop that cookie, so every request below goes through one shared
# Session that carries the cookie from the log-on to the API calls.
session = requests.Session()
r = session.post('https://www.calcbench.com/account/LogOnAjax',
                 data={'email': cb_email, 'strng': cb_password, 'rememberMe': 'true'},
                 verify=True)
# Raise explicitly rather than assert: asserts are removed under `python -O`,
# which would let a failed log-on go unnoticed.
if r.text != 'true':
    raise RuntimeError('login failed')

# ---------------------------------------------------------------------------
# Normalized values: return-on-equity decomposition for one SIC code.
# ---------------------------------------------------------------------------
pharma_SIC_code = 2834
companies_response = session.get(
    "https://www.calcbench.com/api/companies?siccodes={0}".format(pharma_SIC_code))
companies_response.raise_for_status()  # fail here, not while decoding an error page
pharma_companies = companies_response.json()
pharma_tickers = [co['ticker'] for co in pharma_companies]

metrics = ['revenue', 'netincome', 'assets', 'stockholdersequity']
payload = {
    'start_year': 2010,
    'start_period': 1,
    'end_year': 2014,
    'end_period': 4,
    'company_identifiers': pharma_tickers,
    'metrics': metrics,
}
normalized_response = session.post("https://www.calcbench.com/api/NormalizedValues",
                                   data=json.dumps(payload),
                                   headers={'content-type': 'application/json'})
normalized_response.raise_for_status()
data = normalized_response.json()

# Reshape the records into one row per (ticker, calendar_year, calendar_period)
# with one column per requested metric, in the order given by `metrics`.
pharma_data = pd.DataFrame(data)
pharma_data.set_index(keys=['ticker', 'metric', 'calendar_year', 'calendar_period'], inplace=True)
pharma_data = pharma_data.unstack('metric')['value']
pharma_data = pharma_data[metrics]

# Three-factor (DuPont) decomposition:
#   return on equity = profit margin * asset turnover * financial leverage
pharma_data['profitmargin'] = pharma_data['netincome'] / pharma_data['revenue']
pharma_data['assetturnover'] = pharma_data['revenue'] / pharma_data['assets']
pharma_data['financial_leverage'] = pharma_data['assets'] / pharma_data['stockholdersequity']
pharma_data['return_on_equity'] = (pharma_data['profitmargin']
                                   * pharma_data['assetturnover']
                                   * pharma_data['financial_leverage'])
# Show only the rows for which every input to the ratio was available.
pharma_data[pharma_data['return_on_equity'].notnull()]

# ---------------------------------------------------------------------------
# As-reported statement for a single company.
# ---------------------------------------------------------------------------
ticker = "ibm"
statement_type = "income"  # one of (income, balance, cash)
as_reported_url_template = 'https://www.calcbench.com/api/asreported/?companyIdentifier={0}&statementType={1}&periodType=annual'
response = session.get(as_reported_url_template.format(ticker, statement_type), verify=True)
response.raise_for_status()
as_reported_data = response.json()

# One column per reported period, one row per statement line item.
# NOTE(review): newer pandas releases prefer the "Y" alias for annual
# frequency - confirm against the installed pandas version before changing.
columns = pd.PeriodIndex([start['period_start'] for start in as_reported_data['columns']], freq="A")
index = [li['label'] for li in as_reported_data['line_items']]
# A line item without a 'facts' key yields an empty row.
# NOTE(review): presumably every line item carries one fact per column;
# otherwise the DataFrame construction below will not line up - verify.
data = [[f['effective_value'] for f in line_item.get('facts', [])]
        for line_item in as_reported_data['line_items']]
as_reported_df = pd.DataFrame(data=data, index=index, columns=columns)
as_reported_df