SQL script to perform linear regression analysis on the COVIDcast data
This page documents the preview version (v2.23). Preview includes features under active development and is for development and testing only. For production, use the stable version (v2024.1). To learn more, see Versioning.
Save this script as "analysis-queries.sql"
create or replace view covidcast_fb_survey_results_v as
select
survey_date,
state,
mask_wearing_pct,
cmnty_symptoms_pct as symptoms_pct
from covidcast_fb_survey_results;
\o analysis-results/analysis-queries.txt
\t on
select 'Symptoms by state for survey date = 2020-10-21.';
\t off
select
round(mask_wearing_pct) as "% wearing mask",
round(symptoms_pct) as "% with symptoms",
state
from covidcast_fb_survey_results_v
where survey_date = to_date('2020-10-21', 'yyyy-mm-dd')
order by 1;
\t on
select 'Symptoms by state, overall average.';
\t off
select
round(avg(mask_wearing_pct)) as "% wearing mask",
round(avg(symptoms_pct)) as "% with symptoms",
state
from covidcast_fb_survey_results_v
group by state
order by 1;
\t on
select 'Daily regression analysis report.';
\t off
with a as (
select
survey_date,
avg (mask_wearing_pct) as mask_wearing_pct,
avg (symptoms_pct) as symptoms_pct,
regr_r2 (symptoms_pct, mask_wearing_pct) as r2,
regr_slope (symptoms_pct, mask_wearing_pct) as s,
regr_intercept(symptoms_pct, mask_wearing_pct) as i
from covidcast_fb_survey_results_v
group by survey_date)
select
to_char(survey_date, 'mm/dd') as survey_date,
to_char(mask_wearing_pct, '90') as mask_wearing_pct,
to_char(symptoms_pct, '90') as symptoms_pct,
to_char(r2, '0.99') as r2,
to_char(s, '90.9') as s,
to_char(i, '990.9') as i
from a
order by survey_date;
\t on
select 'Regression analysis report for survey date = 2020-10-21.';
\t off
with a as (
select
max(survey_date) as survey_date,
regr_slope (symptoms_pct, mask_wearing_pct) as s,
regr_intercept(symptoms_pct, mask_wearing_pct) as i
from covidcast_fb_survey_results_v
where survey_date = to_date('2020-10-21', 'yyyy-mm-dd'))
select
to_char(survey_date, 'mm/dd') as survey_date,
to_char(s, '90.9') as s,
to_char(i, '990.9') as i
from a;
with a as (
select regr_r2 (symptoms_pct, mask_wearing_pct) as r2
from covidcast_fb_survey_results_v
group by survey_date)
select
to_char(avg(r2), '0.99') as "avg(R-squared)"
from a;
\o
\o analysis-results/2020-10-21-mask-symptoms.csv
\t on
select
round(mask_wearing_pct)::text||','||round(symptoms_pct)::text
from covidcast_fb_survey_results_v
where survey_date = to_date('2020-10-21', 'yyyy-mm-dd')
order by 1;
\t off
\o