library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sp)
library('sf')
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
ltlas <-
  read_sf(
    "Local_Authority_Districts__December_2017__Boundaries_in_Great_Britain.shp"
  )

ggplot(ltlas)+geom_sf()+geom_point(aes(x=bng_e,y=bng_n))

library(png)
img <- readPNG("phe_sgtf.png")


scilly_x=104
scilly_y=843
scilly_n = 11447
scilly_e = 91327

scale_factor_x = 0.001055
scale_factor_y=-0.00124

offset_x = scilly_e - scilly_x/scale_factor_x

pos_scilly_x = (scilly_e - offset_x) * scale_factor_x

offset_y = scilly_n - scilly_y/scale_factor_y

pos_scilly = (scilly_n - offset_y) * scale_factor_y

ltlas$pixel_x = round((ltlas$bng_e - offset_x) * scale_factor_x)
ltlas$pixel_y = round( (ltlas$bng_n - offset_y) * scale_factor_y)

ltlas$color = NA
ltlas$area <- st_area(ltlas)
library(units)
## udunits system database from C:/Users/Theo/Documents/R/win-library/3.6/units/share/udunits
min_area = 36033464

units(min_area)  <- as_units("m^2")
ltlas<-ltlas %>% filter(area>min_area)
ltlas$percent =NA



deep_purple = c(62/255,16/255,85/255)
white = c(1,1,1)

color_to_percent <- function(color){
  
  full_range = deep_purple - white
  estimates = (color - white)/full_range
  if(sd(estimates)>0.07){
    return(NA)
  }
  return( mean( estimates))
  
}


for(i in 1:nrow(ltlas)){
  try_x = ltlas$pixel_x[i]
  try_y = ltlas$pixel_y[i]
  if(try_y>0){
 # print(try_x)
 #  print(try_y)
    pixel = img[try_y,try_x,]
ltlas$color[i] = list(pixel)
ltlas$percent[i] = color_to_percent(pixel)
img[try_y:(try_y+2),try_x:(try_x+2),1]=1
img[try_y:(try_y+2),try_x:(try_x+2),2]=0
img[try_y:(try_y+2),try_x:(try_x+2),3]=0
}

}







require(grDevices)
op <- par(bg = "thistle",mar = rep(0, 4))
plot(c(100, 250), c(300, 450), type = "n", xlab = "", ylab = "")

rasterImage(img, 100, 300,250, 450, interpolate = FALSE)

par(op)
# }



newtibble = tibble(name = ltlas$lad17nm, s_dropout_percent = ltlas$percent*100) %>% arrange(name) %>% filter(s_dropout_percent>2)
write_csv(newtibble,"output.csv")
newtibble
ltlas2 <-
  read_sf(
    "Local_Authority_Districts__December_2017__Boundaries_in_Great_Britain.shp"
  )

both<-left_join(ltlas2,newtibble, by=c("lad17nm"="name"))
deep_purple_color = rgb(deep_purple[1],deep_purple[2],deep_purple[3])

ggplot(both,aes(fill=s_dropout_percent))+geom_sf()+scale_fill_gradient(low=rgb(1,1,1),high=deep_purple_color)+coord_sf(ylim=c(58255,600699))+labs(fill="S drop-out rate")+theme_minimal()

ggsave("recreation.png",width=10,height=10)
ggplot(newtibble,aes(y=s_dropout_percent,x=reorder(name, s_dropout_percent)))+geom_bar(stat="identity",fill="navy") + coord_flip(ylim=c(0,100))+theme_bw()

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
cases <- read_csv("ltla_2020-12-29.csv")
## Parsed with column specification:
## cols(
##   date = col_date(format = ""),
##   areaType = col_character(),
##   areaCode = col_character(),
##   areaName = col_character(),
##   newCasesByPublishDateRollingRate = col_double()
## )
cases_earlier = filter(cases,date==ymd("2020-12-7"))
cases_later  = filter(cases,date==ymd("2020-12-14"))


both <-inner_join(cases_earlier,cases_later,by=c("areaName"))
both$change = both$newCasesByPublishDateRollingRate.y / both$newCasesByPublishDateRollingRate.x

all<- inner_join(newtibble,both,by=c("name"="areaName"))

ggplot(all,aes(x=s_dropout_percent/100,y=change))+geom_point()+ scale_x_continuous(labels = scales::percent)+ scale_y_continuous(labels = scales::percent)+theme_bw()+labs(x="S drop-out",y="Week on week ratio of cases")+geom_smooth(method="lm")+coord_cartesian(xlim=c(0,1),ylim=c(0.5,2.5))
## `geom_smooth()` using formula 'y ~ x'

mod = lm(change ~ s_dropout_percent, data = all)
summary(mod)
## 
## Call:
## lm(formula = change ~ s_dropout_percent, data = all)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81959 -0.18820 -0.05788  0.13952  1.37244 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.8625940  0.0420955   20.49   <2e-16 ***
## s_dropout_percent 0.0091916  0.0007807   11.77   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3285 on 252 degrees of freedom
## Multiple R-squared:  0.3549, Adjusted R-squared:  0.3523 
## F-statistic: 138.6 on 1 and 252 DF,  p-value: < 2.2e-16
cases_day = filter(cases,date==ymd("2020-12-20"))
all<- inner_join(newtibble,cases_day,by=c("name"="areaName"))

all$combo = all$newCasesByPublishDateRollingRate * all$s_dropout_percent/100


library(ggrepel)

ggplot(all,aes(y=s_dropout_percent/100,x=newCasesByPublishDateRollingRate,label=name))+geom_point(color="darkblue")+ scale_y_continuous(labels = scales::percent)+theme_bw()+labs(y="S drop-out",x="Cases per 100K per week")+geom_text_repel(size=1.5,box.padding=0.05,segment.alpha=0.5)

ggsave("scatter.png",width=5,height=5)



ltlas2 <-
  read_sf(
    "Local_Authority_Districts__December_2017__Boundaries_in_Great_Britain.shp"
  )

both<-left_join(ltlas2,all, by=c("lad17nm"="name"))
ggplot(both,aes(fill=combo))+geom_sf()+scale_fill_viridis_c()+coord_sf(ylim=c(58255,600699))+labs(fill="Pred. rate of new variant per 100K")

ggsave("map_of_new_variant_pred_rates.png",width=10,height=10)