## 1.PRS using SBayesR


# To run SBayesR run the lines below. Make sure you degin the input variable before (name of sumstats in GCTA ma format)
echo "
#!/bin/bash
#PBS -N SbayesR
#PBS -l mem=260GB,walltime=24:00:00
cd \$PBS_O_WORKDIR
module load gctb/2.03
gctb --sbayes R   \\
     --mldm /reference/genepi/public_reference_panels/ldm_ukb_50k_bigset_2.8M/ukb50k_2.8M_shrunk_sparse.mldmlist      \\
     --pi 0.95,0.02,0.02,0.01      \\
     --gamma 0.0,0.01,0.1,1      \\
     --gwas-summary ../${input}.ma      \\
     --chain-length 10000      --burn-in 2000      --out-freq 10      \\
     --out ${input}_SBayesR_fullLDM
" > Submit_SBayesR.PBS
qsub Submit_SBayesR.PBS

#Extract the effect sizes and prepare them for plink scoring. In this case I am printing the chr:bp:a1:a2 (or a2:a1) plus the effect allele ($5) and the effect size ($8).
awk '(FNR>1){print $3":"$4":"$5":"$6,$5,$8"\n"$3":"$4":"$6":"$5,$5,$8}'  ${input}_SBayesR_fullLDM.snpRes > ${input}_SBayesR_betas.txt

# Scirpt for PLINK polygenic risk scoring. This script is the base for all other PRS what you do before is what differs in C+T vs other methods
echo "
#!/bin/bash
#PBS -N PlinkScoring
#PBS -l mem=35GB,walltime=12:00:00,ncpus=4
cd \$PBS_O_WORKDIR
chr=\$PBS_ARRAY_INDEX
/working/genepi/software/bin/plink_2.00 --vcf /working/lab_nickm/adrianC/SAMPLE/ImputedFiles/chr\${chr}.dose.vcf.gz dosage=DS --double-id \\
 --extract /working/lab_nickm/adrianC/SAMPLE/ImputedFiles/QCed_variants_MAF01_CR90_RSQ60.list \\
 --score ${input}_SBayesR_betas.txt cols=+scoresums \\
 --threads 4 \\
 --memory 35000 \\
 --out PRScalc/SAMPLE_${input}_PRS_chr\${chr}
" > Submit_PLINKscore_array.PBS
mkdir logfiles
mkdir PRScalc

#Note this is specific to the case where the genotype data is split by chromosomes, so we are taking advantage of that in the submission scripts
qsub -J 1-22 -e logfiles/ -o logfiles/ Submit_PLINKscore_array.PBS # Run this for submissionm do not run if you want to just check the script generated

# This will yield PRS by chromosome. You want to combine all of them into a single file by summing across samples.
# you can do this in R or python or whatever your tool of choice. In my case I made a custom python script.



## 2.PRS using Clumping and Thresholding
# In this example we will be clumping using UKB as a reference panel, but you can use 1000G or any other LD panel:

#This assumes the input file to have these columns: SNP     CHR     BP      A1      A2      FREQ    BETA    SE      P       N 
#where SNP is rs number or the matched SNPID between sumstats, LDref panel and target sample
mkdir clumping
cd  clumping
awk '(FNR>1) {print $1}' ../${input} > allSnpRsNumberUniqueNoIndels
echo "
#!/bin/bash
#PBS -N clump
#PBS -l mem=24GB,walltime=2:00:00
module load plink/1.90b6.8
cd \$PBS_O_WORKDIR
chr=\$PBS_ARRAY_INDEX
plink --bfile /reference/data/UKBB_500k/versions/LDref/HRC_only/UKBB_LD_bestguess_ref_panel_maf1e4_rsq_point3_HRC_SNPs_only \\
  --extract allSnpRsNumberUniqueNoIndels \\
  --clump ../${input} \\
  --clump-p1 1 \\
  --clump-p2 1 \\
  --clump-r2 0.2 \\
  --clump-kb 2000 \\
  --threads 1 \\
  --memory 20000 \\
  --out ./${input}
" > Submit_Clumping.PBS
qsub Submit_Clumping.PBS

# Once clumping is done match back to get A1 and beta:

 awk 'BEGIN{OFS="\t"}(NR==FNR){leadSNP[$3]=1;next}(FNR==1){print "SNP","A1","effect","P";next}($1 in leadSNP){print $2":"$3":"$4":"$5,$4,$7,$9"\n"$2":"$3":"$5":"$4,$4,$7,$9}' ./${input}.clumped ../${input} > indexSNPsumstats.ctg

# Now this is the scoring submission script
echo "
#!/bin/bash
#PBS -N PlinkScoring
#PBS -l mem=35GB,walltime=12:00:00,ncpus=4
cd \$PBS_O_WORKDIR
chr=\$PBS_ARRAY_INDEX
module load plink/20200511
plink2 --pfile /working/lab_nickm/adrianC/SAMPLE/ImputedFiles/plink2Format/SAMPLE_imputed_plink2_DS_chr\${chr} \\
 --extract /working/lab_nickm/adrianC/SAMPLE/ImputedFiles/QCed_variants_MAF01_CR90_RSQ60.list \\
 --score indexSNPsumstats.ctg 1 2 header cols=+scoresums \\
 --score-col-nums 3 \\
 --q-score-range pvalue.ranges indexSNPsumstats.ctg 1 4 \\
 --threads 4 \\
 --memory 35000 \\
 --out ./${prefix}_SAMPLEclumpedPRS_chr\${chr}
" > Submit_PLINKscore_array.PBS

qsub -J1-22 -e /dev/null -o /dev/null Submit_PLINKscore_array.PBS


# Again this will yield PRS by chromosome. You want to combine all of them into a single file by summing across samples.
# you can do this in R or python or whatever your tool of choice.