#    Example parameters file

#    It is a parameters file for GADMA software.

#    Lines starting with # are ignored.
#    Comments at the end of a line are ignored too.
#    Every line contains: Identifier of parameter : value.

#!!!     means pay attention to this parameter, they are primary.

#!!!
#    Output directory to write all GADMA out.
#    One needs to set it to a missing or empty directory.
#    If it is resumed from other directory and output directory 
#    isn't set, GADMA will add '_resumed' for previous output 
#    directory.
Output directory: my_example_run


#!!!
#    Input data can be sfs file (should end with .fs) or 
#    file of SNP's in dadi format (should end with .txt) or
#   VCF file + popmap file (sample population map).
Input data: tests/test_data/DATA/sfs/YRI_CEU.fs

#    'Population labels' is sequence of population names (the same
#    names as in input file)
#    If .fs file is in old format then it would rename population 
#    labels that are absent.
#    It is necessary to put them in order from the most ancient to
#    the least ancient (in case of more than two populations).
#    It is important, because the last of the formed populations
#    takes part in the next split.
#    For example, if we have YRI - African population,
#    CEU - European population and CHB - Chinese population,
#    then we can write YRI, CEU, CHB or YRI, CHB, CEU 
#    (YRI must be at the first place)
#    Default: from input file
Population labels: [YRI, CEU]

#    Also one can project the spectrum down to a smaller size.
#    For example, if we have 80 individuals in each of three 
#    populations, then spectrum will be 81x81x81 and one can 
#    project it to 21x21x21 by setting the 'Projections' parameter 
#    to 20, 20, 20.
#    Default: from input file
Projections: [20, 20]

#   To indicate if outgroup information is included in SFS data
#   one can set Outgroup option.
#   If outgroup is False then SFS will be folded.
#   Default: from input file
Outgroup: True

#   Effective length of sequence that was used to build SFS data.
#   Should be used together with Mutation rate and can be replaced
#   by the Theta0 setting.
#   Default: None
Sequence length: 4040000

#!!!
#    Are SNP's linked or unlinked?
#    If they are linked, then Composite Likelihood Akaike
#    Information Criterion (CLAIC) will be used to compare models.
#    If they are unlinked, then usual Akaike Information Criterion 
#    (AIC) will be used.
#    Default: True
Linked SNP's: True

#!!!
#    If SNP's are linked, then in order to calculate CLAIC please set
#    the directory with bootstrapped data.
#    Bootstrap should be done over the regions of the genome.
#    Default: None
Directory with bootstrap: Null


#!!!
#    Now all main parameters:
#
#    Engine for the demographic inference.
#    Default: moments
Engine: moments

#    If you choose to use dadi, please set pts parameter - number
#    of points in the grid. Otherwise this pts would be used in dadi's code.
#    Default: Let n = max number of individuals in one population, 
#    then pts = n, n+10, n+20
Pts: [20, 30, 40]

#!!!
#    Print parameters of model in units of N_ref = N_A.
#    N_A will be placed in brackets at the end of string.
#    Default: False
Relative parameters: False

#    Total mutation flux - theta.
#    It is equal to:
#    theta = 4 * μ * L
#    where μ - mutation rate per site per generation and 
#    L - effective sequenced length, which accounts for losses 
#    in alignment and missed calls.
#    Note: one should estimate μ based on generation time.
#    Default: 1.0
Theta0: Null

#   Instead of Theta0 mutation rate can be set independently.
#   Should be used together with Sequence length option.
#   Default: None
Mutation rate: 2.35e-08

#    Recombination rate
#    Default: None
Recombination rate: Null

#    Time (years) for one generation. Can be float. 
#    Is important for drawing models. If one doesn't want to draw, 
#    one can skip it.
#    Default: 1.0
Time for generation: Null


#!!!
#    One should choose the demographic history to infer.
#    It can be custom or set up with a structure.

# 1. Using a custom demographic model.
#    Please specify a file with a function named 'model_func' in it. 
#    So file should contain:
#    def model_func(params, ns, pts) in case of dadi
#    or
#    def model_func(params, ns) in case of moments
#    Default: None
Custom filename: Null

#    Now one should specify either bounds or identifications 
#    of custom model's parameters. All values are in Nref units.
#    Lower and upper bounds - lists of numbers.
#    List of usual bounds:
#    N: 1e-2 - 100
#    T: 0 - 5
#    m: 0 - 10
#    s: 0 - 1
#    These bounds will be taken automatically if identifications are set.
#    Default: None
Lower bound: Null
Upper bound: Null
#    An identifier list:
#    T - time
#    N - size of population
#    m - migration
#    s - split event,  proportion in which population size
#    is divided to form two new populations.
#    Default: None
Parameter identifiers: Null

# 2. Structure is for non-custom models!
#    Structure of model for one population - number of time periods 
#    (e.g. 5).
#    Structure of model for two populations - number of time periods
#    before the split of the ancestral population and after it (e.g. 2,2).
#    Structure of model for three populations - number of time periods
#    before first split, between first and second splits and after 
#    second split (e.g. 2,1,2).
#
#    Structure of initial model:
#    Default: all is ones - 1 or 1,1 or 1,1,1
Initial structure: [1, 1]

#    Structure of final model:
#    Default: equals to initial structure
Final structure: [2, 1]

#!!!
#    Additional settings for demographic model with structure.
#
#    Use sudden changes of population sizes only. Decreases 
#    the number of parameters.
#    Default: False
Only sudden: False

#    The set of available size dynamics could be set to any subset.
#    Sud is for sudden size change (constant during the next time epoch),
#    Lin - linear size change,
#    Exp - exponential size change.
#    If `Only sudden` is True then this setting will be [Sud].
#    Default: [Sud, Lin, Exp]
Dynamics: [Sud, Lin, Exp]

#    Disable migrations in demographic models.
#    Default: False
No migrations: False

#   Makes all migrations symmetrical.
#   Default: False
Symmetric migrations: False

#   Enable/disable migrations selectively.
#   Default: None
Migration masks: Null

#    Enable/disable inference of selection coefficients.
#    Is supported by moments and dadi engines only.
#    Default: False
Selection: False

#    Enables/disables inference of dominance coefficient.
#    If True then setting `Selection` must also be set to True.
#    Is supported by moments and dadi engines only.
#    Default: False
Dominance: True

#   Estimate fraction of ancestral population as parameter of split.
#   If False then population splits and each of new populations
#   has its own size as parameter of the model.
#   Default: False
Split fractions: True

#   Estimate inbreeding coefficients as model parameters.
#   Can be used only for dadi engine.
#   Default: False
Inbreeding: False

#   If False then multinomial approach is used in dadi and moments.
#   In multinomial approach ancestral size is inferred implicitly.
#   Default: False
Ancestral size as parameter: False

#    It is possible to limit the time of splits.
#    Split 1 is the most ancient split.
#    !Note that time is in genetic units (2 * time for 1 generation):
#    e.g. we want to limit by 150 kya, time for one generation is 
#    25 years, then bound will be 150000 / (2*25) = 3000.
#
#    Upper bound for split 1 (in case of 2 or 3 populations).
#    Default: None
Upper bound of first split: Null

#    Upper bound for split 2 (in case of 3 populations).
#    Default: None
Upper bound of second split: Null



#!!!
#    Local optimization.
#
#    Choice of local optimization, that is launched after 
#    each genetic algorithm.
#    Choices:
#
#    *    optimize (BFGS method)
#    
#    *    optimize_log (BFGS method)
#    
#    *    optimize_powell (Powell’s conjugate direction method)
#    (Note: is implemented in moments: one needs to have moments 
#    installed.)
#
#    (If optimizations are often hitting the parameter bounds, 
#    try using these methods:)
#    *    optimize_lbfgsb
#    *    optimize_log_lbfgsb 
#    (Note that it is probably best to start with the vanilla BFGS 
#    methods, because the L-BFGS-B methods will always try parameter
#    values at the bounds during the search. 
#    This can dramatically slow model fitting.)
#
#    *    optimize_log_fmin (simplex (a.k.a. amoeba) method)
#    
#    *    hill_climbing
#    
#    Default: optimize_powell
Local optimizer: BFGS_log



#    Parameters of pipeline
#
#    One can automatically generate dadi and moments code for models.
#    If 0 then only current best model will be printed in GA's 
#    working directory.
#    Also the result model will be saved there. 
#    If specified (not 0) then every N iterations the model will be saved
#    in the python code folder.
#    Default: 0
Print models' code every N iteration: 100

#   Engine that will draw demographic model plots.
#   Could be moments or demes.
#   Default: moments
Model plot engine: moments

#    One can automatically draw models every N iteration. 
#    If 0 then never.
#    Pictures are saved in GA's directory in the picture folder.
#    Default: 0
Draw models every N iteration: 100

#    One can choose time units in models' plots: years or thousand 
#    years (kya, KYA). If time for one generation isn't specified 
#    then time is in genetic units.
#    Default: years
Units of time in drawing: generations

#   Minimum value that will be drawn in SFS plots.
#   Default: 1
Vmin: 1

#    No std output.
#    Default: False
Silence: False

#   Verbosity of optimizations output.
#   Default: 1
Verbose: 1

#    How many times to launch GADMA with these parameters.
#    Default: 1
Number of repeats: 3

#    How many processes to use for these repeats.
#    Note that one repeat isn't parallelized, so increasing number
#    of processes will not affect the time of one repeat.
#    It is desirable that the number of repeats is a multiple of 
#    the number of processes.
#    Default: 1
Number of processes: 3



#    One can resume from some other launch of GADMA by setting
#    output directory of it to 'Resume from' parameter.
#    You can set again new parameters of resumed launch.
Resume from: Null
#
#    If you want to take only models from previous run set this 
#    flag.  Then iterations of GA will start from 0 and values of
#    mutation rate and strength will be initial.
#    Default: None
Only models: False