/*
Copyright (c) Meta Platforms, Inc. and affiliates.
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
*/
// Format this file with clang after editing:
// clang-format-8 conf/*.proto -i
syntax = "proto2";
package fairdiplomacy;
import public "conf/agents.proto";
import public "conf/misc.proto";
import public "conf/common.proto";
// Launcher message defines how to launch the job. Two options are available -
// locally or on slurm. Launcher information is expected to be a part of the
// main config.
message Launcher {
message Local { optional bool use_local = 1; }
message Slurm {
optional int32 num_gpus = 1 [ default = 0 ];
// By default, one task is started per GPU. If this flag is set, one task per
// machine is used instead.
optional bool single_task_per_node = 2 [ default = false ];
optional string partition = 3 [ default = "learnaccel" ];
optional int32 hours = 4;
// Memory per GPU in GB.
optional int32 mem_per_gpu = 5 [ default = 62 ];
optional string comment = 6;
// Number of CPUs per GPU. You probably want 40 on Pascals and 10 otherwise.
optional int32 cpus_per_gpu = 7 [ default = 10 ];
// If set, will schedule job only on volta GPUs with 32GB of mem.
optional bool volta32 = 8;
// If set, will schedule the job only on Pascal GPUs.
optional bool pascal = 9;
// If set, will schedule job only on volta GPUs.
optional bool volta = 10;
}
oneof launcher {
Local local = 1;
Slurm slurm = 2;
}
}
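// A minimal illustrative prototxt snippet (all values are placeholders) for a
// config that embeds this message as `launcher`:
//   launcher { slurm { num_gpus: 8 hours: 24 partition: "learnaccel" } }
// or, to run locally:
//   launcher { local { use_local: true } }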
// Root config to compare agents.
message CompareAgentsTask {
optional Agent agent_one = 2;
// Ignored if use_shared_agent.
optional Agent agent_six = 3;
optional Agent cf_agent = 4;
optional Power power_one = 5;
optional string out = 6;
optional int32 seed = 7 [ default = -1 ];
// Optional. For tests - max number of turns to run.
optional int32 max_turns = 8;
optional int32 max_year = 9 [ default = 1935 ];
// Optional. If set, agent_six is ignored, and agent_one is used to get all
// strategies. Enable share_strategy on CFR to get a speed up.
optional bool use_shared_agent = 10;
// Optional. If set, then the agents start from the last phase in the game
// unless start_phase is set.
optional string start_game = 11;
// Optional. Only applies to the case when start_game is set.
optional string start_phase = 12;
// Optional. If set, will draw after this number of years without SC ownership
// transfer (detected by dipcc).
optional int32 draw_on_stalemate_years = 13;
// If positive, end messaging after this many messages have been sent.
optional int32 max_msg_iters = 14 [ default = -1 ];
// Capture agent logging to the game json file
optional bool capture_logs = 15;
// Use time-based messaging protocol, with this much time per phase, in
// centiseconds.
optional int32 time_per_phase = 16 [ default = 8640000 ];
// Use the provided base_strategy_model model to perform variance reduction
optional string variance_reduction_model_path = 17;
// If set, will stop eval as soon as agent_one is dead. The resulting
// game.json will be incomplete (not till end of game), but the stats should
// be fine.
// This flag must be false, if use_shared_agent is true.
optional bool stop_on_death = 18 [ default = true ];
// Optional. If false, we compute the policy for each of the 7 powers
// independently, i.e., one call per power. If true, then strategies for the 6
// non-power_one powers (if use_shared_agent is false) or for all powers (if
// use_shared_agent is true) are computed with a single call and used for all
// agents. Only sensible for no-press!
optional bool share_strategy = 19 [ default = false ];
// Optional. A string of "year,prob;year,prob;..."
// "year,prob" indicates that at the start of SPRING of that year or later
// years there is a probability of the game ending instantly and being scored
// as-is.
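// For example (illustrative values only), "1909,0.05;1913,0.5" would request a
// 5% chance of such an ending from SPRING 1909 on and a 50% chance from 1913 on.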
optional string year_spring_prob_of_ending = 20;
optional int32 num_processes = 90
[ default = 0 ]; // for data collection only!
optional int32 num_trials = 91 [ default = 0 ]; // for data collection only!
}
// Root config to compare a population of agents.
message CompareAgentPopulationTask {
message NamedAgent {
optional string key = 1;
optional Agent value = 2;
}
repeated NamedAgent agents = 1;
optional string agent_AUSTRIA = 2;
optional string agent_ENGLAND = 3;
optional string agent_FRANCE = 4;
optional string agent_GERMANY = 5;
optional string agent_ITALY = 6;
optional string agent_RUSSIA = 7;
optional string agent_TURKEY = 8;
optional string out = 10;
optional int32 seed = 11 [ default = -1 ];
// Optional. For tests - max number of turns to run.
optional int32 max_turns = 12;
optional int32 max_year = 13 [ default = 1935 ];
// Optional. If set, then the agents start from the last phase in the game
// unless start_phase is set.
optional string start_game = 14;
// Optional. Only applies to the case when start_game is set.
optional string start_phase = 15;
// Optional. If set, will draw after this number of years without SC ownership
// transfer (detected by dipcc).
optional int32 draw_on_stalemate_years = 16;
// End messaging after this many round robin opportunities for a power to
// speak.
optional int32 max_msg_iters = 17 [ default = -1 ];
// Capture agent logging to the game json file
optional bool capture_logs = 18;
// Use time-based messaging protocol, with this much time per phase, in
// centiseconds.
optional int32 time_per_phase = 19 [ default = 8640000 ];
// Optional. A string of "year,prob;year,prob;..."
// "year,prob" indicates that at the start of SPRING of that year or later
// years there is a probability of the game ending instantly and being scored
// as-is.
optional string year_spring_prob_of_ending = 20;
}
// Specifies the agents in a population, used to generate
// CompareAgentPopulationTask
message CompareAgentPopulationMapping {
message MappingEntry {
// Identifier for displaying and printing results.
optional string name = 1;
// This can be the name of a common agent without the ".prototxt", such as
// "searchbot_02_fastbot".
// Or it can be a path to an agent cfg relative to the repo root, including
// the ".prototxt" extension. Or it can be an absolute path to an agent
// config anywhere on the filesystem.
optional string cfg = 2;
// Overrides to feed back to heyhi when building the agent,
// like "searchbot.rollouts_cfg.max_rollout_length=0".
repeated string overrides = 3;
// Every population must have at least this number of agents of this type.
optional int32 min_count = 4 [ default = 0 ];
}
// Agents to randomize over in population match.
repeated MappingEntry agent = 1;
}
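// A minimal illustrative mapping (the name is hypothetical; the cfg and
// override values are taken from the examples above):
//   agent {
//     name: "fastbot"
//     cfg: "searchbot_02_fastbot"
//     overrides: "searchbot.rollouts_cfg.max_rollout_length=0"
//     min_count: 1
//   }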
message NoPressDatasetParams {
// Train dataset file
// Expected format: one json file per line, each line is
// "<path><space><json>\n" i.e. the result of: for g in
// /path/to/jsons/game_*.json ; do echo -n "$g " && cat $g ; done
optional string train_set_path = 1;
// Validation dataset file, same format as above
optional string val_set_path = 2;
// Path to file containing game metadata.
optional string metadata_path = 12;
// Dataloader procs (1 means load in the main process).
optional int32 num_dataloader_workers = 3 [ default = 80 ];
// Minimal score (num SC) of the power at the end of the game needed to include
// the power in the training set.
optional int32 only_with_min_final_score = 4;
// Exclude all-hold actions with >= n units from the dataset.
optional int32 exclude_n_holds = 5 [ default = -1 ];
// DEPRECATED
optional bool debug_only_opening_phase = 6 [ deprecated = true ];
// DEPRECATED
optional float value_decay_alpha = 7 [ deprecated = true ];
// Optional path to dir containing json files with state values.
optional string value_dir = 8;
// Optional. If specified, use this agent's orders instead of the orders in
// the game.json
optional Agent cf_agent = 9;
// Optional, only valid with cf_agent. If > 1, sample cf_agent multiple
// times, saving each as a separate row in the db.
optional uint32 n_cf_agent_samples = 10 [ default = 1 ];
// For debugging: use only the first n games of dataset, if > 0
optional int32 limit_n_games = 11 [ default = -1 ];
// cut this percentile of games based on player rating
// (only for dataset with player ratings)
optional float min_rating_percentile = 13 [ default = 0 ];
// cut players with fewer than this many games
optional float min_total_games = 14 [ default = 0 ];
// If an invalid order or ultra-long convoy is specified but the
// army/fleet specified is well-formed, convert it into a hold.
optional bool return_hold_for_invalid = 15 [ default = false ];
// DEPRECATED
optional string data_dir = 500 [ deprecated = true ];
}
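// An illustrative dataset_params block (all paths are placeholders):
//   dataset_params {
//     train_set_path: "/path/to/train_games.txt"
//     val_set_path: "/path/to/val_games.txt"
//     metadata_path: "/path/to/metadata.json"
//     num_dataloader_workers: 8
//   }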
message Encoder {
message Transformer {
// Number of attention heads. Must evenly divide inter_emb_size * 2.
optional int32 num_heads = 1;
// Number of intermediate channels for the feed-forward component
optional int32 ff_channels = 2;
// Number of transformer layers in the encoder
optional int32 num_blocks = 3;
// Channelwise dropout probability.
optional float dropout = 4;
// Layerwise dropout probability.
optional float layerdrop = 5 [ default = 0.0 ];
optional string activation = 7 [ default = "relu" ];
}
// The graph conv encoder, for backwards compatibility, is represented by this
// oneof being unset, and its parameters live directly inline in TrainTask
// instead of being part of this oneof.
oneof encoder { Transformer transformer = 1; }
}
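// An illustrative transformer encoder block (values are examples only; num_heads
// must evenly divide inter_emb_size * 2, e.g. 8 divides the default 240):
//   encoder {
//     transformer { num_heads: 8 ff_channels: 224 num_blocks: 10 dropout: 0.1 }
//   }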
message Wandb {
// Optional. Name of the project. If not set, something like "train_sl" will
// be used.
optional string project = 1;
// Optional. Explicit name of this run.
optional string name = 2;
// Optional. Comma-separated list of tags.
optional string tags = 3;
// Optional. Arbitrary string to describe the experiment.
optional string notes = 4;
// Optional. Name of the group of this run. Useful for sweeps
optional string group = 5;
// Optional. Set to false to disable wandb export, e.g., for use in tests.
// Note that in adhoc mode this flag is ignored and wandb is disabled.
optional bool enabled = 6 [ default = false ];
}
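// An illustrative wandb block (project/name/tags values are placeholders):
//   wandb { enabled: true project: "train_sl" name: "baseline_run" tags: "sl,transformer" }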
message PowerConditioning {
// If set, will randomly choose a power and use its orders as input. This
// only applies to all-power models. If all_powers_add_*_chances are
// enabled, then the conditioning will only be applied to non-augmented
// training examples.
optional float prob = 1;
// Min number of powers' actions to condition on (inclusive)
optional int32 min_num_power = 2;
// Max number of powers' actions to condition on (inclusive)
optional int32 max_num_power = 3;
}
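// An illustrative setting (values are examples only) that conditions on the
// actions of 1 to 3 powers for half of the training examples:
//   power_conditioning { prob: 0.5 min_num_power: 1 max_num_power: 3 }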
message TrainTask {
// No Press dataset params
optional NoPressDatasetParams dataset_params = 45;
// Batch size per GPU.
optional int32 batch_size = 4;
// Learning rate.
optional float lr = 5;
optional float adam_beta1 = 73 [ default = 0.9 ];
optional float adam_beta2 = 74 [ default = 0.99 ];
// Learning rate decay per epoch.
optional float lr_decay = 6;
// Max gradient norm.
optional float clip_grad_norm = 7;
// Path to load/save the model.
optional string checkpoint = 8;
// Prob[teacher forcing] during training.
optional float teacher_force = 10;
// LSTM dropout pct.
optional float lstm_dropout = 11;
// Encoder dropout pct. IGNORED when using Transformer encoder.
optional float encoder_dropout = 12;
// If set, use a single process.
optional bool debug_no_mp = 14;
// Skip validation / save.
optional bool skip_validation = 15;
// Use extremely simple one-hot attention in decoder
optional bool use_simple_alignments = 60;
// Number of GCN layers in encoder. IGNORED when using Transformer encoder.
optional int32 num_encoder_blocks = 20;
// Number of channels for intermediate encoder layers
optional int32 inter_emb_size = 55 [ default = 120 ];
// Architecture for the encoder (graph vs transformer)
optional Encoder encoder = 54;
// Max number of epochs to train
optional int32 num_epochs = 21;
// Stale. Always enabled.
optional bool write_jsonl = 22;
// Weight of value loss relative to policy loss, between 0 and 1
optional float value_loss_weight = 23;
// Scale factor for initial value decoder weights
optional float value_decoder_init_scale = 24;
// Max gradient norm in value decoder params
optional float value_decoder_clip_grad_norm = 25;
// Activation function within value decoder
optional string value_decoder_activation = 75 [ default = "relu" ];
optional bool value_decoder_use_weighted_pool = 77;
optional bool value_decoder_extract_from_encoder = 78;
// Value head dropout pct.
optional float value_dropout = 27;
// dimension of LSTM
optional int32 lstm_size = 33 [ default = 200 ];
// number of LSTM layers
optional int32 lstm_layers = 34 [ default = 1 ];
// if true, add features to output orders in the model
optional bool featurize_output = 35 [ default = false ];
// if true, add "relational" features to output orders in the model
optional bool relfeat_output = 36 [ default = false ];
optional bool shuffle_locs = 38 [ default = false ];
optional bool featurize_prev_orders = 39 [ default = false ];
// Legacy parameter used only for graph encoder.
// IGNORED when using Transformer encoder.
optional bool residual_linear = 40 [ default = false ];
// Use a bunch of extra encoder blocks merging prev orders and board state.
// Legacy parameter used only for graph encoder.
// IGNORED when using Transformer encoder.
optional bool merged_gnn = 41 [ default = false ];
// Optional. If set to a positive value, will skip each residual layer in
// encoder with this probability. IGNORED when using Transformer encoder.
optional float encoder_layerdrop = 42 [ default = 0.0 ];
// Optional. If true, will use softmax on top of value head. Otherwise, will
// take squares and normalize.
optional bool value_softmax = 50 [ default = false ];
// Optional. If set, will stop the epoch after that many batches. For testing
// purposes.
optional int32 epoch_max_batches = 51;
optional bool auto_mixed_precision = 57 [ default = false ];
// Pad spatial size to the nearest multiple of this. (e.g. 8 would result in
// 81 -> 88)
optional int32 pad_spatial_size_to_multiple = 58 [ default = 1 ];
// Random seed
optional int32 seed = 59 [ default = 0 ];
// If set, train model to predict all units for all powers in one sequence
optional bool all_powers = 61;
// If set, will mix in single-power batches with this chance.
// chances = 1.0 means that we do 50-50 all-power and single-power.
optional float all_powers_add_single_chances = 81;
optional float all_powers_add_double_chances = 82;
oneof maybe_power_conditioning { PowerConditioning power_conditioning = 83; }
// If set, the model will support extra input for orders to condition on.
optional bool with_order_conditioning = 84;
// Linear learning rate warmup for this many epochs at the start.
optional int32 warmup_epochs = 62;
// If set, wandb logging will be enabled.
optional Wandb wandb = 63;
// Setting either of these to false will entirely omit the parts of the model
// that are no longer necessary, and these parts will not be present in the
// checkpoint. Attempting to forward() for the omitted output for a model that
// doesn't have it will raise an error.
optional bool has_policy = 64 [ default = true ];
optional bool has_value = 65 [ default = true ];
// Controls which version of input encodings we use. See
// dipcc/dipcc/cc/encoding.h
optional int32 input_version = 66 [ default = 1 ];
// If true, permute the 7 powers in the input encoding randomly.
optional bool training_permute_powers = 67;
// If true, then the model provides player ratings as input.
optional bool use_player_ratings = 68 [ default = false ];
// Override the power emb size for the policy decoder.
// Set to 0 to entirely omit.
optional int32 power_emb_size = 69 [ default = 60 ];
// Use new V2 base_strategy_model - drops support for old encoders and some
// other outdated parameters.
optional bool use_v2_base_strategy_model = 70 [ default = false ];
// Support the first N of the scoring systems in
// fairdiplomacy.models.base_strategy_model.base_strategy_model.Scoring
optional int32 num_scoring_systems = 71 [ default = 1 ];
// If true, then the model uses the year as input.
optional bool use_year = 79 [ default = false ];
// If true, then the model accepts an agent power as input.
// This can be used for some RL purposes for training a model
// to be asymmetric with respect to agent vs others.
optional bool use_agent_power = 80 [ default = false ];
// Value loss use cross entropy instead of MSE
optional bool value_loss_use_cross_entropy = 85 [ default = false ];
message TransformerDecoder {
message Transformer {
// Number of attention heads. Must evenly divide inter_emb_size * 2.
optional int32 num_heads = 1;
// Number of intermediate channels for the feed-forward component
optional int32 ff_channels = 2;
// Number of transformer layers in the decoder
optional int32 num_blocks = 3;
// Channelwise dropout probability.
optional float dropout = 4;
// Layerwise dropout probability.
optional float layerdrop = 5 [ default = 0.0 ];
optional string activation = 7 [ default = "relu" ];
optional bool extra_normalization = 8;
}
optional int32 inner_dim = 1;
optional Transformer transformer = 2;
optional bool featurize_input = 3;
optional bool featurize_output = 4;
optional bool share_input_output_features = 7 [ default = true ];
optional bool explicit_location_input = 5;
optional bool positional_encoding = 6;
}
oneof maybe_transformer_decoder {
TransformerDecoder transformer_decoder = 72;
}
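// An illustrative transformer_decoder block (values are examples only):
//   transformer_decoder {
//     inner_dim: 256
//     transformer { num_heads: 8 ff_channels: 256 num_blocks: 4 dropout: 0.1 }
//   }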
// DEPRECATED or moved to NoPressDatasetParams
optional string data_cache = 500 [ deprecated = true ];
optional string metadata_path = 501 [ deprecated = true ];
optional float min_rating_percentile = 502 [ deprecated = true ];
optional float val_set_pct = 9 [ deprecated = true ];
optional int32 max_games = 30 [ default = -1, deprecated = true ];
optional bool learnable_A = 16 [ deprecated = true ];
optional bool fill_missing_orders = 17 [ default = false, deprecated = true ];
optional bool learnable_alignments = 18 [ deprecated = true ];
optional bool avg_embedding = 19 [ deprecated = true ];
optional bool use_global_pooling = 53 [ deprecated = true ];
optional bool graph_decoder = 32 [ default = false, deprecated = true ];
optional bool separate_value_encoder = 52 [ deprecated = true ];
// If specified, requeue on slurm signal
optional bool use_default_requeue = 900 [ default = false ];
optional Launcher launcher = 1000;
}
message PressTrainTask {
optional TrainTask base_strategy_model_train_params = 1;
// Parlai agent file
optional string parlai_agent_file = 2;
// Glob of raw message_chunks
optional string message_chunks = 3;
// Combine LSTM embedding size
optional int32 combine_emb_size = 4;
// Combine LSTM Num Layers
optional int32 combine_num_layers = 5;
// Parlai Encoder Model Path
optional string encoder_model_path = 6;
// If enabled, trains a vanilla BaseStrategyModel model
optional bool no_dialogue_emb = 7;
// Launcher
optional Launcher launcher = 1000;
}
// A dummy task to use in tests.
message TestTask {
message SubMessage { optional int32 subscalar = 1 [ default = -1 ]; }
message ComplexSubmessageWithIncludes {
repeated Include includes = 1;
optional SubMessage sub = 2;
}
enum SomeEnum {
ZERO = 0;
ONE = 1;
};
optional float scalar = 1 [ default = -1 ];
optional float scalar_no_default = 2;
optional SubMessage sub = 3;
optional SubMessage sub2 = 4;
optional SomeEnum enum_value = 5 [ default = ZERO ];
optional SomeEnum enum_value_no_default = 6;
oneof oneof_field {
int32 oneof_value1 = 7;
int32 oneof_value2 = 8;
SubMessage oneof_value3 = 9;
}
optional ComplexSubmessageWithIncludes complex_sub = 10;
map<string, int32> some_map = 11;
optional bool bool_field_no_default = 12;
optional bool bool_field_with_default_false = 13 [ default = false ];
optional bool bool_field_with_default_true = 14 [ default = true ];
oneof maybe_enum { SomeEnum the_enum = 15; }
optional Launcher launcher = 100;
}
message ExploitTask {
// BaseStrategyModel ckpt to initialize both the blueprint and the training
// agents.
//
// For PG mode only model_path is used. This model is used for both policy and
// value.
//
// For AlphaDip mode both flags are used. If only model_path is given all
// losses will be applied to this model. If both are provided, value losses
// are applied to value_model_path and policy losses to model_path.
//
// By default rollout workers will use models specified in the
// search_rollout.agent.searchbot.*model_path.
// To send trained models to rollout workers, use
// search_rollout.extra_params.use_trained_{policy,value}.
//
// Eval workers will use a trained value model iff value loss is on.
// Eval workers will use a trained policy model iff policy loss is on.
optional string model_path = 1;
optional string value_model_path = 22;
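// Illustrative values (both paths are placeholders):
//   model_path: "/path/to/blueprint_policy.ckpt"
//   value_model_path: "/path/to/blueprint_value.ckpt"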
// Optional. If set, will load from this full ckpt (requeue.ckpt file in exp
// dir).
optional string requeue_ckpt_path = 25;
// Weight of critic loss in total loss.
// For AlphaDip, setting this to zero disables the value loss.
optional float critic_weight = 2 [ default = 1.0 ];
// Weight of surrogate entropy loss that should push action-level entropy up.
optional float sampled_entropy_weight = 10;
// Optional. If set, weights of the exploit agent will be randomly
// initialized.
optional bool reset_agent_weights = 8;
optional Trainer trainer = 6;
// Optional. If positive, will set random seed for torch on the main process.
optional int32 seed = 9 [ default = -1 ];
optional Wandb wandb = 57;
// ############### SEARCH ONLY FLAGS
// Search mode. Weight of the XE between the network's policy and the search
// policy.
optional float search_policy_weight = 13;
// Apply the policy loss on each batch with this probability. Set to < 1.0 to
// speed up training.
optional float search_policy_update_prob = 16 [ default = 1.0 ];
// Apply the value loss on each batch with this probability. Set to < 1.0 to
// train the policy more often than the value.
optional float value_update_prob = 85 [ default = 1.0 ];
// Search loss will be skipped for actions that have at least this
// probability.
optional float search_policy_max_prob_cap = 29 [ default = 1.0 ];
// Must always be true. Using online targets is not supported anymore.
optional bool bootstrap_offline_targets = 15;
// Num gpus to use. Search only.
optional int32 num_train_gpus = 18 [ default = 1 ];
message SearchEvTarget {
optional float temperature = 1;
optional bool use_softmax = 2 [ default = true ];
}
oneof maybe_search_ev_loss { SearchEvTarget search_ev_loss = 21; }
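// Illustrative usage (the temperature value is a placeholder):
//   search_ev_loss { temperature: 0.1 use_softmax: true }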
// Use a faster method of multi-GPU training with processes instead of threads
optional bool use_distributed_data_parallel = 26 [ default = false ];
// If true, permute the 7 powers in the input encoding randomly.
// Only works if the underlying model is input version >= 2
optional bool training_permute_powers = 27;
// For all-power modes, this can be used to augment the policy.
oneof maybe_power_conditioning { PowerConditioning power_conditioning = 83; }
optional float single_power_chances = 84;
optional float six_power_chances = 86;
// ############### PG ONLY FLAGS
// Reward discounting.
optional float discounting = 7 [ default = 1.0 ];
// Weight of entropy loss in total loss.
optional float entropy_weight = 3 [ default = 0.0 ];
message Optimization {
// Deprecated. Use LR within optimizer.
optional float lr = 1;
// Optional (but highly recommended). Gradient clipping.
optional float grad_clip = 2;
// Optional. Warmup LR from zero to normal linearly.
optional int32 warmup_epochs = 3;
// Optional. Decay lr to 0 by this number of epochs. Cannot be less than the
// number of epochs.
optional int32 cosine_decay_epochs = 6;
// Optional. Multiply LR by step_decay_factor every step_decay_epochs epochs.
optional int32 step_decay_epochs = 7;
optional float step_decay_factor = 8;
message WarmupDecay {
optional int32 warmup_epochs = 1;
optional int32 decay_epochs = 2;
optional float final_decay = 3 [ default = 0.1 ];
}
oneof maybe_warmup_decay { WarmupDecay warmup_decay = 9; }
message AdamOptimizer {
optional float lr = 1;
// If set, will use AdamW.
optional float weight_decay = 2;
}
message SgdOptimizer {
optional float lr = 1;
optional float momentum = 2;
optional float weight_decay = 3;
}
oneof optimizer {
AdamOptimizer adam = 4;
SgdOptimizer sgd = 5;
};
}
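// An illustrative optimizer block (values are examples only):
//   optimizer {
//     grad_clip: 0.5
//     warmup_epochs: 10
//     adam { lr: 0.0001 weight_decay: 0.0 }
//   }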
optional Optimization optimizer = 4;
// Only for AlphaDip.
oneof maybe_value_optimizer { Optimization value_optimizer = 200; }
message Rollout {
// Required. Max number of steps to do in the rollout.
optional int32 rollout_max_length = 1;
// Optional. How many parallel games to batch within single rollout.
optional int32 rollout_batch_size = 2 [ default = 1 ];
// Optional. How many rollout processes to run. If zero or negative, will
// run rollouts in the main process.
optional int32 num_rollout_processes = 4 [ default = 1 ];
// Optional. If > 0, will save games with this stride.
optional int32 dump_games_every = 5 [ default = 0 ];
// Optional. Max batch size in postman inference processes.
optional int32 inference_batch_size = 6;
// Optional. Wait at least this number of seconds before loading a new model
// in the inference worker. By default, checks before every forward.
optional int32 inference_ckpt_sync_every = 14;
// Required. The size of the produced batches. That is what the training loop
// will receive.
optional int32 batch_size = 7;
// Optional. How much adjacent batches interleave. Note that the default value
// (1) means that each action frame will be used exactly once, as the last item
// in a batch is removed in IMPALA.
optional int32 batch_interleave_size = 8 [ default = 1 ];
// Optional. If set, the batches will concatenate rollouts until batch_size
// is reached, instead of following it exactly.
optional bool do_not_split_rollouts = 9;
optional bool single_rollout_gpu = 11;
optional int32 server_procs_per_gpu = 12 [ default = 1 ];
message Reward {
// Required. Name of the score metric from
// fairdiplomacy.utils.game_scoring.
optional string score_name = 1;
// Optional. Penalty for each move to encourage shorter games.
optional float delay_penalty = 2;
// Optional. If set, then the reward will be a difference between the
// score before the action and after the action.
optional bool differential_reward = 3;
// Optional. Hacky way to hardcode alliances.
// 0 -> no alliances
// 1 -> FRA, ENG, GER vs all.
// 2 -> FRA, ENG, GER, IT vs all.
// 3 -> FRA, ENG, RUS vs all.
// 4 -> FRA vs all.
optional int32 alliance_type = 4;
}
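// An illustrative reward block (the score_name value is hypothetical; valid
// names come from fairdiplomacy.utils.game_scoring):
//   reward { score_name: "some_score" delay_penalty: 0.01 }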
// Required. How to compute rewards.
optional Reward reward = 10;
// Optional. Whether to do self-play instead of exploitability.
optional bool selfplay = 13;
// Required in selfplay. Number of rollout processes to do eval rollouts
// against the supervised model. These rollouts are ignored for training.
// These workers are subtracted from num_rollout_processes.
optional int32 num_eval_rollout_processes = 15;
// Required. Temperature for the opponent agent.
optional float blueprint_temperature = 16;
// Optional. If set, will override global model_path.
optional string blueprint_model_path = 20;
// Optional. If set, will stop the rollout once the exploit agent/agents are out.
optional bool fast_finish = 17;
// Optional. If provided, rollouts and evals will start from these games.
// The file is expected to contain paths to game.jsons, one per line.
// Additionally, one can specify a phase after game path:
// <game_json_path> [ ":" <phase> ]
// Alternatively, one can provide a path to game.json file here to run on a
// single game with optional phase.
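// For example, an index file could contain lines like (paths and phase are
// hypothetical):
//   /data/games/game_001.json
//   /data/games/game_002.json:S1902M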
optional string initial_games_index_file = 18;
// Optional. If set, will use only this number of cores. The last cores are
// reserved, assuming the machine has 80 cores.
optional int32 num_cores_to_reserve = 19;
}
optional Rollout rollout = 5;
message SearchRollout {
// Required. Params of the CFR agent. To simplify includes, an arbitrary agent
// is allowed here, but the code will only accept a SearchBot agent.
optional Agent agent = 1;
oneof maybe_eval_agent {
// A version of the agent to use for h2h, test_sit, but not self-play
// games.
Agent eval_agent = 2;
}
// // Required. Max number of steps to do in the rollout.
// optional int32 rollout_max_length = 2;
// Optional. How many rollout processes to run per GPU. If zero or
// negative, will run a single rollout process.
optional int32 num_workers_per_gpu = 4 [ default = 1 ];
// Required. How many consecutive phases are batched together. This affects
// the batch (num phases in batch = chunk_length * batch_size) and how
// frequently workers send updates to the trainer (once chunk_length phases
// are collected).
optional int32 chunk_length = 6;
// Required. The size of the produced batches. This is multiplied by
// chunk_length, see above.
optional int32 batch_size = 7;
// Optional. If set, the batches will concatenate rollouts until batch_size
// is reached, instead of following it exactly.
optional bool do_not_split_rollouts = 9;
optional int32 server_procs_per_gpu = 12 [ default = 1 ];
// Required in selfplay. Number of rollout processes to do eval rollouts
// against the supervised model. These rollouts are ignored for training.
// These workers are subtracted from num_rollout_processes.
// optional int32 num_eval_rollout_processes = 15;
// Optional. If provided, rollouts and evals will start from these games.
// The file is expected to contain paths to game.jsons, one per line.
optional string initial_games_index_file = 18;
// Optional. If set, will use only this number of cores. Last cores are
// reserved assuming machine has 80 cores.
optional int32 num_cores_to_reserve = 19;
// Optional. How verbose the logging is for rollout processes.
// 0 - no logging
// 1 - process 0 writes INFO and everyone else writes WARNINGS
// 2 - everyone writes INFO
optional int32 verbosity = 20 [ default = 1 ];
message ExtraRolloutParams {
// Optional. If positive, will sample from all plausible actions instead of
// doing CFR with this probability.
optional float explore_eps = 2 [ default = 0.0 ];
// Optional. Custom explore constants for first 2 phases in the game.
optional float explore_s1901m_eps = 14 [ default = 0.0 ];
optional float explore_f1901m_eps = 15 [ default = 0.0 ];
// Optional. Put each rollout this many times to a queue. For debugging.
optional int32 fake_gen = 3 [ default = 1 ];
// Optional. If set, will decide whether to explore for each agent
// independently.
optional bool independent_explore = 4;
// Optional. If set, will choose the explore step first and then an agent that
// doesn't deviate at this step, and deviate for the others.
optional bool explore_all_but_one = 9;
// Optional. If set, will not play after this year.
optional int32 max_year = 5;
// Optional. If set, will set max_year on rollout workers randomly from
// [1902, max_year].
optional bool randomize_max_year = 6;
// Optional. Simplex-discounting factor.
optional float discounting = 8 [ default = 1.0 ];
// Optional. If set, will use CFR-based EVs of the next state as targets.
optional bool use_ev_targets = 19;
// Optional. If set, will collect CFR policies and send them to the trainer.
// This flag should be set programmatically by other flags.
optional bool collect_search_policies = 10;
// Optional. If set, will collect CFR EVs and send them to the trainer.
// This flag should be set programmatically by other flags.
optional bool collect_search_evs = 18;
// Optional. If true, will use average utilities from CFR run. If false,
// will recompute EVs. False only works for 2p when average policy is
// used.
optional bool use_cfr_evs = 23;
// Optional. If set, will use the model under training as a policy, i.e.,
// to generate plausible actions.
optional bool use_trained_policy = 11;
// Optional. If set, will use the model under training for value
// estimation.
optional bool use_trained_value = 21 [ default = true ];
// Optional. If set, use completely random policy (uniform sampling) for
// plausible orders.
optional bool random_policy = 13;
// Optional. Run DoubleOracle in this fraction of games. Only valid if
// `do` is provided, unless allow_policy_tragets_without_do is true.
optional float run_do_prob = 17 [ default = 1.0 ];
// Optional. If set, will use policy training targets for steps both with
// and without DO.
optional bool allow_policy_tragets_without_do = 26;
oneof maybe_do { DoubleOracleExploration do = 16; }
// Optional. If set, will prune this much from the end of the
// episode for games that don't finish.
// This option should probably be considered deprecated
// since using it to control the effective rollout length of
// the training distribution will create weird correlations with
// whether the game ended or not. See instead
// {min,max}_max_episode_movement_phases and max_training_episode_length.
optional int32 min_undone_episode_length = 27 [ default = 0 ];
message PlayBlueprint {
optional float temperature = 1 [ default = 1.0 ];
optional float top_p = 2 [ default = 1.0 ];
}
// Optional. If set, don't do search, just play blueprint.
oneof maybe_always_play_blueprint {
PlayBlueprint always_play_blueprint = 28;
}
// Optional. If set, truncate any self-play episode after this many
// turns if it hasn't already finished. Value net will be used to score
// the final position.
optional int32 max_episode_length = 31;
// For game json paths, randomize the starting phase within the game
optional bool sample_game_json_phases = 32;
// Value of the has_press flag for both training and querying.
optional bool default_has_press = 40 [ default = false ];
// Randomly half of the time set has_press to be true for the agent.
optional bool randomize_has_press = 33;
// Randomize sos vs dss scoring
optional bool randomize_sosdss = 34;
// If specified, each episode randomly draws a number K in the range
// [min_max_episode_movement_phases, max_max_episode_movement_phases]
// inclusive and stops as soon as the K+1-th movement phase is reached.
// (So an episode will have at most K search policies for movement phases.)
optional int32 min_max_episode_movement_phases = 35;
optional int32 max_max_episode_movement_phases = 36;
// If specified, only train on the first this many phases of each episode.
// If not use_ev_targets, will still use the untruncated episode value