-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathagents.proto
1028 lines (795 loc) · 37.8 KB
/
agents.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
Copyright (c) Meta Platforms, Inc. and affiliates.
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
*/
// Format this file with clang after editing:
// clang-format-8 conf/*.proto -i
syntax = "proto2";
package fairdiplomacy;
import public "conf/common.proto";
// Agent with no configuration options; presumably issues random orders
// (per the name — confirm against the agent implementation).
message RandomAgent {}
// Agent that samples orders directly from a BaseStrategyModel checkpoint
// (no search on top).
message BaseStrategyModelAgent {
  // Required. Path to BaseStrategyModel checkpoint.
  optional string model_path = 1;
  // Required. Softmax temperature used when sampling orders.
  optional float temperature = 2;
  // Optional. Share of probability mass to keep for (nucleus) sampling.
  optional float top_p = 3 [ default = 1.0 ];
  // Optional. Predict orders for a full-press (vs no-press) game.
  optional bool has_press = 4 [ default = false ];
  // Optional. Use FP16 at inference time for everything.
  optional bool half_precision = 5 [ default = false ];
  // CUDA device to use. Set to -1 to use CPU.
  optional int32 device = 6 [ default = 0 ];
}
// Shared configuration for model-driven game rollouts used by the search
// agents (see rollouts_cfg fields below).
message BaseStrategyModelRollouts {
  // Number of rollout worker threads.
  optional uint32 n_threads = 1 [ default = 70 ];
  // Temperature used for rollouts
  optional float temperature = 2 [ default = 1.0 ];
  // Nucleus ratio used for rollouts. During nucleus sampling only
  // the smallest subset of actions that has probability at least top_p is
  // considered. All other actions are never sampled.
  optional float top_p = 3 [ default = 1.0 ];
  // Maximum rollout length in MOVEMENT PHASES before heuristically
  // evaluating the game.
  optional int32 max_rollout_length = 4 [ default = -1 ];
  // # of rollouts to run in parallel for each possible action
  optional uint32 average_n_rollouts = 5 [ default = 1 ];
  // Optional float 0 - 1 to mix in raw sum of squares ratio
  optional float mix_square_ratio_scoring = 6 [ default = 0 ];
  // Optional. Enables optimization on cloning dipcc.Game objects.
  optional bool clear_old_all_possible_orders = 7 [ default = false ];
  // Optional. A string of "year,prob;year,prob;..."
  // "year,prob" indicates that at the start of SPRING of that year or later
  // years there is a probability of the game ending instantly and being scored
  // as-is, therefore we average the rollout with prob * raw score whenever the
  // rollout reaches a new year.
  optional string year_spring_prob_of_ending = 8;
  // DEPRECATED (now computed automatically)
  // Predict orders for a full-press (vs no-press) game.
  optional bool has_press = 900 [ default = false ];
}
// Configuration for building the set of plausible order-sets (actions) that
// search considers for each power, via base_strategy_model and/or parlai.
message PlausibleOrderSampling {
  // Number of order-sets (actions) to consider at each step in search code
  optional uint32 n_plausible_orders = 1;
  // Optional, limit number of actions to consider as a ratio of # units:
  // # plausible actions = min(ceil(max_actions_units_ratio * #units),
  //                           n_plausible_orders)
  optional float max_actions_units_ratio = 2 [ default = -1 ];
  // Optional. Excludes all-hold orders of length >=N from plausible orders.
  optional int32 exclude_n_holds = 3 [ default = -1 ];
  // Total number of samples requested from the model.
  optional uint32 req_size = 4 [ default = 700 ];
  // Optional. Batch size for sample queries.
  // If not set, will be equal to req_size.
  optional uint32 batch_size = 5 [ default = 0 ];
  // Optional. If true, then take the first n_plausible_orders
  // order-sets returned by parlai, rather than the top N.
  optional bool parlai_take_first = 6 [ default = false ];
  // Optional. If true, candidate orders are generated by parlai, but then
  // "rescored" by parlai to produce the blueprint policy.
  optional bool do_parlai_rescoring = 7 [ default = false ];
  // Optional. If non-zero, only rescore the top N orders, discard the rest.
  optional uint32 n_rescore = 8 [ default = 0 ];
  // Request size used for the parlai model.
  optional uint32 parlai_req_size = 9 [ default = 200 ];
  // Batch size used for the parlai model.
  optional uint32 parlai_batch_size = 10 [ default = 20 ];
  // Optional. If rescoring with parlai, consider the top frac*limit
  // orders from base_strategy_model and add them to the plausible set if
  // missing
  optional float augment_base_strategy_model_frac = 11 [ default = 0 ];
  // Optional. If true and multiple GPUs are available, will parallelize over
  // GPUs. This may slow down smaller runs as it disables the parlai cache and
  // so the models will be reloaded on each agent initialization.
  optional bool allow_multi_gpu = 12;
};
// Double-oracle (DO) exploration: iteratively augments the plausible action
// set with candidate actions that achieve higher EV against the current
// equilibrium.
message DoubleOracleExploration {
  // Required. How many iterations to do. Use at least as many as powers you
  // have.
  optional int32 max_iters = 1;
  // Optional. Min EV diff to choose a new action over existing one.
  optional float min_diff = 2 [ default = 0.0 ];
  // Optional. Min EV relative diff in percentage. Must be in [1.0, 100].
  optional float min_diff_percentage = 8 [ default = 0.0 ];
  // Optional. Maximum number of opponent actions to consider for EV
  // computation. If not set, will be infinite.
  optional int32 max_op_actions = 4;
  // Optional. If set, will use the full policy of the opponent to compute
  // Q(s, a). Only supported if there is a single opponent. If false, will
  // sample some number of actions for the opponent.
  optional bool use_exact_op_policy = 10 [ default = true ];
  // Optional. If set, a couple of games will be considered identical if the
  // final board state is the same. This flag should be relatively safe for 2p
  // games.
  optional bool use_board_state_hashing = 5;
  // Optional. If set, will process powers in random order rather
  // than in lexicographic order, aka in order of the POWERS list.
  optional bool shuffle_powers = 6;
  // Optional. If set, will regenerate the set of orders after each DO
  // iteration. Otherwise will generate set of orders once.
  optional bool regenerate_every_iter = 7;
  // Optional. If set, will only run DO for the agent's own power.
  // NOTE(review): the original comment was truncated ("will only run") —
  // confirm semantics against the implementation.
  optional bool only_agent_power = 9;
  // Optional. If set, then the algorithm will do fewer iterations if it has
  // worked more than this number of seconds.
  optional int32 max_seconds = 11 [ default = 0 ];
  // Optional. If set, will redefine the number of iterations used to compute
  // the equilibrium in the search agent.
  optional int32 n_rollouts = 12;
  // How candidate actions are generated on each DO iteration. Exactly the
  // oneof generators set below are used.
  message Generation {
    // Required. How many random actions to sample at each iteration.
    optional int32 max_actions = 1;
    // Samples orders uniformly at random.
    message UniformGenerator {
      // Optional. Consider orders that support foreign units.
      optional bool allow_foreign_supports = 1 [ default = true ];
    }
    message ColumnGenerator {
      // Optional. If set, will use this model instead of one in the agent.
      optional string model_path = 1;
      // Optional. Sampling temperature.
      // NOTE(review): original comment was a copy-paste of
      // allow_foreign_supports's comment.
      optional float temperature = 2 [ default = 1.0 ];
    }
    message BaseStrategyModelGenerator {
      // Optional. If set, will use this model instead of one in the agent.
      optional string model_path = 1;
      // Optional. Sampling temperature.
      optional float temperature = 2 [ default = 1.0 ];
      // Optional. Prediction order: default or random.
      optional string location_order = 3 [ default = "default" ];
    }
    message LocalUniformGenerator {
      // Required. How many actions to take from the policy and try to
      // modify.
      optional int32 num_base_actions = 1;
      // Optional. If set, will sample actions from the policy. Otherwise
      // will take top actions (default).
      optional bool use_sampling = 2;
      // Optional. Whether to use blueprint policy (default) or search policy
      // to select base actions.
      optional bool use_search_policy = 3;
      // Optional. Replace supports with hold in the base action, if the action
      // is not coordinated.
      optional bool fix_uncoordinated_base = 4;
      // Optional. If set, will search not only over locations adjacent to
      // existing units, but over all locations. In other words, this will
      // group units by points of influence.
      optional bool with_holes = 5;
    }
    oneof maybe_uniform { UniformGenerator uniform = 2; }
    oneof maybe_column { ColumnGenerator column = 3; }
    oneof maybe_base_strategy_model {
      BaseStrategyModelGenerator base_strategy_model = 4;
    }
    oneof maybe_local_uniform { LocalUniformGenerator local_uniform = 5; }
  }
  // Candidate-action generation settings.
  optional Generation generation = 3;
}
// Configuration for the CFR/QRE-based search agent ("SearchBot").
message SearchBotAgent {
  // Required. Path to BaseStrategyModel checkpoint. This model is always used
  // to select plausible actions; it's also used for rollouts and value
  // computation, unless these are specified explicitly.
  optional string model_path = 1;
  // Number of postman server processes to launch
  optional uint32 n_server_procs = 3;
  // Distribute server processes over multiple GPUs
  optional uint32 n_gpu = 4;
  // Model server maximum batch size
  optional uint32 max_batch_size = 5 [ default = 700 ];
  // Number of CFR iterations
  optional int32 n_rollouts = 6 [ default = -1 ];
  // CUDA device to use, if > 0
  optional int32 device = 13 [ default = 0 ];
  // Optional, if True, sample from final iter instead of average iter
  optional bool use_final_iter = 18 [ default = true ];
  // Optional: separate model path to compute the value function. If not set
  // model_path is used.
  optional string value_model_path = 19;
  // Optional: separate model path for rollouts. If not set model_path is used.
  optional string rollout_model_path = 30;
  // Optional host:port for value model server
  optional string use_value_server_addr = 20;
  // Optional, if >0 then play BP strategy for this many iters
  optional int32 bp_iters = 23 [ default = 0 ];
  // Optional, if >0 then play BP strategy with this probability.
  // NOTE(review): original comment duplicated bp_iters's — confirm semantics.
  optional float bp_prob = 24 [ default = 0 ];
  // Optional, if >0, then at each rollout step will use the current
  // model-predicted value as this fraction of the final estimate
  // (i.e. exponentially decaying effect of rollouts of increasing length)
  optional float rollout_value_frac = 25 [ default = 0 ];
  // If true, cache rollout results keyed by the rolled-out actions.
  optional bool cache_rollout_results = 26 [ default = false ];
  // Optional. If set, will compute the cache for all possible joint actions
  // before running CFR. The precomputation will be skipped if there are more
  // than 2 alive powers.
  optional bool precompute_cache = 43 [ default = false ];
  // Debugging for situation check tests only:
  // use the seed for plausible actions, then pick a random seed for rollouts.
  optional bool reset_seed_on_rollout = 27 [ default = false ];
  // If this power is a loser, i.e. its action values are less
  // than this value, then this power plays blueprint.
  optional float loser_bp_value = 28 [ default = 0 ];
  // First CFR iteration to start following loser_bp_value
  // (we need a few iterations to accurately estimate the action values)
  optional float loser_bp_iter = 29 [ default = 64 ];
  // If true, then exploitability is calculated at regular intervals.
  optional bool enable_compute_nash_conv = 32 [ default = false ];
  // Optional. If set, then this model will be used for plausible orders.
  optional ParlaiModel parlai_model = 34;
  // Optional. If set, then the agent will do fewer iterations if it has
  // worked more than this number of seconds.
  optional int32 max_seconds = 37 [ default = 0 ];
  // Optional. By default, use optimistic cfr
  optional bool use_optimistic_cfr = 38 [ default = true ];
  // Rollout parameters
  optional BaseStrategyModelRollouts rollouts_cfg = 39;
  // Strategies for augmenting the plausible-order set beyond plain sampling.
  message PlausibleOrderAugmentation {
    // Fills plausible orders up to the limit with random diverse actions.
    message RandomExploration {
      // Optional. How many top plausible orders always to keep. Has priority
      // over max_actions_to_drop.
      optional int32 min_actions_to_keep = 1 [ default = 0 ];
      // Optional. Make sure we replace at least this number of orders.
      optional int32 max_actions_to_drop = 2 [ default = 0 ];
    }
    oneof augmentation_type {
      // Fills plausible orders up to limit with random diverse actions.
      RandomExploration random = 1;
      // Iteratively tries to find actions that have higher EV from a pool of
      // random actions.
      DoubleOracleExploration do = 2;
    }
  }
  // Defines plausible order augmentation.
  optional PlausibleOrderAugmentation order_aug = 40;
  // Plausible-order sampling parameters.
  optional PlausibleOrderSampling plausible_orders_cfg = 41;
  // Optional. Use FP16 at inference time for everything.
  optional bool half_precision = 44 [ default = false ];
  // Optional. Dialogue parameters
  oneof maybe_dialogue { ParlaiDialogue dialogue = 50; }
  // Parlai model used for orders.
  optional ParlaiModel parlai_model_orders = 51;
  // Optional. If true, generate messages based on CFR-predicted orders
  optional bool cfr_messages = 52 [ default = false ];
  enum PolicyToPseudo {
    // sample an action from the policy
    SAMPLE = 0;
    // take the argmax action
    ARGMAX = 1;
  };
  // This section configures different strategies on how to generate message
  // pseudo-orders for other powers. If strategy=NONE, then pseudo-orders are
  // sampled from the computed equilibrium.
  // Other strategies described in the enum.
  message BilateralDialogue {
    // heyhi can only handle enum overrides if the enum is defined in the
    // same subcfg as it's used. See conf.py:208
    enum BilateralDialogueStrategy {
      // Sample pseudo-orders for all powers from the computed equilibrium
      NONE = 0;
      // From the set of opponent actions where blueprint probability >=
      // min_order_prob, pick the action which maximizes agent_power's value
      EXPLOIT = 1;
      // From the set of opponent actions where (average) equilibrium
      // probability >= min_order_prob, pick the action which maximizes
      // agent_power's value
      BEST_EQ = 2;
      // From the set of opponent actions where population
      // probability >= min_order_prob, pick the action which maximizes
      // agent_power's value
      BEST_POP = 3;
    };
    optional BilateralDialogueStrategy strategy = 1 [ default = NONE ];
    // The minimum order probability (in either the BP or equilibrium,
    // depending on strategy) that will be considered for pseudo-orders.
    optional float min_order_prob = 2 [ default = 0.02 ];
  }
  optional BilateralDialogue bilateral_dialogue = 54;
  // If specified, will simulate this SearchBot agent end-to-end with the
  // specified number of samples, and will compute a best response to their
  // average policy. Exploited agent MUST have use_final_iter = false.
  oneof maybe_exploited_searchbot_cfg {
    SearchBotAgent exploited_searchbot_cfg = 57;
  }
  // Number of end-to-end samples for the exploited agent simulation.
  optional int32 exploited_agent_num_samples = 58 [ default = 1 ];
  // Power played by the exploited agent.
  optional string exploited_agent_power = 59;
  // Set to true to enable logging of CFR average policies and utilities on
  // iterations other than the final iteration.
  optional bool log_intermediate_iterations = 60 [ default = false ];
  // Set to true to enable logging of bilateral CFR values, showing the effects
  // of other powers' actions on the agent_power's utility.
  optional bool log_bilateral_values = 61 [ default = false ];
  // Optional. If true, enables setting player ratings
  optional bool set_player_rating = 62 [ default = false ];
  // Optional. Player rating to be used. Only used when set_player_rating
  // is set to true
  optional float player_rating = 63;
  // Parameters for quantal-response-equilibrium (QRE) search.
  message QRE {
    // QRE Hedge parameter eta: roughly, the learning rate. We believe that 10
    // is a reasonable value.
    optional float eta = 1 [ default = 10.0 ];
    // QRE parameter lambda: roughly, the temperature of the policy, in units
    // of SoS score
    optional float qre_lambda = 2 [ default = 0.0 ];
    enum QRETargetPi {
      // Uses the uniform policy as the target policy for QRE
      UNIFORM = 0;
      // Uses blueprint policy as the target policy for QRE
      BLUEPRINT = 1;
    }
    // QRE target pi to be used
    optional QRETargetPi target_pi = 3 [ default = UNIFORM ];
    // If set, uses agent_qre_lambda for agent
    // and qre_lambda for opponents
    optional float agent_qre_lambda = 4;
    // Multiply the weight on the entropy part of KL term by this.
    optional float qre_entropy_factor = 5 [ default = 1.0 ];
    // If set, uses agent_qre_entropy_factor for the agent and
    // qre_entropy_factor for opponents
    optional float agent_qre_entropy_factor = 6;
  }
  // Optional. Uses QRE instead of CFR
  oneof maybe_qre { QRE qre = 64; }
  // Optional. If set, the plausible actions will be rescored with model below
  // to get a blueprint policy.
  optional string rescoring_blueprint_model_path = 65;
  // Optional. If true, do an incremental BP update after each message.
  // Only relevant if cfr_messages=true
  optional bool do_incremental_search = 66;
  // If true, then always use your most likely orders as pseudo-orders,
  // even if PPO model says they are unlikely.
  optional bool use_truthful_pseudoorders = 67 [ default = false ];
  // Same as above, but for the recipient's pseudo-orders.
  optional bool use_truthful_pseudoorders_recipient = 68 [ default = false ];
  // If one of use_truthful_pseudoorders or use_truthful_pseudoorders_recipient
  // is set, we don't need to compute probabilities of all pseudo-orders for
  // either us or the recipient. But we still compute them for logging
  // purposes. Turning this flag on will remove these logs, but will speed
  // things up.
  optional bool skip_policy_evaluation_for_truthful_pseudoorders = 69;
  optional bool use_greedy_po_for_rollout = 70;
  // Best-response-to-correlated-bilateral-search parameters.
  message BRCorrBilateralSearch {
    // whether to use pair search in pseudo order generation
    optional bool enable_for_pseudo_order = 1 [ default = false ];
    // use br to correlated bilateral search to generate final orders
    optional bool enable_for_final_order = 3 [ default = false ];
    // number of conditional joint action samples used to estimate evs for
    // actions given a pair of powers
    optional int32 bilateral_search_num_cond_sample = 4 [ default = 10 ];
    // num of samples to compute best response against correlated bilateral br
    optional int32 br_num_sample = 5 [ default = 1000 ];
    // whether to use all power model to compute p_joint
    // e.g. if set to false, the weights for opponent joint actions should all
    // be 1
    optional bool use_all_power_for_p_joint = 6 [ default = true ];
    // a small probability to be added to the p_joint(a1, a2, ..., a6) and
    // prod_i p_bp(ai) to prevent the weight from overflowing
    optional float joint_action_min_prob = 7 [ default = 0 ];
    // coefficient to regularize br policy towards bp, ev_a + lambda log(bp_a)
    optional float br_regularize_lambda = 8 [ default = 0 ];
    // clip unnormalized weight (i.e. importance ratio) with [min, max]
    optional float min_unnormalized_weight = 9 [ default = -1 ];
    optional float max_unnormalized_weight = 10 [ default = -1 ];
    // this should be specified by searchbot.model_path
    optional string all_power_model_path = 101 [ deprecated = true ];
  };
  // Optional. If set, use best response to correlated bilateral search
  // to generate final order and maybe pseudo orders
  oneof maybe_br_corr_bilateral_search {
    BRCorrBilateralSearch br_corr_bilateral_search = 71;
  };
  // Parameters for searching over candidate outgoing messages.
  message MessageSearch {
    // Number of messages to (maybe) sample
    optional int32 n_messages = 1 [ default = 1 ];
    // Absolute score differential threshold
    optional float max_score_diff_threshold = 5 [ default = 0.007 ];
    // Relative score differential threshold
    optional float max_rel_score_diff_threshold = 6 [ default = 0.1 ];
    enum MessageSearchStrategy {
      // Dummy strategy; returns random message
      NONE = 0;
      // Selects best message
      BEST = 1;
      // Selects message using softmax with temperature `softmax_temperature`
      SOFTMAX = 2;
      // Selects random message among the top `filter_top_k`
      FILTER = 3;
    }
    optional MessageSearchStrategy strategy = 7 [ default = BEST ];
    // Only applies for SOFTMAX strategy.
    optional float softmax_temperature = 8 [ default = 0.003 ];
    // Only applies for FILTER strategy.
    optional int32 filter_top_k = 9 [ default = 5 ];
  }
  // Optional. If set, perform message search when generating messages
  oneof maybe_message_search { MessageSearch message_search = 72; }
  // Has no effect.
  optional bool use_predicted_final_scores = 8 [ deprecated = true ];
  // Has no effect.
  optional bool postman_sync_batches = 11 [ deprecated = true ];
  // Has no effect.
  optional string use_server_addr = 12 [ deprecated = true ];
  // Has no effect.
  optional PolicyToPseudo policy_to_pseudo = 53 [ deprecated = true ];
  // Has no effect.
  optional int32 n_message_search = 55 [ default = 0, deprecated = true ];
  // Has no effect.
  optional string message_search_reply = 56 [ deprecated = true ];
}
// Agent that extends SearchBotAgent with a distribution over player "types"
// that differ by QRE lambda (and possibly the rescoring policy).
message BQRE1PAgent {
  // SearchBotAgent configuration that will be inherited to initialize the
  // BQRE1PAgent.
  optional SearchBotAgent base_searchbot_cfg = 1;
  // Number of player types we will be modelling
  optional int32 num_player_types = 2;
  // Current agent player type
  // Note: agent_type=1 will have the lowest lambda
  // and thus would be the strongest player
  optional int32 agent_type = 3;
  // Describes how the set of player types is constructed.
  message PlayerTypes {
    message Policy {
      // Leave both fields blank to indicate no-rescoring.
      optional string model_path = 1;
      optional string name = 2;
    }
    // LogUniformLambdas allows to create some number of player types that
    // differ by lambda and maybe the policy we use to rescore.
    //
    // The following invariant must be true:
    //   num_player_types = num_lambda * max(len(policies), 1) +
    //                      int(include_zero_lambda)
    // If no such num_lambda exists, then an exception will be raised.
    //
    // If no `policies` are provided then the default policy is used:
    //   - if target_pi=UNIFORM, then it will be uniform
    //   - otherwise the policy from plausible_orders as input to run_search is
    //     used (= rescoring_blueprint_model_path if set, or model_path
    //     otherwise)
    //
    // If policies are provided, then they will be used to assign probabilities
    // to the plausible_actions. It's still possible to use the incoming
    // probabilities by passing an empty model_path.
    message LogUniformLambdas {
      optional double min_lambda = 1;
      optional double max_lambda = 2;
      repeated Policy policies = 3;
      // If true, will include a special type with lambda=0. Note the 0-lambda
      // type will be added only for one policy, as the policy doesn't matter
      // for such lambda. This type will be the first type.
      optional bool include_zero_lambda = 5;
    }
    oneof player_types { LogUniformLambdas log_uniform = 1; }
  }
  // The player types to use.
  optional PlayerTypes player_types = 9;
  // If true, then the agent type is public, meaning it has a singleton
  // distribution from the beginning of the game.
  optional bool agent_type_is_public = 8 [ default = false ];
  // Raise all lambdas to this power and scale by this value in 1901.
  optional float pow_lambdas_1901 = 10;
  optional float scale_lambdas_1901 = 11;
  // Raise all lambdas to this power and scale by this value in 1901 spring.
  // Takes precedence over pow_lambdas_1901 and scale_lambdas_1901 for 1901
  // spring.
  optional float pow_lambdas_1901_spring = 12;
  optional float scale_lambdas_1901_spring = 13;
  // Scale lambdas of everyone based on the sqrt(variance) of their values when
  // simply uniformly sampling actions for everyone from the blueprint rolling
  // out through one movement phase.
  // Always adds a minimum epsilon*epsilon to the variance estimate.
  // dynamic_lambda_stdev_baseline indicates the standard deviation at which
  // no lambda scaling will be applied.
  // NOTE(review): the field name below misspells "epsilon"; kept as-is for
  // wire/text-format compatibility.
  optional float dynamic_lambda_stdev_espilon = 14;
  optional float dynamic_lambda_stdev_baseline = 15;
  optional int32 dynamic_lambda_stdev_num_samples = 16 [ default = 0 ];
  // DEPRECATED.
  optional float lambda_min = 105 [ deprecated = true ];
  optional float lambda_multiplier = 106 [ deprecated = true ];
  optional bool do_bayesian_updates = 107 [ deprecated = true ];
  optional int32 update_past_n_phases = 108 [ deprecated = true ];
}
// Search agent built around a condition-aware all-power policy model plus a
// separate value model.
message TheBestAgent {
  // Required. Model to predict responses to our orders by other powers. Must
  // be a condition-aware all-power model. Will be used for everything else by
  // default.
  optional string conditional_policy_model_path = 2;
  // Optional. Plausible order model.
  optional string plausible_model_path = 1;
  // Optional. Anchor joint-policy model.
  optional string anchor_joint_policy_model_path = 20;
  // Required. Model to compute values.
  optional string value_model_path = 3;
  // CUDA device to use, if > 0
  optional int32 device = 13 [ default = 0 ];
  // Use FP16 at inference time.
  optional bool half_precision = 44;
  // Model server maximum batch size.
  optional uint32 max_batch_size = 8 [ default = 512 ];
  // Optional. If set, this batch size will be used to sample from the
  // conditional policy, otherwise max_batch_size is used.
  optional uint32 conditional_max_batch_size = 14;
  // Required. Number of BR samples to use to estimate EVs.
  optional int32 num_br_samples = 4;
  // Optional. Samples to draw from the independent-pikl to rescore.
  // By default 10 * num_br_samples.
  optional int32 num_importance_samples = 15;
  // QRE lambda (policy temperature in SoS-score units; see SearchBotAgent.QRE).
  optional float qre_lambda = 10;
  // QRE eta (learning rate; see SearchBotAgent.QRE).
  optional float qre_eta = 11 [ default = 10000 ];
  // If set, will compute probability P(a in plausible | a ~ joint policy).
  optional bool compute_inside_ratio = 12 [ default = true ];
  optional PlausibleOrderSampling plausible_orders_cfg = 5;
  // Note, the agent will die if the max_rollout_length is not equal to 0.
  optional BaseStrategyModelRollouts rollouts_cfg = 6;
  // Use up to this number of samples from the joint policy to compute value.
  optional int32 num_value_computation_samples = 17 [ default = 1000 ];
  // How joint actions are sampled when estimating values.
  enum SamplingType {
    INDEPENDENT_PIKL = 1;
    JOINT_CONDITIONAL = 2;
    HYBRID_JOINT_AND_INDEP_PIKL = 3;
  }
  optional SamplingType sampling_type = 16 [ default = INDEPENDENT_PIKL ];
  // In HYBRID_JOINT_AND_INDEP_PIKL what is the probability to sample from the
  // independent PiKL policy.
  optional float hybrid_independent_pikl_prob = 18 [ default = 0.5 ];
  // In HYBRID_JOINT_AND_INDEP_PIKL what is the temperature of sampling from
  // the joint.
  optional float hybrid_joint_temp = 19 [ default = 1.0 ];
}
// Best-response (BR) search agent configuration.
message BRSearchAgent {
  // Path to BaseStrategyModel checkpoint.
  optional string model_path = 1;
  // Model server maximum batch size
  optional uint32 max_batch_size = 5 [ default = 700 ];
  // If true, use model predicted final scores in heuristic evaluation.
  // If false, use current SC counts after max_rollout_length steps.
  optional bool use_predicted_final_scores = 8;
  // CUDA device to use
  optional int32 device = 13 [ default = 0 ];
  // Optional: separate model path to compute the value function
  optional string value_model_path = 14;
  // Rollout params
  optional BaseStrategyModelRollouts rollouts_cfg = 15;
  // Plausible-order sampling parameters.
  optional PlausibleOrderSampling plausible_orders_cfg = 16;
}
// Agent configured from a saved game file, presumably to reproduce the orders
// recorded in it (per the name — confirm against the implementation).
message ReproAgent {
  // Required, path to game.json file
  optional string game_path = 1;
}
// Inference-time and miscellaneous flag overrides forwarded to ParlAI agents.
message ParlaiFlags {
  // ------------------------------
  // INFERENCE FLAGS
  // This is a subset of flags related to model inference from:
  // https://github.com/facebookresearch/ParlAI/blob/62a3f4546048997a80f05ce4b00c31e93cb694d1/parlai/core/torch_generator_agent.py#L383
  // ------------------------------
  // Optional. Inference type: beam, greedy, topk, nucleus, delayedbeam.
  // Default: greedy.
  optional string inference = 1;
  // Optional. Beam size. Default: 1.
  optional int32 beam_size = 2;
  // K used in Top K sampling. Default: 10.
  optional int32 topk = 3;
  // p used in nucleus sampling. Default: 0.9
  optional float topp = 4;
  // Temperature to add during decoding. Default: 1.0;
  optional float temperature = 5;
  // Minimum beam length for decoding. Default is -1.
  optional int32 beam_min_length = 6;
  // Intra-sequence n-grams to block during decoding. Default is -1.
  optional int32 beam_block_ngram = 7;
  // N-gram length to block from the context (original comment said
  // "conference", apparently a typo).
  optional int32 beam_context_block_ngram = 8;
  // ------------------------------
  // MISCELLANEOUS FLAGS
  // ------------------------------
  // GPU to set. Default is -1.
  optional int32 gpu = 9;
  // Set the player rating (1-5), if applicable to the model. Default is False.
  optional int32 set_player_rating = 10;
  // Only relevant to the silence classifier when `sample_classifier` is False;
  // threshold for when to speak
  optional float threshold = 11;
  // Only relevant to the classifier; whether or not to sample
  optional bool sample_classifier = 13 [ default = false ];
  // Only relevant to silence classifier with sample_classifier=true.
  // Rescales the speak probability by this multiplier. >1 to speak more.
  optional float prob_speak_multiplier = 14 [ default = 1 ];
  // Set the player chattiness (1-20), if applicable to the model (currently
  // it's only applicable to the SILENCE classifier). Default is False.
  optional int32 set_player_chattiness = 12;
  // Use model parallel
  optional bool model_parallel = 16 [ default = false ];
  // Use special nucleus sampling
  optional bool topp_special = 17 [ default = false ];
  // For special nucleus sampling
  optional float topp_special_threshold = 18 [ default = 0.9 ];
  // Set pot type
  optional string pot_type = 19 [ default = "Sum-of-squares" ];
  // DEPRECATED -- DO NOT USE
  optional int64 phase_minutes = 20 [ default = 1440 ];
  // Nucleus sampling probability cut off
  optional float probability_cutoff = 21 [ default = 0.0 ];
}
// A ParlAI model reference: checkpoint path plus optional flag overrides,
// or an address of a remote model server.
message ParlaiModel {
  // Required. Path to the ParlAI model.
  optional string model_path = 1;
  // Optional. Flag overrides applied to the model.
  optional ParlaiFlags overrides = 2;
  // Optional. Remote server address to query instead of loading the model
  // locally (presumably host:port — confirm against the client code).
  optional string remote_addr = 3;
}
// Dialogue generation using a generator model whose nucleus-sampled
// candidates are re-scored with a separate language model
// ("discriminative nucleus" decoding).
message ParlaiDiscriminativeNucleusModel {
// Required. Path to the dialogue (generation) model checkpoint.
optional string dialogue_model_file = 1;
// Required. Path to the language-model checkpoint used for re-scoring.
optional string lm_model_file = 2;
// Optional. GPU index for the dialogue agent.
optional int32 dialogue_agent_gpu = 3 [ default = 0 ];
// Optional. GPU index for the language-model agent.
optional int32 lm_agent_gpu = 4 [ default = 1 ];
// Optional. Truncation length for labels, presumably in tokens -- verify.
optional int32 label_truncate = 5 [ default = 50 ];
// Optional. Nucleus (top-p) sampling probability mass.
optional float topp = 6 [ default = 0.5 ];
// Optional. Beam size used during decoding.
optional int32 beam_size = 7 [ default = 1 ];
// Optional. Minimum beam length for decoding.
optional int32 beam_min_length = 8 [ default = 1 ];
// Optional. Context n-gram blocking length; -1 disables blocking.
optional int32 beam_context_block_ngram = 9 [ default = -1 ];
// Optional. Player rating (1-5) to condition on, as in ParlaiFlags.
optional int32 set_player_rating = 10 [ default = 5 ];
// Optional. Whether to shard the model across GPUs (model parallelism).
optional bool model_parallel = 11 [ default = true ];
// Optional. Threshold applied when scoring tokens; exact semantics depend on
// scoring_method -- TODO confirm.
optional float token_scoring_threshold = 12 [ default = 0.7 ];
// Optional. Name of the method used to re-score candidates.
optional string scoring_method = 13 [ default = "speaker_listener" ];
}
// A single named nonsense classifier, used as one member of
// ParlaiNonsenseDetectionEnsemble.
message NonsenseClassifier {
// Identifier for this classifier within the ensemble.
optional string name = 1;
// The ParlAI model that performs the nonsense classification.
optional ParlaiModel nonsense_classifier = 2;
}
// An ensemble of nonsense classifiers applied to candidate messages.
message ParlaiNonsenseDetectionEnsemble {
// The classifiers making up the ensemble.
repeated NonsenseClassifier nonsense_classifiers = 1;
}
// A no-press ParlAI agent: produces orders only, no dialogue.
message ParlaiNoPressAgent {
// Required. Orders model
optional ParlaiModel model_orders = 1;
}
// Configuration for the dialogue (press) component of a ParlAI agent:
// the dialogue generation model plus optional auxiliary classifiers and the
// heuristics/filters applied to candidate messages.
message ParlaiDialogue {
// Required. Dialogue model or discriminative nucleus model
oneof dialogue_model {
ParlaiModel model_dialogue = 1;
ParlaiDiscriminativeNucleusModel model_discriminativenucleus_dialogue = 2;
}
// Optional. Filter offensive dialogue language
optional bool filter_offensive_dialogue = 3 [ default = true ];
// Optional. Pseudo-orders model
optional ParlaiModel model_pseudo_orders = 4;
// Optional. Sleep classifier (decides whether/when to stay silent).
optional ParlaiModel model_sleep_classifier = 5;
// Optional. Recipient classifier (chooses whom to message).
optional ParlaiModel model_recipient_classifier = 6;
// Optional. Draw classifier
optional ParlaiModel model_draw_classifier = 13;
// Optional. Zero-shot nonsense classifier
optional ParlaiModel model_zshot_nonsense_classifier = 7;
// Optional. Nonsense classifier ensemble
optional ParlaiNonsenseDetectionEnsemble ensemble_nonsense_classifier = 8;
// Optional. If true, reuse pseudo-orders for consecutive messages.
// (But recompute pseudo-orders when the agent receives a new message).
optional bool reuse_pseudo_for_consecutive_messages = 9 [ default = false ];
// Optional. If true, reuse pseudo-orders for all messages in a phase.
optional bool reuse_pseudo_for_phase = 12 [ default = false ];
// Optional. Number of times to resample dialogue when a candidate message is
// filtered; 0 means no resampling.
optional int32 resample_dialogue_on_filter = 11 [ default = 0 ];
// Optional. Only allow sampling inf if p(inf) >= threshold
optional float sleep_inf_threshold = 14 [ default = 0 ];
// Optional. Same as above but only for replies.
optional float sleep_inf_threshold_reply = 15 [ default = 0 ];
// Filter the first message an agent sends in the game if the likelihood of
// being from a weak player is above threshold
optional float rating_threshold_first_message = 16 [ default = 1.0 ];
// Filter other messages if likelihood of being from a weak player is above
// threshold
optional float rating_threshold_other = 17 [ default = 1.0 ];
// DEPRECATED -- use use_initiate_sleep_heuristic_n_years instead.
optional bool initiate_sleep_heuristic_every_phase = 18 [ default = false ];
// If set, use initial message prompting
optional string initial_message_prompts_path = 19;
// Number of prompt messages to prepend to the dialogue history
optional int32 initial_message_prompts_count = 20 [ default = 1 ];
// Timestamp spacing between prompts
optional int32 initial_message_prompt_spacing_seconds = 21 [ default = 900 ];
// Whether the dialogue models may be spread across multiple GPUs.
optional bool allow_multi_gpu = 22;
// If True, in 5m games sleep times will be constrained to a short sleep time
// (15s) or inf
optional bool binarize_sleep_times_in_5m_games = 23 [ default = false ];
// If True, filter messages containing matching regexes related to grounding
// issues (new players, times of day, etc.)
optional bool should_filter_grounding = 24 [ default = false ];
// If >0, force an inf sleep time for a recipient to whom we have sent
// multiple consecutive messages this phase with no response
optional int32 limit_consecutive_outbound_messages = 25 [ default = -1 ];
// Send a message to everybody every phase until this many game years have
// elapsed; 0 disables the heuristic (inferred from the field name -- verify).
optional int32 use_initiate_sleep_heuristic_n_years = 26 [ default = 0 ];
// Apply some hardcoded heuristics based on start-of-phase pseudos
// to determine whether we need to message someone.
optional bool use_pseudoorders_initiate_sleep_heuristic = 29
[ default = false ];
// Optional. If set, will use this batch size when queries dialogue model for
// multiple responses. If not set, the batch size will be equal to the request
// size.
optional int32 dialogue_batch_size = 27;
// If set, filters a message when the difference of the pseudo order
// likelihood for the agent after and before the message is less than this
// value.
optional float pseudo_orders_correspondence_threshold = 28;
// If set, filters for issues relating to the model not knowing when the game
// is scheduled to end.
optional int32 grounding_last_playable_year = 30;
// If base_strategy_model-predicted score is below this value, block message
// initiation
optional float block_initiation_if_pred_value_below = 31 [ default = 0 ];
// If true and grounding_last_playable_year is set, block messages in the
// last season except to powers with whom we are coordinating a support or
// convoy.
optional bool use_last_phase_silence_except_coordination_heuristic = 32;
}
// A full-press ParlAI agent: an orders model plus an optional dialogue stack.
message ParlaiFullPressAgent {
// Orders model. Required if no order_handler specified. Exists outside of
// order_handler oneof for backwards compat.
optional ParlaiModel model_orders = 1;
// Optional. Dialogue models
oneof maybe_dialogue { ParlaiDialogue dialogue = 3; }
}
// Order handler that samples plausible orders from a base strategy model and
// re-scores (re-ranks) them with a ParlAI model.
message ParlAIRescoringBaseStrategyModelOrderHandler {
// Required. Path to base_strategy_model model.
optional string base_strategy_model_model_path = 1;
// Required. Parlai model used to re-score base_strategy_model orders.
optional ParlaiModel model_orders = 2;
// Options controlling base_strategy_model sampling.
optional PlausibleOrderSampling plausible_orders_cfg = 3;
// TODO: add ranking-algorithm-specific options here if/when needed.
}
message ParlAIBestResponseOrderHandler {
// Required. Order model
optional ParlaiModel model_orders = 1;
// Required. Path to BaseStrategyModel checkpoint.
optional string model_path_base_strategy_model = 3;
// Optional, default provided.
// - beam: use top actions from beam search
// - sample_topk10_top: sample with topk=10 and take top by probability
// - sample_topk10_random: sample with topk=10 and take first sampled N
// - sample_topk10_t0.5_random: sample with temp=0.5 and take first sampled N
// All modes respect plausible_orders_req_size and
// plausible_orders_batch_size. For beam the two must match.
optional string parlai_method = 4 [ default = "sample_topk10_top" ];
// Size of rollout thread pool
optional uint32 n_rollout_procs = 5 [ default = 70 ];
// Number of rollouts per plausible order
optional uint32 rollouts_per_plausible_order = 6;
// Maximum rollout length in MOVEMENT PHASES heuristically evaluating the game
optional int32 max_rollout_length = 7 [ default = -1 ]; // required
// Number of order-sets (actions) to consider at each step in search code
optional int32 n_plausible_orders = 8 [ default = -1 ];
// Optional, cap n_plausible_orders as a ratio of # orderable units
optional float max_actions_units_ratio = 9 [ default = -1 ];
// Temperature used for rollouts
optional float rollout_temperature = 10 [ default = -1 ];
// Optional. Nucleus ratio used for rollouts. During nucleus sampling only the
// smallest subset of actions that has probability at least top_p is
// considered. All other actions are never sampled.