{ "base": { "dir": "/workspace/models/model-2.5-6", "bpw": 2.5151316631209735 }, "alts": [ { "dir": "/workspace/models/model-3.5-6", "bpw": 3.5149173008487336 } ], "groups": [ { "idx": 0, "layers": [ "model.layers.0.self_attn.q_proj", "model.layers.0.self_attn.k_proj", "model.layers.0.self_attn.v_proj", "model.layers.0.self_attn.o_proj" ], "candidates": [ { "dkld": -0.013420631363987912, "dbits": 44040192 } ] }, { "idx": 1, "layers": [ "model.layers.0.block_sparse_moe.experts.0.w1", "model.layers.0.block_sparse_moe.experts.1.w1", "model.layers.0.block_sparse_moe.experts.2.w1", "model.layers.0.block_sparse_moe.experts.3.w1", "model.layers.0.block_sparse_moe.experts.4.w1", "model.layers.0.block_sparse_moe.experts.5.w1", "model.layers.0.block_sparse_moe.experts.6.w1", "model.layers.0.block_sparse_moe.experts.7.w1", "model.layers.0.block_sparse_moe.experts.8.w1", "model.layers.0.block_sparse_moe.experts.9.w1", "model.layers.0.block_sparse_moe.experts.10.w1", "model.layers.0.block_sparse_moe.experts.11.w1", "model.layers.0.block_sparse_moe.experts.12.w1", "model.layers.0.block_sparse_moe.experts.13.w1", "model.layers.0.block_sparse_moe.experts.14.w1", "model.layers.0.block_sparse_moe.experts.15.w1", "model.layers.0.block_sparse_moe.experts.16.w1", "model.layers.0.block_sparse_moe.experts.17.w1", "model.layers.0.block_sparse_moe.experts.18.w1", "model.layers.0.block_sparse_moe.experts.19.w1", "model.layers.0.block_sparse_moe.experts.20.w1", "model.layers.0.block_sparse_moe.experts.21.w1", "model.layers.0.block_sparse_moe.experts.22.w1", "model.layers.0.block_sparse_moe.experts.23.w1", "model.layers.0.block_sparse_moe.experts.24.w1", "model.layers.0.block_sparse_moe.experts.25.w1", "model.layers.0.block_sparse_moe.experts.26.w1", "model.layers.0.block_sparse_moe.experts.27.w1", "model.layers.0.block_sparse_moe.experts.28.w1", "model.layers.0.block_sparse_moe.experts.29.w1", "model.layers.0.block_sparse_moe.experts.30.w1", "model.layers.0.block_sparse_moe.experts.31.w1", "model.layers.0.block_sparse_moe.experts.32.w1", "model.layers.0.block_sparse_moe.experts.33.w1", "model.layers.0.block_sparse_moe.experts.34.w1", "model.layers.0.block_sparse_moe.experts.35.w1", "model.layers.0.block_sparse_moe.experts.36.w1", "model.layers.0.block_sparse_moe.experts.37.w1", "model.layers.0.block_sparse_moe.experts.38.w1", "model.layers.0.block_sparse_moe.experts.39.w1", "model.layers.0.block_sparse_moe.experts.40.w1", "model.layers.0.block_sparse_moe.experts.41.w1", "model.layers.0.block_sparse_moe.experts.42.w1", "model.layers.0.block_sparse_moe.experts.43.w1", "model.layers.0.block_sparse_moe.experts.44.w1", "model.layers.0.block_sparse_moe.experts.45.w1", "model.layers.0.block_sparse_moe.experts.46.w1", "model.layers.0.block_sparse_moe.experts.47.w1", "model.layers.0.block_sparse_moe.experts.48.w1", "model.layers.0.block_sparse_moe.experts.49.w1", "model.layers.0.block_sparse_moe.experts.50.w1", "model.layers.0.block_sparse_moe.experts.51.w1", "model.layers.0.block_sparse_moe.experts.52.w1", "model.layers.0.block_sparse_moe.experts.53.w1", "model.layers.0.block_sparse_moe.experts.54.w1", "model.layers.0.block_sparse_moe.experts.55.w1", "model.layers.0.block_sparse_moe.experts.56.w1", "model.layers.0.block_sparse_moe.experts.57.w1", "model.layers.0.block_sparse_moe.experts.58.w1", "model.layers.0.block_sparse_moe.experts.59.w1", "model.layers.0.block_sparse_moe.experts.60.w1", "model.layers.0.block_sparse_moe.experts.61.w1", "model.layers.0.block_sparse_moe.experts.62.w1", "model.layers.0.block_sparse_moe.experts.63.w1", "model.layers.0.block_sparse_moe.experts.64.w1", "model.layers.0.block_sparse_moe.experts.65.w1", "model.layers.0.block_sparse_moe.experts.66.w1", "model.layers.0.block_sparse_moe.experts.67.w1", "model.layers.0.block_sparse_moe.experts.68.w1", "model.layers.0.block_sparse_moe.experts.69.w1", "model.layers.0.block_sparse_moe.experts.70.w1", "model.layers.0.block_sparse_moe.experts.71.w1", "model.layers.0.block_sparse_moe.experts.72.w1", "model.layers.0.block_sparse_moe.experts.73.w1", "model.layers.0.block_sparse_moe.experts.74.w1", "model.layers.0.block_sparse_moe.experts.75.w1", "model.layers.0.block_sparse_moe.experts.76.w1", "model.layers.0.block_sparse_moe.experts.77.w1", "model.layers.0.block_sparse_moe.experts.78.w1", "model.layers.0.block_sparse_moe.experts.79.w1", "model.layers.0.block_sparse_moe.experts.80.w1", "model.layers.0.block_sparse_moe.experts.81.w1", "model.layers.0.block_sparse_moe.experts.82.w1", "model.layers.0.block_sparse_moe.experts.83.w1", "model.layers.0.block_sparse_moe.experts.84.w1", "model.layers.0.block_sparse_moe.experts.85.w1", "model.layers.0.block_sparse_moe.experts.86.w1", "model.layers.0.block_sparse_moe.experts.87.w1", "model.layers.0.block_sparse_moe.experts.88.w1", "model.layers.0.block_sparse_moe.experts.89.w1", "model.layers.0.block_sparse_moe.experts.90.w1", "model.layers.0.block_sparse_moe.experts.91.w1", "model.layers.0.block_sparse_moe.experts.92.w1", "model.layers.0.block_sparse_moe.experts.93.w1", "model.layers.0.block_sparse_moe.experts.94.w1", "model.layers.0.block_sparse_moe.experts.95.w1", "model.layers.0.block_sparse_moe.experts.96.w1", "model.layers.0.block_sparse_moe.experts.97.w1", "model.layers.0.block_sparse_moe.experts.98.w1", "model.layers.0.block_sparse_moe.experts.99.w1", "model.layers.0.block_sparse_moe.experts.100.w1", "model.layers.0.block_sparse_moe.experts.101.w1", "model.layers.0.block_sparse_moe.experts.102.w1", "model.layers.0.block_sparse_moe.experts.103.w1", "model.layers.0.block_sparse_moe.experts.104.w1", "model.layers.0.block_sparse_moe.experts.105.w1", "model.layers.0.block_sparse_moe.experts.106.w1", "model.layers.0.block_sparse_moe.experts.107.w1", "model.layers.0.block_sparse_moe.experts.108.w1", "model.layers.0.block_sparse_moe.experts.109.w1", "model.layers.0.block_sparse_moe.experts.110.w1", "model.layers.0.block_sparse_moe.experts.111.w1", "model.layers.0.block_sparse_moe.experts.112.w1", "model.layers.0.block_sparse_moe.experts.113.w1", "model.layers.0.block_sparse_moe.experts.114.w1", "model.layers.0.block_sparse_moe.experts.115.w1", "model.layers.0.block_sparse_moe.experts.116.w1", "model.layers.0.block_sparse_moe.experts.117.w1", "model.layers.0.block_sparse_moe.experts.118.w1", "model.layers.0.block_sparse_moe.experts.119.w1", "model.layers.0.block_sparse_moe.experts.120.w1", "model.layers.0.block_sparse_moe.experts.121.w1", "model.layers.0.block_sparse_moe.experts.122.w1", "model.layers.0.block_sparse_moe.experts.123.w1", "model.layers.0.block_sparse_moe.experts.124.w1", "model.layers.0.block_sparse_moe.experts.125.w1", "model.layers.0.block_sparse_moe.experts.126.w1", "model.layers.0.block_sparse_moe.experts.127.w1", "model.layers.0.block_sparse_moe.experts.128.w1", "model.layers.0.block_sparse_moe.experts.129.w1", "model.layers.0.block_sparse_moe.experts.130.w1", "model.layers.0.block_sparse_moe.experts.131.w1", "model.layers.0.block_sparse_moe.experts.132.w1", "model.layers.0.block_sparse_moe.experts.133.w1", "model.layers.0.block_sparse_moe.experts.134.w1", "model.layers.0.block_sparse_moe.experts.135.w1", "model.layers.0.block_sparse_moe.experts.136.w1", "model.layers.0.block_sparse_moe.experts.137.w1", "model.layers.0.block_sparse_moe.experts.138.w1", "model.layers.0.block_sparse_moe.experts.139.w1", "model.layers.0.block_sparse_moe.experts.140.w1", "model.layers.0.block_sparse_moe.experts.141.w1", "model.layers.0.block_sparse_moe.experts.142.w1", "model.layers.0.block_sparse_moe.experts.143.w1", "model.layers.0.block_sparse_moe.experts.144.w1", "model.layers.0.block_sparse_moe.experts.145.w1", "model.layers.0.block_sparse_moe.experts.146.w1", "model.layers.0.block_sparse_moe.experts.147.w1", "model.layers.0.block_sparse_moe.experts.148.w1", "model.layers.0.block_sparse_moe.experts.149.w1", "model.layers.0.block_sparse_moe.experts.150.w1", "model.layers.0.block_sparse_moe.experts.151.w1", "model.layers.0.block_sparse_moe.experts.152.w1", "model.layers.0.block_sparse_moe.experts.153.w1", "model.layers.0.block_sparse_moe.experts.154.w1", "model.layers.0.block_sparse_moe.experts.155.w1", "model.layers.0.block_sparse_moe.experts.156.w1", "model.layers.0.block_sparse_moe.experts.157.w1", "model.layers.0.block_sparse_moe.experts.158.w1", "model.layers.0.block_sparse_moe.experts.159.w1", "model.layers.0.block_sparse_moe.experts.160.w1", "model.layers.0.block_sparse_moe.experts.161.w1", "model.layers.0.block_sparse_moe.experts.162.w1", "model.layers.0.block_sparse_moe.experts.163.w1", "model.layers.0.block_sparse_moe.experts.164.w1", "model.layers.0.block_sparse_moe.experts.165.w1", "model.layers.0.block_sparse_moe.experts.166.w1", "model.layers.0.block_sparse_moe.experts.167.w1", "model.layers.0.block_sparse_moe.experts.168.w1", "model.layers.0.block_sparse_moe.experts.169.w1", "model.layers.0.block_sparse_moe.experts.170.w1", "model.layers.0.block_sparse_moe.experts.171.w1", "model.layers.0.block_sparse_moe.experts.172.w1", "model.layers.0.block_sparse_moe.experts.173.w1", "model.layers.0.block_sparse_moe.experts.174.w1", "model.layers.0.block_sparse_moe.experts.175.w1", "model.layers.0.block_sparse_moe.experts.176.w1", "model.layers.0.block_sparse_moe.experts.177.w1", "model.layers.0.block_sparse_moe.experts.178.w1", "model.layers.0.block_sparse_moe.experts.179.w1", "model.layers.0.block_sparse_moe.experts.180.w1", "model.layers.0.block_sparse_moe.experts.181.w1", "model.layers.0.block_sparse_moe.experts.182.w1", "model.layers.0.block_sparse_moe.experts.183.w1", "model.layers.0.block_sparse_moe.experts.184.w1", "model.layers.0.block_sparse_moe.experts.185.w1", "model.layers.0.block_sparse_moe.experts.186.w1", "model.layers.0.block_sparse_moe.experts.187.w1", "model.layers.0.block_sparse_moe.experts.188.w1", "model.layers.0.block_sparse_moe.experts.189.w1", "model.layers.0.block_sparse_moe.experts.190.w1", "model.layers.0.block_sparse_moe.experts.191.w1", "model.layers.0.block_sparse_moe.experts.192.w1", "model.layers.0.block_sparse_moe.experts.193.w1", "model.layers.0.block_sparse_moe.experts.194.w1", "model.layers.0.block_sparse_moe.experts.195.w1", "model.layers.0.block_sparse_moe.experts.196.w1", "model.layers.0.block_sparse_moe.experts.197.w1", "model.layers.0.block_sparse_moe.experts.198.w1", "model.layers.0.block_sparse_moe.experts.199.w1", "model.layers.0.block_sparse_moe.experts.200.w1", "model.layers.0.block_sparse_moe.experts.201.w1", "model.layers.0.block_sparse_moe.experts.202.w1", "model.layers.0.block_sparse_moe.experts.203.w1", "model.layers.0.block_sparse_moe.experts.204.w1", "model.layers.0.block_sparse_moe.experts.205.w1", "model.layers.0.block_sparse_moe.experts.206.w1", "model.layers.0.block_sparse_moe.experts.207.w1", "model.layers.0.block_sparse_moe.experts.208.w1", "model.layers.0.block_sparse_moe.experts.209.w1", "model.layers.0.block_sparse_moe.experts.210.w1", "model.layers.0.block_sparse_moe.experts.211.w1", "model.layers.0.block_sparse_moe.experts.212.w1", "model.layers.0.block_sparse_moe.experts.213.w1", "model.layers.0.block_sparse_moe.experts.214.w1", "model.layers.0.block_sparse_moe.experts.215.w1", "model.layers.0.block_sparse_moe.experts.216.w1", "model.layers.0.block_sparse_moe.experts.217.w1", "model.layers.0.block_sparse_moe.experts.218.w1", "model.layers.0.block_sparse_moe.experts.219.w1", "model.layers.0.block_sparse_moe.experts.220.w1", "model.layers.0.block_sparse_moe.experts.221.w1", "model.layers.0.block_sparse_moe.experts.222.w1", "model.layers.0.block_sparse_moe.experts.223.w1", "model.layers.0.block_sparse_moe.experts.224.w1", "model.layers.0.block_sparse_moe.experts.225.w1", "model.layers.0.block_sparse_moe.experts.226.w1", "model.layers.0.block_sparse_moe.experts.227.w1", "model.layers.0.block_sparse_moe.experts.228.w1", "model.layers.0.block_sparse_moe.experts.229.w1", "model.layers.0.block_sparse_moe.experts.230.w1", "model.layers.0.block_sparse_moe.experts.231.w1", "model.layers.0.block_sparse_moe.experts.232.w1", "model.layers.0.block_sparse_moe.experts.233.w1", "model.layers.0.block_sparse_moe.experts.234.w1", "model.layers.0.block_sparse_moe.experts.235.w1", "model.layers.0.block_sparse_moe.experts.236.w1", "model.layers.0.block_sparse_moe.experts.237.w1", "model.layers.0.block_sparse_moe.experts.238.w1", "model.layers.0.block_sparse_moe.experts.239.w1", "model.layers.0.block_sparse_moe.experts.240.w1", "model.layers.0.block_sparse_moe.experts.241.w1", "model.layers.0.block_sparse_moe.experts.242.w1", "model.layers.0.block_sparse_moe.experts.243.w1", "model.layers.0.block_sparse_moe.experts.244.w1", "model.layers.0.block_sparse_moe.experts.245.w1", "model.layers.0.block_sparse_moe.experts.246.w1", "model.layers.0.block_sparse_moe.experts.247.w1", "model.layers.0.block_sparse_moe.experts.248.w1", "model.layers.0.block_sparse_moe.experts.249.w1", "model.layers.0.block_sparse_moe.experts.250.w1", "model.layers.0.block_sparse_moe.experts.251.w1", "model.layers.0.block_sparse_moe.experts.252.w1", "model.layers.0.block_sparse_moe.experts.253.w1", "model.layers.0.block_sparse_moe.experts.254.w1", "model.layers.0.block_sparse_moe.experts.255.w1", "model.layers.0.block_sparse_moe.experts.0.w3", "model.layers.0.block_sparse_moe.experts.1.w3", "model.layers.0.block_sparse_moe.experts.2.w3", "model.layers.0.block_sparse_moe.experts.3.w3", "model.layers.0.block_sparse_moe.experts.4.w3", "model.layers.0.block_sparse_moe.experts.5.w3", "model.layers.0.block_sparse_moe.experts.6.w3", "model.layers.0.block_sparse_moe.experts.7.w3", "model.layers.0.block_sparse_moe.experts.8.w3", "model.layers.0.block_sparse_moe.experts.9.w3", "model.layers.0.block_sparse_moe.experts.10.w3", "model.layers.0.block_sparse_moe.experts.11.w3", "model.layers.0.block_sparse_moe.experts.12.w3", "model.layers.0.block_sparse_moe.experts.13.w3", "model.layers.0.block_sparse_moe.experts.14.w3", "model.layers.0.block_sparse_moe.experts.15.w3", "model.layers.0.block_sparse_moe.experts.16.w3", "model.layers.0.block_sparse_moe.experts.17.w3", "model.layers.0.block_sparse_moe.experts.18.w3", "model.layers.0.block_sparse_moe.experts.19.w3", "model.layers.0.block_sparse_moe.experts.20.w3", "model.layers.0.block_sparse_moe.experts.21.w3", "model.layers.0.block_sparse_moe.experts.22.w3", "model.layers.0.block_sparse_moe.experts.23.w3", "model.layers.0.block_sparse_moe.experts.24.w3", "model.layers.0.block_sparse_moe.experts.25.w3", "model.layers.0.block_sparse_moe.experts.26.w3", "model.layers.0.block_sparse_moe.experts.27.w3", "model.layers.0.block_sparse_moe.experts.28.w3", "model.layers.0.block_sparse_moe.experts.29.w3", "model.layers.0.block_sparse_moe.experts.30.w3", "model.layers.0.block_sparse_moe.experts.31.w3", "model.layers.0.block_sparse_moe.experts.32.w3", "model.layers.0.block_sparse_moe.experts.33.w3", "model.layers.0.block_sparse_moe.experts.34.w3", "model.layers.0.block_sparse_moe.experts.35.w3", "model.layers.0.block_sparse_moe.experts.36.w3", "model.layers.0.block_sparse_moe.experts.37.w3", "model.layers.0.block_sparse_moe.experts.38.w3", "model.layers.0.block_sparse_moe.experts.39.w3", "model.layers.0.block_sparse_moe.experts.40.w3", "model.layers.0.block_sparse_moe.experts.41.w3", "model.layers.0.block_sparse_moe.experts.42.w3", "model.layers.0.block_sparse_moe.experts.43.w3", "model.layers.0.block_sparse_moe.experts.44.w3", "model.layers.0.block_sparse_moe.experts.45.w3", "model.layers.0.block_sparse_moe.experts.46.w3", "model.layers.0.block_sparse_moe.experts.47.w3", "model.layers.0.block_sparse_moe.experts.48.w3", "model.layers.0.block_sparse_moe.experts.49.w3", "model.layers.0.block_sparse_moe.experts.50.w3", "model.layers.0.block_sparse_moe.experts.51.w3", "model.layers.0.block_sparse_moe.experts.52.w3", "model.layers.0.block_sparse_moe.experts.53.w3", "model.layers.0.block_sparse_moe.experts.54.w3", "model.layers.0.block_sparse_moe.experts.55.w3", "model.layers.0.block_sparse_moe.experts.56.w3", "model.layers.0.block_sparse_moe.experts.57.w3", "model.layers.0.block_sparse_moe.experts.58.w3", "model.layers.0.block_sparse_moe.experts.59.w3", "model.layers.0.block_sparse_moe.experts.60.w3", "model.layers.0.block_sparse_moe.experts.61.w3", "model.layers.0.block_sparse_moe.experts.62.w3", "model.layers.0.block_sparse_moe.experts.63.w3", "model.layers.0.block_sparse_moe.experts.64.w3", "model.layers.0.block_sparse_moe.experts.65.w3", "model.layers.0.block_sparse_moe.experts.66.w3", "model.layers.0.block_sparse_moe.experts.67.w3", "model.layers.0.block_sparse_moe.experts.68.w3", "model.layers.0.block_sparse_moe.experts.69.w3", "model.layers.0.block_sparse_moe.experts.70.w3", "model.layers.0.block_sparse_moe.experts.71.w3", "model.layers.0.block_sparse_moe.experts.72.w3", "model.layers.0.block_sparse_moe.experts.73.w3", "model.layers.0.block_sparse_moe.experts.74.w3", "model.layers.0.block_sparse_moe.experts.75.w3", "model.layers.0.block_sparse_moe.experts.76.w3", "model.layers.0.block_sparse_moe.experts.77.w3", "model.layers.0.block_sparse_moe.experts.78.w3", "model.layers.0.block_sparse_moe.experts.79.w3", "model.layers.0.block_sparse_moe.experts.80.w3", "model.layers.0.block_sparse_moe.experts.81.w3", "model.layers.0.block_sparse_moe.experts.82.w3", "model.layers.0.block_sparse_moe.experts.83.w3", "model.layers.0.block_sparse_moe.experts.84.w3", "model.layers.0.block_sparse_moe.experts.85.w3", "model.layers.0.block_sparse_moe.experts.86.w3", "model.layers.0.block_sparse_moe.experts.87.w3", "model.layers.0.block_sparse_moe.experts.88.w3", "model.layers.0.block_sparse_moe.experts.89.w3", "model.layers.0.block_sparse_moe.experts.90.w3", "model.layers.0.block_sparse_moe.experts.91.w3", "model.layers.0.block_sparse_moe.experts.92.w3", "model.layers.0.block_sparse_moe.experts.93.w3", "model.layers.0.block_sparse_moe.experts.94.w3", "model.layers.0.block_sparse_moe.experts.95.w3", "model.layers.0.block_sparse_moe.experts.96.w3", "model.layers.0.block_sparse_moe.experts.97.w3", "model.layers.0.block_sparse_moe.experts.98.w3", "model.layers.0.block_sparse_moe.experts.99.w3", "model.layers.0.block_sparse_moe.experts.100.w3", "model.layers.0.block_sparse_moe.experts.101.w3", "model.layers.0.block_sparse_moe.experts.102.w3", "model.layers.0.block_sparse_moe.experts.103.w3", "model.layers.0.block_sparse_moe.experts.104.w3", "model.layers.0.block_sparse_moe.experts.105.w3", "model.layers.0.block_sparse_moe.experts.106.w3", "model.layers.0.block_sparse_moe.experts.107.w3", "model.layers.0.block_sparse_moe.experts.108.w3", "model.layers.0.block_sparse_moe.experts.109.w3", "model.layers.0.block_sparse_moe.experts.110.w3", "model.layers.0.block_sparse_moe.experts.111.w3", "model.layers.0.block_sparse_moe.experts.112.w3", "model.layers.0.block_sparse_moe.experts.113.w3", "model.layers.0.block_sparse_moe.experts.114.w3", "model.layers.0.block_sparse_moe.experts.115.w3", "model.layers.0.block_sparse_moe.experts.116.w3", "model.layers.0.block_sparse_moe.experts.117.w3", "model.layers.0.block_sparse_moe.experts.118.w3", "model.layers.0.block_sparse_moe.experts.119.w3", "model.layers.0.block_sparse_moe.experts.120.w3", "model.layers.0.block_sparse_moe.experts.121.w3", "model.layers.0.block_sparse_moe.experts.122.w3", "model.layers.0.block_sparse_moe.experts.123.w3", "model.layers.0.block_sparse_moe.experts.124.w3", "model.layers.0.block_sparse_moe.experts.125.w3", "model.layers.0.block_sparse_moe.experts.126.w3", "model.layers.0.block_sparse_moe.experts.127.w3", "model.layers.0.block_sparse_moe.experts.128.w3", "model.layers.0.block_sparse_moe.experts.129.w3", "model.layers.0.block_sparse_moe.experts.130.w3", "model.layers.0.block_sparse_moe.experts.131.w3", "model.layers.0.block_sparse_moe.experts.132.w3", "model.layers.0.block_sparse_moe.experts.133.w3", "model.layers.0.block_sparse_moe.experts.134.w3", "model.layers.0.block_sparse_moe.experts.135.w3", "model.layers.0.block_sparse_moe.experts.136.w3", "model.layers.0.block_sparse_moe.experts.137.w3", "model.layers.0.block_sparse_moe.experts.138.w3", "model.layers.0.block_sparse_moe.experts.139.w3", "model.layers.0.block_sparse_moe.experts.140.w3", "model.layers.0.block_sparse_moe.experts.141.w3", "model.layers.0.block_sparse_moe.experts.142.w3", "model.layers.0.block_sparse_moe.experts.143.w3", "model.layers.0.block_sparse_moe.experts.144.w3", "model.layers.0.block_sparse_moe.experts.145.w3", "model.layers.0.block_sparse_moe.experts.146.w3", "model.layers.0.block_sparse_moe.experts.147.w3", "model.layers.0.block_sparse_moe.experts.148.w3", "model.layers.0.block_sparse_moe.experts.149.w3", "model.layers.0.block_sparse_moe.experts.150.w3", "model.layers.0.block_sparse_moe.experts.151.w3", "model.layers.0.block_sparse_moe.experts.152.w3", "model.layers.0.block_sparse_moe.experts.153.w3", "model.layers.0.block_sparse_moe.experts.154.w3", "model.layers.0.block_sparse_moe.experts.155.w3", "model.layers.0.block_sparse_moe.experts.156.w3", "model.layers.0.block_sparse_moe.experts.157.w3", "model.layers.0.block_sparse_moe.experts.158.w3", "model.layers.0.block_sparse_moe.experts.159.w3", "model.layers.0.block_sparse_moe.experts.160.w3", "model.layers.0.block_sparse_moe.experts.161.w3", "model.layers.0.block_sparse_moe.experts.162.w3", "model.layers.0.block_sparse_moe.experts.163.w3", "model.layers.0.block_sparse_moe.experts.164.w3", "model.layers.0.block_sparse_moe.experts.165.w3", "model.layers.0.block_sparse_moe.experts.166.w3", "model.layers.0.block_sparse_moe.experts.167.w3", "model.layers.0.block_sparse_moe.experts.168.w3", "model.layers.0.block_sparse_moe.experts.169.w3", "model.layers.0.block_sparse_moe.experts.170.w3", "model.layers.0.block_sparse_moe.experts.171.w3", "model.layers.0.block_sparse_moe.experts.172.w3", "model.layers.0.block_sparse_moe.experts.173.w3", "model.layers.0.block_sparse_moe.experts.174.w3", "model.layers.0.block_sparse_moe.experts.175.w3", "model.layers.0.block_sparse_moe.experts.176.w3", "model.layers.0.block_sparse_moe.experts.177.w3", "model.layers.0.block_sparse_moe.experts.178.w3", "model.layers.0.block_sparse_moe.experts.179.w3", "model.layers.0.block_sparse_moe.experts.180.w3", "model.layers.0.block_sparse_moe.experts.181.w3", "model.layers.0.block_sparse_moe.experts.182.w3", "model.layers.0.block_sparse_moe.experts.183.w3", "model.layers.0.block_sparse_moe.experts.184.w3", "model.layers.0.block_sparse_moe.experts.185.w3", "model.layers.0.block_sparse_moe.experts.186.w3", "model.layers.0.block_sparse_moe.experts.187.w3", "model.layers.0.block_sparse_moe.experts.188.w3", "model.layers.0.block_sparse_moe.experts.189.w3", "model.layers.0.block_sparse_moe.experts.190.w3", "model.layers.0.block_sparse_moe.experts.191.w3", "model.layers.0.block_sparse_moe.experts.192.w3", "model.layers.0.block_sparse_moe.experts.193.w3", "model.layers.0.block_sparse_moe.experts.194.w3", "model.layers.0.block_sparse_moe.experts.195.w3", "model.layers.0.block_sparse_moe.experts.196.w3", "model.layers.0.block_sparse_moe.experts.197.w3", "model.layers.0.block_sparse_moe.experts.198.w3", "model.layers.0.block_sparse_moe.experts.199.w3", "model.layers.0.block_sparse_moe.experts.200.w3", "model.layers.0.block_sparse_moe.experts.201.w3", "model.layers.0.block_sparse_moe.experts.202.w3", "model.layers.0.block_sparse_moe.experts.203.w3", "model.layers.0.block_sparse_moe.experts.204.w3", "model.layers.0.block_sparse_moe.experts.205.w3", "model.layers.0.block_sparse_moe.experts.206.w3", "model.layers.0.block_sparse_moe.experts.207.w3", "model.layers.0.block_sparse_moe.experts.208.w3", "model.layers.0.block_sparse_moe.experts.209.w3", "model.layers.0.block_sparse_moe.experts.210.w3", "model.layers.0.block_sparse_moe.experts.211.w3", "model.layers.0.block_sparse_moe.experts.212.w3", "model.layers.0.block_sparse_moe.experts.213.w3", "model.layers.0.block_sparse_moe.experts.214.w3", "model.layers.0.block_sparse_moe.experts.215.w3", "model.layers.0.block_sparse_moe.experts.216.w3", "model.layers.0.block_sparse_moe.experts.217.w3", "model.layers.0.block_sparse_moe.experts.218.w3", "model.layers.0.block_sparse_moe.experts.219.w3", "model.layers.0.block_sparse_moe.experts.220.w3", "model.layers.0.block_sparse_moe.experts.221.w3", "model.layers.0.block_sparse_moe.experts.222.w3", "model.layers.0.block_sparse_moe.experts.223.w3", "model.layers.0.block_sparse_moe.experts.224.w3", "model.layers.0.block_sparse_moe.experts.225.w3", "model.layers.0.block_sparse_moe.experts.226.w3", "model.layers.0.block_sparse_moe.experts.227.w3", "model.layers.0.block_sparse_moe.experts.228.w3", "model.layers.0.block_sparse_moe.experts.229.w3", "model.layers.0.block_sparse_moe.experts.230.w3", "model.layers.0.block_sparse_moe.experts.231.w3", "model.layers.0.block_sparse_moe.experts.232.w3", "model.layers.0.block_sparse_moe.experts.233.w3", "model.layers.0.block_sparse_moe.experts.234.w3", "model.layers.0.block_sparse_moe.experts.235.w3", "model.layers.0.block_sparse_moe.experts.236.w3", "model.layers.0.block_sparse_moe.experts.237.w3", "model.layers.0.block_sparse_moe.experts.238.w3", "model.layers.0.block_sparse_moe.experts.239.w3", "model.layers.0.block_sparse_moe.experts.240.w3", "model.layers.0.block_sparse_moe.experts.241.w3", "model.layers.0.block_sparse_moe.experts.242.w3", "model.layers.0.block_sparse_moe.experts.243.w3", "model.layers.0.block_sparse_moe.experts.244.w3", "model.layers.0.block_sparse_moe.experts.245.w3", "model.layers.0.block_sparse_moe.experts.246.w3", "model.layers.0.block_sparse_moe.experts.247.w3", "model.layers.0.block_sparse_moe.experts.248.w3", "model.layers.0.block_sparse_moe.experts.249.w3", "model.layers.0.block_sparse_moe.experts.250.w3", "model.layers.0.block_sparse_moe.experts.251.w3", "model.layers.0.block_sparse_moe.experts.252.w3", "model.layers.0.block_sparse_moe.experts.253.w3", "model.layers.0.block_sparse_moe.experts.254.w3", "model.layers.0.block_sparse_moe.experts.255.w3", "model.layers.0.block_sparse_moe.experts.0.w2", "model.layers.0.block_sparse_moe.experts.1.w2", "model.layers.0.block_sparse_moe.experts.2.w2", "model.layers.0.block_sparse_moe.experts.3.w2", "model.layers.0.block_sparse_moe.experts.4.w2", "model.layers.0.block_sparse_moe.experts.5.w2", "model.layers.0.block_sparse_moe.experts.6.w2", "model.layers.0.block_sparse_moe.experts.7.w2", "model.layers.0.block_sparse_moe.experts.8.w2", "model.layers.0.block_sparse_moe.experts.9.w2", "model.layers.0.block_sparse_moe.experts.10.w2", "model.layers.0.block_sparse_moe.experts.11.w2", "model.layers.0.block_sparse_moe.experts.12.w2", "model.layers.0.block_sparse_moe.experts.13.w2", "model.layers.0.block_sparse_moe.experts.14.w2", "model.layers.0.block_sparse_moe.experts.15.w2", "model.layers.0.block_sparse_moe.experts.16.w2", "model.layers.0.block_sparse_moe.experts.17.w2", "model.layers.0.block_sparse_moe.experts.18.w2", "model.layers.0.block_sparse_moe.experts.19.w2", "model.layers.0.block_sparse_moe.experts.20.w2", "model.layers.0.block_sparse_moe.experts.21.w2", "model.layers.0.block_sparse_moe.experts.22.w2", "model.layers.0.block_sparse_moe.experts.23.w2", "model.layers.0.block_sparse_moe.experts.24.w2", "model.layers.0.block_sparse_moe.experts.25.w2", "model.layers.0.block_sparse_moe.experts.26.w2", "model.layers.0.block_sparse_moe.experts.27.w2", "model.layers.0.block_sparse_moe.experts.28.w2", "model.layers.0.block_sparse_moe.experts.29.w2", "model.layers.0.block_sparse_moe.experts.30.w2", "model.layers.0.block_sparse_moe.experts.31.w2", "model.layers.0.block_sparse_moe.experts.32.w2", "model.layers.0.block_sparse_moe.experts.33.w2", "model.layers.0.block_sparse_moe.experts.34.w2", "model.layers.0.block_sparse_moe.experts.35.w2", "model.layers.0.block_sparse_moe.experts.36.w2", "model.layers.0.block_sparse_moe.experts.37.w2", "model.layers.0.block_sparse_moe.experts.38.w2", "model.layers.0.block_sparse_moe.experts.39.w2", "model.layers.0.block_sparse_moe.experts.40.w2", "model.layers.0.block_sparse_moe.experts.41.w2", "model.layers.0.block_sparse_moe.experts.42.w2", "model.layers.0.block_sparse_moe.experts.43.w2", "model.layers.0.block_sparse_moe.experts.44.w2", "model.layers.0.block_sparse_moe.experts.45.w2", "model.layers.0.block_sparse_moe.experts.46.w2", "model.layers.0.block_sparse_moe.experts.47.w2", "model.layers.0.block_sparse_moe.experts.48.w2", "model.layers.0.block_sparse_moe.experts.49.w2", "model.layers.0.block_sparse_moe.experts.50.w2", "model.layers.0.block_sparse_moe.experts.51.w2", "model.layers.0.block_sparse_moe.experts.52.w2", "model.layers.0.block_sparse_moe.experts.53.w2", "model.layers.0.block_sparse_moe.experts.54.w2", "model.layers.0.block_sparse_moe.experts.55.w2", "model.layers.0.block_sparse_moe.experts.56.w2", "model.layers.0.block_sparse_moe.experts.57.w2", "model.layers.0.block_sparse_moe.experts.58.w2", "model.layers.0.block_sparse_moe.experts.59.w2", "model.layers.0.block_sparse_moe.experts.60.w2", "model.layers.0.block_sparse_moe.experts.61.w2", "model.layers.0.block_sparse_moe.experts.62.w2", "model.layers.0.block_sparse_moe.experts.63.w2", "model.layers.0.block_sparse_moe.experts.64.w2", "model.layers.0.block_sparse_moe.experts.65.w2", "model.layers.0.block_sparse_moe.experts.66.w2", "model.layers.0.block_sparse_moe.experts.67.w2", "model.layers.0.block_sparse_moe.experts.68.w2", "model.layers.0.block_sparse_moe.experts.69.w2", "model.layers.0.block_sparse_moe.experts.70.w2", "model.layers.0.block_sparse_moe.experts.71.w2", "model.layers.0.block_sparse_moe.experts.72.w2", "model.layers.0.block_sparse_moe.experts.73.w2", "model.layers.0.block_sparse_moe.experts.74.w2", "model.layers.0.block_sparse_moe.experts.75.w2", "model.layers.0.block_sparse_moe.experts.76.w2", "model.layers.0.block_sparse_moe.experts.77.w2", "model.layers.0.block_sparse_moe.experts.78.w2", "model.layers.0.block_sparse_moe.experts.79.w2", "model.layers.0.block_sparse_moe.experts.80.w2", "model.layers.0.block_sparse_moe.experts.81.w2", "model.layers.0.block_sparse_moe.experts.82.w2", "model.layers.0.block_sparse_moe.experts.83.w2", "model.layers.0.block_sparse_moe.experts.84.w2", "model.layers.0.block_sparse_moe.experts.85.w2", "model.layers.0.block_sparse_moe.experts.86.w2", "model.layers.0.block_sparse_moe.experts.87.w2", "model.layers.0.block_sparse_moe.experts.88.w2", "model.layers.0.block_sparse_moe.experts.89.w2", "model.layers.0.block_sparse_moe.experts.90.w2", "model.layers.0.block_sparse_moe.experts.91.w2", "model.layers.0.block_sparse_moe.experts.92.w2", "model.layers.0.block_sparse_moe.experts.93.w2", "model.layers.0.block_sparse_moe.experts.94.w2", "model.layers.0.block_sparse_moe.experts.95.w2", "model.layers.0.block_sparse_moe.experts.96.w2", "model.layers.0.block_sparse_moe.experts.97.w2", "model.layers.0.block_sparse_moe.experts.98.w2", "model.layers.0.block_sparse_moe.experts.99.w2", "model.layers.0.block_sparse_moe.experts.100.w2", "model.layers.0.block_sparse_moe.experts.101.w2", "model.layers.0.block_sparse_moe.experts.102.w2", "model.layers.0.block_sparse_moe.experts.103.w2", "model.layers.0.block_sparse_moe.experts.104.w2", "model.layers.0.block_sparse_moe.experts.105.w2", "model.layers.0.block_sparse_moe.experts.106.w2", "model.layers.0.block_sparse_moe.experts.107.w2", "model.layers.0.block_sparse_moe.experts.108.w2", "model.layers.0.block_sparse_moe.experts.109.w2", "model.layers.0.block_sparse_moe.experts.110.w2", "model.layers.0.block_sparse_moe.experts.111.w2", "model.layers.0.block_sparse_moe.experts.112.w2", "model.layers.0.block_sparse_moe.experts.113.w2", "model.layers.0.block_sparse_moe.experts.114.w2", "model.layers.0.block_sparse_moe.experts.115.w2", "model.layers.0.block_sparse_moe.experts.116.w2", "model.layers.0.block_sparse_moe.experts.117.w2", "model.layers.0.block_sparse_moe.experts.118.w2", "model.layers.0.block_sparse_moe.experts.119.w2", "model.layers.0.block_sparse_moe.experts.120.w2", "model.layers.0.block_sparse_moe.experts.121.w2", "model.layers.0.block_sparse_moe.experts.122.w2", "model.layers.0.block_sparse_moe.experts.123.w2", "model.layers.0.block_sparse_moe.experts.124.w2", "model.layers.0.block_sparse_moe.experts.125.w2", "model.layers.0.block_sparse_moe.experts.126.w2", "model.layers.0.block_sparse_moe.experts.127.w2", "model.layers.0.block_sparse_moe.experts.128.w2", "model.layers.0.block_sparse_moe.experts.129.w2", "model.layers.0.block_sparse_moe.experts.130.w2", "model.layers.0.block_sparse_moe.experts.131.w2", "model.layers.0.block_sparse_moe.experts.132.w2", "model.layers.0.block_sparse_moe.experts.133.w2", "model.layers.0.block_sparse_moe.experts.134.w2", "model.layers.0.block_sparse_moe.experts.135.w2", "model.layers.0.block_sparse_moe.experts.136.w2", "model.layers.0.block_sparse_moe.experts.137.w2", "model.layers.0.block_sparse_moe.experts.138.w2", "model.layers.0.block_sparse_moe.experts.139.w2", "model.layers.0.block_sparse_moe.experts.140.w2", "model.layers.0.block_sparse_moe.experts.141.w2", "model.layers.0.block_sparse_moe.experts.142.w2", "model.layers.0.block_sparse_moe.experts.143.w2", "model.layers.0.block_sparse_moe.experts.144.w2", "model.layers.0.block_sparse_moe.experts.145.w2", "model.layers.0.block_sparse_moe.experts.146.w2", "model.layers.0.block_sparse_moe.experts.147.w2", "model.layers.0.block_sparse_moe.experts.148.w2", "model.layers.0.block_sparse_moe.experts.149.w2", "model.layers.0.block_sparse_moe.experts.150.w2", "model.layers.0.block_sparse_moe.experts.151.w2", "model.layers.0.block_sparse_moe.experts.152.w2", "model.layers.0.block_sparse_moe.experts.153.w2", "model.layers.0.block_sparse_moe.experts.154.w2", "model.layers.0.block_sparse_moe.experts.155.w2", "model.layers.0.block_sparse_moe.experts.156.w2", "model.layers.0.block_sparse_moe.experts.157.w2", "model.layers.0.block_sparse_moe.experts.158.w2", "model.layers.0.block_sparse_moe.experts.159.w2", "model.layers.0.block_sparse_moe.experts.160.w2", "model.layers.0.block_sparse_moe.experts.161.w2", "model.layers.0.block_sparse_moe.experts.162.w2", "model.layers.0.block_sparse_moe.experts.163.w2", "model.layers.0.block_sparse_moe.experts.164.w2", "model.layers.0.block_sparse_moe.experts.165.w2", "model.layers.0.block_sparse_moe.experts.166.w2", "model.layers.0.block_sparse_moe.experts.167.w2", "model.layers.0.block_sparse_moe.experts.168.w2", "model.layers.0.block_sparse_moe.experts.169.w2", "model.layers.0.block_sparse_moe.experts.170.w2", "model.layers.0.block_sparse_moe.experts.171.w2", "model.layers.0.block_sparse_moe.experts.172.w2", "model.layers.0.block_sparse_moe.experts.173.w2", "model.layers.0.block_sparse_moe.experts.174.w2", "model.layers.0.block_sparse_moe.experts.175.w2", "model.layers.0.block_sparse_moe.experts.176.w2", "model.layers.0.block_sparse_moe.experts.177.w2", "model.layers.0.block_sparse_moe.experts.178.w2", "model.layers.0.block_sparse_moe.experts.179.w2", "model.layers.0.block_sparse_moe.experts.180.w2", "model.layers.0.block_sparse_moe.experts.181.w2", "model.layers.0.block_sparse_moe.experts.182.w2", "model.layers.0.block_sparse_moe.experts.183.w2", "model.layers.0.block_sparse_moe.experts.184.w2", "model.layers.0.block_sparse_moe.experts.185.w2", "model.layers.0.block_sparse_moe.experts.186.w2", "model.layers.0.block_sparse_moe.experts.187.w2", "model.layers.0.block_sparse_moe.experts.188.w2", "model.layers.0.block_sparse_moe.experts.189.w2", "model.layers.0.block_sparse_moe.experts.190.w2", "model.layers.0.block_sparse_moe.experts.191.w2", "model.layers.0.block_sparse_moe.experts.192.w2", "model.layers.0.block_sparse_moe.experts.193.w2", "model.layers.0.block_sparse_moe.experts.194.w2", "model.layers.0.block_sparse_moe.experts.195.w2", "model.layers.0.block_sparse_moe.experts.196.w2", "model.layers.0.block_sparse_moe.experts.197.w2", "model.layers.0.block_sparse_moe.experts.198.w2", "model.layers.0.block_sparse_moe.experts.199.w2", "model.layers.0.block_sparse_moe.experts.200.w2", "model.layers.0.block_sparse_moe.experts.201.w2", "model.layers.0.block_sparse_moe.experts.202.w2", "model.layers.0.block_sparse_moe.experts.203.w2", "model.layers.0.block_sparse_moe.experts.204.w2", "model.layers.0.block_sparse_moe.experts.205.w2", "model.layers.0.block_sparse_moe.experts.206.w2", "model.layers.0.block_sparse_moe.experts.207.w2", "model.layers.0.block_sparse_moe.experts.208.w2", "model.layers.0.block_sparse_moe.experts.209.w2", "model.layers.0.block_sparse_moe.experts.210.w2", "model.layers.0.block_sparse_moe.experts.211.w2", "model.layers.0.block_sparse_moe.experts.212.w2", "model.layers.0.block_sparse_moe.experts.213.w2", "model.layers.0.block_sparse_moe.experts.214.w2", "model.layers.0.block_sparse_moe.experts.215.w2", "model.layers.0.block_sparse_moe.experts.216.w2", "model.layers.0.block_sparse_moe.experts.217.w2", "model.layers.0.block_sparse_moe.experts.218.w2", "model.layers.0.block_sparse_moe.experts.219.w2", "model.layers.0.block_sparse_moe.experts.220.w2", "model.layers.0.block_sparse_moe.experts.221.w2", "model.layers.0.block_sparse_moe.experts.222.w2", "model.layers.0.block_sparse_moe.experts.223.w2", "model.layers.0.block_sparse_moe.experts.224.w2", "model.layers.0.block_sparse_moe.experts.225.w2", "model.layers.0.block_sparse_moe.experts.226.w2", "model.layers.0.block_sparse_moe.experts.227.w2", "model.layers.0.block_sparse_moe.experts.228.w2", "model.layers.0.block_sparse_moe.experts.229.w2", "model.layers.0.block_sparse_moe.experts.230.w2", "model.layers.0.block_sparse_moe.experts.231.w2", "model.layers.0.block_sparse_moe.experts.232.w2", "model.layers.0.block_sparse_moe.experts.233.w2", "model.layers.0.block_sparse_moe.experts.234.w2", "model.layers.0.block_sparse_moe.experts.235.w2", "model.layers.0.block_sparse_moe.experts.236.w2", "model.layers.0.block_sparse_moe.experts.237.w2", "model.layers.0.block_sparse_moe.experts.238.w2", "model.layers.0.block_sparse_moe.experts.239.w2", "model.layers.0.block_sparse_moe.experts.240.w2", "model.layers.0.block_sparse_moe.experts.241.w2", "model.layers.0.block_sparse_moe.experts.242.w2", "model.layers.0.block_sparse_moe.experts.243.w2", "model.layers.0.block_sparse_moe.experts.244.w2", "model.layers.0.block_sparse_moe.experts.245.w2", "model.layers.0.block_sparse_moe.experts.246.w2", "model.layers.0.block_sparse_moe.experts.247.w2", "model.layers.0.block_sparse_moe.experts.248.w2", "model.layers.0.block_sparse_moe.experts.249.w2", "model.layers.0.block_sparse_moe.experts.250.w2", "model.layers.0.block_sparse_moe.experts.251.w2", "model.layers.0.block_sparse_moe.experts.252.w2", "model.layers.0.block_sparse_moe.experts.253.w2", "model.layers.0.block_sparse_moe.experts.254.w2", "model.layers.0.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.005213822424411763, "dbits": 3623878656 } ] }, { "idx": 2, "layers": [ "model.layers.1.self_attn.q_proj", "model.layers.1.self_attn.k_proj", "model.layers.1.self_attn.v_proj", "model.layers.1.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0005112733691930993, "dbits": 44040192 } ] }, { "idx": 3, "layers": [ "model.layers.1.block_sparse_moe.experts.0.w1", "model.layers.1.block_sparse_moe.experts.1.w1", "model.layers.1.block_sparse_moe.experts.2.w1", "model.layers.1.block_sparse_moe.experts.3.w1", "model.layers.1.block_sparse_moe.experts.4.w1", "model.layers.1.block_sparse_moe.experts.5.w1", "model.layers.1.block_sparse_moe.experts.6.w1", "model.layers.1.block_sparse_moe.experts.7.w1", "model.layers.1.block_sparse_moe.experts.8.w1", "model.layers.1.block_sparse_moe.experts.9.w1", "model.layers.1.block_sparse_moe.experts.10.w1", "model.layers.1.block_sparse_moe.experts.11.w1", "model.layers.1.block_sparse_moe.experts.12.w1", "model.layers.1.block_sparse_moe.experts.13.w1", "model.layers.1.block_sparse_moe.experts.14.w1", "model.layers.1.block_sparse_moe.experts.15.w1", "model.layers.1.block_sparse_moe.experts.16.w1", "model.layers.1.block_sparse_moe.experts.17.w1", "model.layers.1.block_sparse_moe.experts.18.w1", "model.layers.1.block_sparse_moe.experts.19.w1", "model.layers.1.block_sparse_moe.experts.20.w1", "model.layers.1.block_sparse_moe.experts.21.w1", "model.layers.1.block_sparse_moe.experts.22.w1", "model.layers.1.block_sparse_moe.experts.23.w1", "model.layers.1.block_sparse_moe.experts.24.w1", "model.layers.1.block_sparse_moe.experts.25.w1", "model.layers.1.block_sparse_moe.experts.26.w1", "model.layers.1.block_sparse_moe.experts.27.w1", "model.layers.1.block_sparse_moe.experts.28.w1", "model.layers.1.block_sparse_moe.experts.29.w1", "model.layers.1.block_sparse_moe.experts.30.w1", "model.layers.1.block_sparse_moe.experts.31.w1", "model.layers.1.block_sparse_moe.experts.32.w1", "model.layers.1.block_sparse_moe.experts.33.w1", "model.layers.1.block_sparse_moe.experts.34.w1", "model.layers.1.block_sparse_moe.experts.35.w1", "model.layers.1.block_sparse_moe.experts.36.w1", "model.layers.1.block_sparse_moe.experts.37.w1", "model.layers.1.block_sparse_moe.experts.38.w1", "model.layers.1.block_sparse_moe.experts.39.w1", "model.layers.1.block_sparse_moe.experts.40.w1", "model.layers.1.block_sparse_moe.experts.41.w1", "model.layers.1.block_sparse_moe.experts.42.w1", "model.layers.1.block_sparse_moe.experts.43.w1", "model.layers.1.block_sparse_moe.experts.44.w1", "model.layers.1.block_sparse_moe.experts.45.w1", "model.layers.1.block_sparse_moe.experts.46.w1", "model.layers.1.block_sparse_moe.experts.47.w1", "model.layers.1.block_sparse_moe.experts.48.w1", "model.layers.1.block_sparse_moe.experts.49.w1", "model.layers.1.block_sparse_moe.experts.50.w1", "model.layers.1.block_sparse_moe.experts.51.w1", "model.layers.1.block_sparse_moe.experts.52.w1", "model.layers.1.block_sparse_moe.experts.53.w1", "model.layers.1.block_sparse_moe.experts.54.w1", "model.layers.1.block_sparse_moe.experts.55.w1", "model.layers.1.block_sparse_moe.experts.56.w1", "model.layers.1.block_sparse_moe.experts.57.w1", "model.layers.1.block_sparse_moe.experts.58.w1", "model.layers.1.block_sparse_moe.experts.59.w1", "model.layers.1.block_sparse_moe.experts.60.w1", "model.layers.1.block_sparse_moe.experts.61.w1", "model.layers.1.block_sparse_moe.experts.62.w1", "model.layers.1.block_sparse_moe.experts.63.w1", "model.layers.1.block_sparse_moe.experts.64.w1", "model.layers.1.block_sparse_moe.experts.65.w1", "model.layers.1.block_sparse_moe.experts.66.w1", "model.layers.1.block_sparse_moe.experts.67.w1", "model.layers.1.block_sparse_moe.experts.68.w1", "model.layers.1.block_sparse_moe.experts.69.w1", "model.layers.1.block_sparse_moe.experts.70.w1", "model.layers.1.block_sparse_moe.experts.71.w1", "model.layers.1.block_sparse_moe.experts.72.w1", "model.layers.1.block_sparse_moe.experts.73.w1", "model.layers.1.block_sparse_moe.experts.74.w1", "model.layers.1.block_sparse_moe.experts.75.w1", "model.layers.1.block_sparse_moe.experts.76.w1", "model.layers.1.block_sparse_moe.experts.77.w1", "model.layers.1.block_sparse_moe.experts.78.w1", "model.layers.1.block_sparse_moe.experts.79.w1", "model.layers.1.block_sparse_moe.experts.80.w1", "model.layers.1.block_sparse_moe.experts.81.w1", "model.layers.1.block_sparse_moe.experts.82.w1", "model.layers.1.block_sparse_moe.experts.83.w1", "model.layers.1.block_sparse_moe.experts.84.w1", "model.layers.1.block_sparse_moe.experts.85.w1", "model.layers.1.block_sparse_moe.experts.86.w1", "model.layers.1.block_sparse_moe.experts.87.w1", "model.layers.1.block_sparse_moe.experts.88.w1", "model.layers.1.block_sparse_moe.experts.89.w1", "model.layers.1.block_sparse_moe.experts.90.w1", "model.layers.1.block_sparse_moe.experts.91.w1", "model.layers.1.block_sparse_moe.experts.92.w1", "model.layers.1.block_sparse_moe.experts.93.w1", "model.layers.1.block_sparse_moe.experts.94.w1", "model.layers.1.block_sparse_moe.experts.95.w1", "model.layers.1.block_sparse_moe.experts.96.w1", "model.layers.1.block_sparse_moe.experts.97.w1", "model.layers.1.block_sparse_moe.experts.98.w1", "model.layers.1.block_sparse_moe.experts.99.w1", "model.layers.1.block_sparse_moe.experts.100.w1", "model.layers.1.block_sparse_moe.experts.101.w1", "model.layers.1.block_sparse_moe.experts.102.w1", "model.layers.1.block_sparse_moe.experts.103.w1", "model.layers.1.block_sparse_moe.experts.104.w1", "model.layers.1.block_sparse_moe.experts.105.w1", "model.layers.1.block_sparse_moe.experts.106.w1", "model.layers.1.block_sparse_moe.experts.107.w1", "model.layers.1.block_sparse_moe.experts.108.w1", "model.layers.1.block_sparse_moe.experts.109.w1", "model.layers.1.block_sparse_moe.experts.110.w1", "model.layers.1.block_sparse_moe.experts.111.w1", "model.layers.1.block_sparse_moe.experts.112.w1", "model.layers.1.block_sparse_moe.experts.113.w1", "model.layers.1.block_sparse_moe.experts.114.w1", "model.layers.1.block_sparse_moe.experts.115.w1", "model.layers.1.block_sparse_moe.experts.116.w1", "model.layers.1.block_sparse_moe.experts.117.w1", "model.layers.1.block_sparse_moe.experts.118.w1", "model.layers.1.block_sparse_moe.experts.119.w1", "model.layers.1.block_sparse_moe.experts.120.w1", "model.layers.1.block_sparse_moe.experts.121.w1", "model.layers.1.block_sparse_moe.experts.122.w1", "model.layers.1.block_sparse_moe.experts.123.w1", "model.layers.1.block_sparse_moe.experts.124.w1", "model.layers.1.block_sparse_moe.experts.125.w1", "model.layers.1.block_sparse_moe.experts.126.w1", "model.layers.1.block_sparse_moe.experts.127.w1", "model.layers.1.block_sparse_moe.experts.128.w1", "model.layers.1.block_sparse_moe.experts.129.w1", "model.layers.1.block_sparse_moe.experts.130.w1", "model.layers.1.block_sparse_moe.experts.131.w1", "model.layers.1.block_sparse_moe.experts.132.w1", "model.layers.1.block_sparse_moe.experts.133.w1", "model.layers.1.block_sparse_moe.experts.134.w1", "model.layers.1.block_sparse_moe.experts.135.w1", "model.layers.1.block_sparse_moe.experts.136.w1", "model.layers.1.block_sparse_moe.experts.137.w1", "model.layers.1.block_sparse_moe.experts.138.w1", "model.layers.1.block_sparse_moe.experts.139.w1", "model.layers.1.block_sparse_moe.experts.140.w1", "model.layers.1.block_sparse_moe.experts.141.w1", "model.layers.1.block_sparse_moe.experts.142.w1", "model.layers.1.block_sparse_moe.experts.143.w1", "model.layers.1.block_sparse_moe.experts.144.w1", "model.layers.1.block_sparse_moe.experts.145.w1", "model.layers.1.block_sparse_moe.experts.146.w1", "model.layers.1.block_sparse_moe.experts.147.w1", "model.layers.1.block_sparse_moe.experts.148.w1", "model.layers.1.block_sparse_moe.experts.149.w1", "model.layers.1.block_sparse_moe.experts.150.w1", "model.layers.1.block_sparse_moe.experts.151.w1", "model.layers.1.block_sparse_moe.experts.152.w1", "model.layers.1.block_sparse_moe.experts.153.w1", "model.layers.1.block_sparse_moe.experts.154.w1", "model.layers.1.block_sparse_moe.experts.155.w1", "model.layers.1.block_sparse_moe.experts.156.w1", "model.layers.1.block_sparse_moe.experts.157.w1", "model.layers.1.block_sparse_moe.experts.158.w1", "model.layers.1.block_sparse_moe.experts.159.w1", "model.layers.1.block_sparse_moe.experts.160.w1", "model.layers.1.block_sparse_moe.experts.161.w1", "model.layers.1.block_sparse_moe.experts.162.w1", "model.layers.1.block_sparse_moe.experts.163.w1", "model.layers.1.block_sparse_moe.experts.164.w1", "model.layers.1.block_sparse_moe.experts.165.w1", "model.layers.1.block_sparse_moe.experts.166.w1", "model.layers.1.block_sparse_moe.experts.167.w1", "model.layers.1.block_sparse_moe.experts.168.w1", "model.layers.1.block_sparse_moe.experts.169.w1", "model.layers.1.block_sparse_moe.experts.170.w1", "model.layers.1.block_sparse_moe.experts.171.w1", "model.layers.1.block_sparse_moe.experts.172.w1", "model.layers.1.block_sparse_moe.experts.173.w1", "model.layers.1.block_sparse_moe.experts.174.w1", "model.layers.1.block_sparse_moe.experts.175.w1", "model.layers.1.block_sparse_moe.experts.176.w1", "model.layers.1.block_sparse_moe.experts.177.w1", "model.layers.1.block_sparse_moe.experts.178.w1", "model.layers.1.block_sparse_moe.experts.179.w1", "model.layers.1.block_sparse_moe.experts.180.w1", "model.layers.1.block_sparse_moe.experts.181.w1", "model.layers.1.block_sparse_moe.experts.182.w1", "model.layers.1.block_sparse_moe.experts.183.w1", "model.layers.1.block_sparse_moe.experts.184.w1", "model.layers.1.block_sparse_moe.experts.185.w1", "model.layers.1.block_sparse_moe.experts.186.w1", "model.layers.1.block_sparse_moe.experts.187.w1", "model.layers.1.block_sparse_moe.experts.188.w1", "model.layers.1.block_sparse_moe.experts.189.w1", "model.layers.1.block_sparse_moe.experts.190.w1", "model.layers.1.block_sparse_moe.experts.191.w1", "model.layers.1.block_sparse_moe.experts.192.w1", "model.layers.1.block_sparse_moe.experts.193.w1", "model.layers.1.block_sparse_moe.experts.194.w1", "model.layers.1.block_sparse_moe.experts.195.w1", "model.layers.1.block_sparse_moe.experts.196.w1", "model.layers.1.block_sparse_moe.experts.197.w1", "model.layers.1.block_sparse_moe.experts.198.w1", "model.layers.1.block_sparse_moe.experts.199.w1", "model.layers.1.block_sparse_moe.experts.200.w1", "model.layers.1.block_sparse_moe.experts.201.w1", "model.layers.1.block_sparse_moe.experts.202.w1", "model.layers.1.block_sparse_moe.experts.203.w1", "model.layers.1.block_sparse_moe.experts.204.w1", "model.layers.1.block_sparse_moe.experts.205.w1", "model.layers.1.block_sparse_moe.experts.206.w1", "model.layers.1.block_sparse_moe.experts.207.w1", "model.layers.1.block_sparse_moe.experts.208.w1", "model.layers.1.block_sparse_moe.experts.209.w1", "model.layers.1.block_sparse_moe.experts.210.w1", "model.layers.1.block_sparse_moe.experts.211.w1", "model.layers.1.block_sparse_moe.experts.212.w1", "model.layers.1.block_sparse_moe.experts.213.w1", "model.layers.1.block_sparse_moe.experts.214.w1", "model.layers.1.block_sparse_moe.experts.215.w1", "model.layers.1.block_sparse_moe.experts.216.w1", "model.layers.1.block_sparse_moe.experts.217.w1", "model.layers.1.block_sparse_moe.experts.218.w1", "model.layers.1.block_sparse_moe.experts.219.w1", "model.layers.1.block_sparse_moe.experts.220.w1", "model.layers.1.block_sparse_moe.experts.221.w1", "model.layers.1.block_sparse_moe.experts.222.w1", "model.layers.1.block_sparse_moe.experts.223.w1", "model.layers.1.block_sparse_moe.experts.224.w1", "model.layers.1.block_sparse_moe.experts.225.w1", "model.layers.1.block_sparse_moe.experts.226.w1", "model.layers.1.block_sparse_moe.experts.227.w1", "model.layers.1.block_sparse_moe.experts.228.w1", "model.layers.1.block_sparse_moe.experts.229.w1", "model.layers.1.block_sparse_moe.experts.230.w1", "model.layers.1.block_sparse_moe.experts.231.w1", "model.layers.1.block_sparse_moe.experts.232.w1", "model.layers.1.block_sparse_moe.experts.233.w1", "model.layers.1.block_sparse_moe.experts.234.w1", "model.layers.1.block_sparse_moe.experts.235.w1", "model.layers.1.block_sparse_moe.experts.236.w1", "model.layers.1.block_sparse_moe.experts.237.w1", "model.layers.1.block_sparse_moe.experts.238.w1", "model.layers.1.block_sparse_moe.experts.239.w1", "model.layers.1.block_sparse_moe.experts.240.w1", "model.layers.1.block_sparse_moe.experts.241.w1", "model.layers.1.block_sparse_moe.experts.242.w1", "model.layers.1.block_sparse_moe.experts.243.w1", "model.layers.1.block_sparse_moe.experts.244.w1", "model.layers.1.block_sparse_moe.experts.245.w1", "model.layers.1.block_sparse_moe.experts.246.w1", "model.layers.1.block_sparse_moe.experts.247.w1", "model.layers.1.block_sparse_moe.experts.248.w1", "model.layers.1.block_sparse_moe.experts.249.w1", "model.layers.1.block_sparse_moe.experts.250.w1", "model.layers.1.block_sparse_moe.experts.251.w1", "model.layers.1.block_sparse_moe.experts.252.w1", "model.layers.1.block_sparse_moe.experts.253.w1", "model.layers.1.block_sparse_moe.experts.254.w1", "model.layers.1.block_sparse_moe.experts.255.w1", "model.layers.1.block_sparse_moe.experts.0.w3", "model.layers.1.block_sparse_moe.experts.1.w3", "model.layers.1.block_sparse_moe.experts.2.w3", "model.layers.1.block_sparse_moe.experts.3.w3", "model.layers.1.block_sparse_moe.experts.4.w3", "model.layers.1.block_sparse_moe.experts.5.w3", "model.layers.1.block_sparse_moe.experts.6.w3", "model.layers.1.block_sparse_moe.experts.7.w3", "model.layers.1.block_sparse_moe.experts.8.w3", "model.layers.1.block_sparse_moe.experts.9.w3", "model.layers.1.block_sparse_moe.experts.10.w3", "model.layers.1.block_sparse_moe.experts.11.w3", "model.layers.1.block_sparse_moe.experts.12.w3", "model.layers.1.block_sparse_moe.experts.13.w3", "model.layers.1.block_sparse_moe.experts.14.w3", "model.layers.1.block_sparse_moe.experts.15.w3", "model.layers.1.block_sparse_moe.experts.16.w3", "model.layers.1.block_sparse_moe.experts.17.w3", "model.layers.1.block_sparse_moe.experts.18.w3", "model.layers.1.block_sparse_moe.experts.19.w3", "model.layers.1.block_sparse_moe.experts.20.w3", "model.layers.1.block_sparse_moe.experts.21.w3", "model.layers.1.block_sparse_moe.experts.22.w3", "model.layers.1.block_sparse_moe.experts.23.w3", "model.layers.1.block_sparse_moe.experts.24.w3", "model.layers.1.block_sparse_moe.experts.25.w3", "model.layers.1.block_sparse_moe.experts.26.w3", "model.layers.1.block_sparse_moe.experts.27.w3", "model.layers.1.block_sparse_moe.experts.28.w3", "model.layers.1.block_sparse_moe.experts.29.w3", "model.layers.1.block_sparse_moe.experts.30.w3", "model.layers.1.block_sparse_moe.experts.31.w3", "model.layers.1.block_sparse_moe.experts.32.w3", "model.layers.1.block_sparse_moe.experts.33.w3", "model.layers.1.block_sparse_moe.experts.34.w3", "model.layers.1.block_sparse_moe.experts.35.w3", "model.layers.1.block_sparse_moe.experts.36.w3", "model.layers.1.block_sparse_moe.experts.37.w3", "model.layers.1.block_sparse_moe.experts.38.w3", "model.layers.1.block_sparse_moe.experts.39.w3", "model.layers.1.block_sparse_moe.experts.40.w3", "model.layers.1.block_sparse_moe.experts.41.w3", "model.layers.1.block_sparse_moe.experts.42.w3", "model.layers.1.block_sparse_moe.experts.43.w3", "model.layers.1.block_sparse_moe.experts.44.w3", "model.layers.1.block_sparse_moe.experts.45.w3", "model.layers.1.block_sparse_moe.experts.46.w3", "model.layers.1.block_sparse_moe.experts.47.w3", "model.layers.1.block_sparse_moe.experts.48.w3", "model.layers.1.block_sparse_moe.experts.49.w3", "model.layers.1.block_sparse_moe.experts.50.w3", "model.layers.1.block_sparse_moe.experts.51.w3", "model.layers.1.block_sparse_moe.experts.52.w3", "model.layers.1.block_sparse_moe.experts.53.w3", "model.layers.1.block_sparse_moe.experts.54.w3", "model.layers.1.block_sparse_moe.experts.55.w3", "model.layers.1.block_sparse_moe.experts.56.w3", "model.layers.1.block_sparse_moe.experts.57.w3", "model.layers.1.block_sparse_moe.experts.58.w3", "model.layers.1.block_sparse_moe.experts.59.w3", "model.layers.1.block_sparse_moe.experts.60.w3", "model.layers.1.block_sparse_moe.experts.61.w3", "model.layers.1.block_sparse_moe.experts.62.w3", "model.layers.1.block_sparse_moe.experts.63.w3", "model.layers.1.block_sparse_moe.experts.64.w3", "model.layers.1.block_sparse_moe.experts.65.w3", "model.layers.1.block_sparse_moe.experts.66.w3", "model.layers.1.block_sparse_moe.experts.67.w3", "model.layers.1.block_sparse_moe.experts.68.w3", "model.layers.1.block_sparse_moe.experts.69.w3", "model.layers.1.block_sparse_moe.experts.70.w3", "model.layers.1.block_sparse_moe.experts.71.w3", "model.layers.1.block_sparse_moe.experts.72.w3", "model.layers.1.block_sparse_moe.experts.73.w3", "model.layers.1.block_sparse_moe.experts.74.w3", "model.layers.1.block_sparse_moe.experts.75.w3", "model.layers.1.block_sparse_moe.experts.76.w3", "model.layers.1.block_sparse_moe.experts.77.w3", "model.layers.1.block_sparse_moe.experts.78.w3", "model.layers.1.block_sparse_moe.experts.79.w3", "model.layers.1.block_sparse_moe.experts.80.w3", "model.layers.1.block_sparse_moe.experts.81.w3", "model.layers.1.block_sparse_moe.experts.82.w3", "model.layers.1.block_sparse_moe.experts.83.w3", "model.layers.1.block_sparse_moe.experts.84.w3", "model.layers.1.block_sparse_moe.experts.85.w3", "model.layers.1.block_sparse_moe.experts.86.w3", "model.layers.1.block_sparse_moe.experts.87.w3", "model.layers.1.block_sparse_moe.experts.88.w3", "model.layers.1.block_sparse_moe.experts.89.w3", "model.layers.1.block_sparse_moe.experts.90.w3", "model.layers.1.block_sparse_moe.experts.91.w3", "model.layers.1.block_sparse_moe.experts.92.w3", "model.layers.1.block_sparse_moe.experts.93.w3", "model.layers.1.block_sparse_moe.experts.94.w3", "model.layers.1.block_sparse_moe.experts.95.w3", "model.layers.1.block_sparse_moe.experts.96.w3", "model.layers.1.block_sparse_moe.experts.97.w3", "model.layers.1.block_sparse_moe.experts.98.w3", "model.layers.1.block_sparse_moe.experts.99.w3", "model.layers.1.block_sparse_moe.experts.100.w3", "model.layers.1.block_sparse_moe.experts.101.w3", "model.layers.1.block_sparse_moe.experts.102.w3", "model.layers.1.block_sparse_moe.experts.103.w3", "model.layers.1.block_sparse_moe.experts.104.w3", "model.layers.1.block_sparse_moe.experts.105.w3", "model.layers.1.block_sparse_moe.experts.106.w3", "model.layers.1.block_sparse_moe.experts.107.w3", "model.layers.1.block_sparse_moe.experts.108.w3", "model.layers.1.block_sparse_moe.experts.109.w3", "model.layers.1.block_sparse_moe.experts.110.w3", "model.layers.1.block_sparse_moe.experts.111.w3", "model.layers.1.block_sparse_moe.experts.112.w3", "model.layers.1.block_sparse_moe.experts.113.w3", "model.layers.1.block_sparse_moe.experts.114.w3", "model.layers.1.block_sparse_moe.experts.115.w3", "model.layers.1.block_sparse_moe.experts.116.w3", "model.layers.1.block_sparse_moe.experts.117.w3", "model.layers.1.block_sparse_moe.experts.118.w3", "model.layers.1.block_sparse_moe.experts.119.w3", "model.layers.1.block_sparse_moe.experts.120.w3", "model.layers.1.block_sparse_moe.experts.121.w3", "model.layers.1.block_sparse_moe.experts.122.w3", "model.layers.1.block_sparse_moe.experts.123.w3", "model.layers.1.block_sparse_moe.experts.124.w3", "model.layers.1.block_sparse_moe.experts.125.w3", "model.layers.1.block_sparse_moe.experts.126.w3", "model.layers.1.block_sparse_moe.experts.127.w3", "model.layers.1.block_sparse_moe.experts.128.w3", "model.layers.1.block_sparse_moe.experts.129.w3", "model.layers.1.block_sparse_moe.experts.130.w3", "model.layers.1.block_sparse_moe.experts.131.w3", "model.layers.1.block_sparse_moe.experts.132.w3", "model.layers.1.block_sparse_moe.experts.133.w3", "model.layers.1.block_sparse_moe.experts.134.w3", "model.layers.1.block_sparse_moe.experts.135.w3", "model.layers.1.block_sparse_moe.experts.136.w3", "model.layers.1.block_sparse_moe.experts.137.w3", "model.layers.1.block_sparse_moe.experts.138.w3", "model.layers.1.block_sparse_moe.experts.139.w3", "model.layers.1.block_sparse_moe.experts.140.w3", "model.layers.1.block_sparse_moe.experts.141.w3", "model.layers.1.block_sparse_moe.experts.142.w3", "model.layers.1.block_sparse_moe.experts.143.w3", "model.layers.1.block_sparse_moe.experts.144.w3", "model.layers.1.block_sparse_moe.experts.145.w3", "model.layers.1.block_sparse_moe.experts.146.w3", "model.layers.1.block_sparse_moe.experts.147.w3", "model.layers.1.block_sparse_moe.experts.148.w3", "model.layers.1.block_sparse_moe.experts.149.w3", "model.layers.1.block_sparse_moe.experts.150.w3", "model.layers.1.block_sparse_moe.experts.151.w3", "model.layers.1.block_sparse_moe.experts.152.w3", "model.layers.1.block_sparse_moe.experts.153.w3", "model.layers.1.block_sparse_moe.experts.154.w3", "model.layers.1.block_sparse_moe.experts.155.w3", "model.layers.1.block_sparse_moe.experts.156.w3", "model.layers.1.block_sparse_moe.experts.157.w3", "model.layers.1.block_sparse_moe.experts.158.w3", "model.layers.1.block_sparse_moe.experts.159.w3", "model.layers.1.block_sparse_moe.experts.160.w3", "model.layers.1.block_sparse_moe.experts.161.w3", "model.layers.1.block_sparse_moe.experts.162.w3", "model.layers.1.block_sparse_moe.experts.163.w3", "model.layers.1.block_sparse_moe.experts.164.w3", "model.layers.1.block_sparse_moe.experts.165.w3", "model.layers.1.block_sparse_moe.experts.166.w3", "model.layers.1.block_sparse_moe.experts.167.w3", "model.layers.1.block_sparse_moe.experts.168.w3", "model.layers.1.block_sparse_moe.experts.169.w3", "model.layers.1.block_sparse_moe.experts.170.w3", "model.layers.1.block_sparse_moe.experts.171.w3", "model.layers.1.block_sparse_moe.experts.172.w3", "model.layers.1.block_sparse_moe.experts.173.w3", "model.layers.1.block_sparse_moe.experts.174.w3", "model.layers.1.block_sparse_moe.experts.175.w3", "model.layers.1.block_sparse_moe.experts.176.w3", "model.layers.1.block_sparse_moe.experts.177.w3", "model.layers.1.block_sparse_moe.experts.178.w3", "model.layers.1.block_sparse_moe.experts.179.w3", "model.layers.1.block_sparse_moe.experts.180.w3", "model.layers.1.block_sparse_moe.experts.181.w3", "model.layers.1.block_sparse_moe.experts.182.w3", "model.layers.1.block_sparse_moe.experts.183.w3", "model.layers.1.block_sparse_moe.experts.184.w3", "model.layers.1.block_sparse_moe.experts.185.w3", "model.layers.1.block_sparse_moe.experts.186.w3", "model.layers.1.block_sparse_moe.experts.187.w3", "model.layers.1.block_sparse_moe.experts.188.w3", "model.layers.1.block_sparse_moe.experts.189.w3", "model.layers.1.block_sparse_moe.experts.190.w3", "model.layers.1.block_sparse_moe.experts.191.w3", "model.layers.1.block_sparse_moe.experts.192.w3", "model.layers.1.block_sparse_moe.experts.193.w3", "model.layers.1.block_sparse_moe.experts.194.w3", "model.layers.1.block_sparse_moe.experts.195.w3", "model.layers.1.block_sparse_moe.experts.196.w3", "model.layers.1.block_sparse_moe.experts.197.w3", "model.layers.1.block_sparse_moe.experts.198.w3", "model.layers.1.block_sparse_moe.experts.199.w3", "model.layers.1.block_sparse_moe.experts.200.w3", "model.layers.1.block_sparse_moe.experts.201.w3", "model.layers.1.block_sparse_moe.experts.202.w3", "model.layers.1.block_sparse_moe.experts.203.w3", "model.layers.1.block_sparse_moe.experts.204.w3", "model.layers.1.block_sparse_moe.experts.205.w3", "model.layers.1.block_sparse_moe.experts.206.w3", "model.layers.1.block_sparse_moe.experts.207.w3", "model.layers.1.block_sparse_moe.experts.208.w3", "model.layers.1.block_sparse_moe.experts.209.w3", "model.layers.1.block_sparse_moe.experts.210.w3", "model.layers.1.block_sparse_moe.experts.211.w3", "model.layers.1.block_sparse_moe.experts.212.w3", "model.layers.1.block_sparse_moe.experts.213.w3", "model.layers.1.block_sparse_moe.experts.214.w3", "model.layers.1.block_sparse_moe.experts.215.w3", "model.layers.1.block_sparse_moe.experts.216.w3", "model.layers.1.block_sparse_moe.experts.217.w3", "model.layers.1.block_sparse_moe.experts.218.w3", "model.layers.1.block_sparse_moe.experts.219.w3", "model.layers.1.block_sparse_moe.experts.220.w3", "model.layers.1.block_sparse_moe.experts.221.w3", "model.layers.1.block_sparse_moe.experts.222.w3", "model.layers.1.block_sparse_moe.experts.223.w3", "model.layers.1.block_sparse_moe.experts.224.w3", "model.layers.1.block_sparse_moe.experts.225.w3", "model.layers.1.block_sparse_moe.experts.226.w3", "model.layers.1.block_sparse_moe.experts.227.w3", "model.layers.1.block_sparse_moe.experts.228.w3", "model.layers.1.block_sparse_moe.experts.229.w3", "model.layers.1.block_sparse_moe.experts.230.w3", "model.layers.1.block_sparse_moe.experts.231.w3", "model.layers.1.block_sparse_moe.experts.232.w3", "model.layers.1.block_sparse_moe.experts.233.w3", "model.layers.1.block_sparse_moe.experts.234.w3", "model.layers.1.block_sparse_moe.experts.235.w3", "model.layers.1.block_sparse_moe.experts.236.w3", "model.layers.1.block_sparse_moe.experts.237.w3", "model.layers.1.block_sparse_moe.experts.238.w3", "model.layers.1.block_sparse_moe.experts.239.w3", "model.layers.1.block_sparse_moe.experts.240.w3", "model.layers.1.block_sparse_moe.experts.241.w3", "model.layers.1.block_sparse_moe.experts.242.w3", "model.layers.1.block_sparse_moe.experts.243.w3", "model.layers.1.block_sparse_moe.experts.244.w3", "model.layers.1.block_sparse_moe.experts.245.w3", "model.layers.1.block_sparse_moe.experts.246.w3", "model.layers.1.block_sparse_moe.experts.247.w3", "model.layers.1.block_sparse_moe.experts.248.w3", "model.layers.1.block_sparse_moe.experts.249.w3", "model.layers.1.block_sparse_moe.experts.250.w3", "model.layers.1.block_sparse_moe.experts.251.w3", "model.layers.1.block_sparse_moe.experts.252.w3", "model.layers.1.block_sparse_moe.experts.253.w3", "model.layers.1.block_sparse_moe.experts.254.w3", "model.layers.1.block_sparse_moe.experts.255.w3", "model.layers.1.block_sparse_moe.experts.0.w2", "model.layers.1.block_sparse_moe.experts.1.w2", "model.layers.1.block_sparse_moe.experts.2.w2", "model.layers.1.block_sparse_moe.experts.3.w2", "model.layers.1.block_sparse_moe.experts.4.w2", "model.layers.1.block_sparse_moe.experts.5.w2", "model.layers.1.block_sparse_moe.experts.6.w2", "model.layers.1.block_sparse_moe.experts.7.w2", "model.layers.1.block_sparse_moe.experts.8.w2", "model.layers.1.block_sparse_moe.experts.9.w2", "model.layers.1.block_sparse_moe.experts.10.w2", "model.layers.1.block_sparse_moe.experts.11.w2", "model.layers.1.block_sparse_moe.experts.12.w2", "model.layers.1.block_sparse_moe.experts.13.w2", "model.layers.1.block_sparse_moe.experts.14.w2", "model.layers.1.block_sparse_moe.experts.15.w2", "model.layers.1.block_sparse_moe.experts.16.w2", "model.layers.1.block_sparse_moe.experts.17.w2", "model.layers.1.block_sparse_moe.experts.18.w2", "model.layers.1.block_sparse_moe.experts.19.w2", "model.layers.1.block_sparse_moe.experts.20.w2", "model.layers.1.block_sparse_moe.experts.21.w2", "model.layers.1.block_sparse_moe.experts.22.w2", "model.layers.1.block_sparse_moe.experts.23.w2", "model.layers.1.block_sparse_moe.experts.24.w2", "model.layers.1.block_sparse_moe.experts.25.w2", "model.layers.1.block_sparse_moe.experts.26.w2", "model.layers.1.block_sparse_moe.experts.27.w2", "model.layers.1.block_sparse_moe.experts.28.w2", "model.layers.1.block_sparse_moe.experts.29.w2", "model.layers.1.block_sparse_moe.experts.30.w2", "model.layers.1.block_sparse_moe.experts.31.w2", "model.layers.1.block_sparse_moe.experts.32.w2", "model.layers.1.block_sparse_moe.experts.33.w2", "model.layers.1.block_sparse_moe.experts.34.w2", "model.layers.1.block_sparse_moe.experts.35.w2", "model.layers.1.block_sparse_moe.experts.36.w2", "model.layers.1.block_sparse_moe.experts.37.w2", "model.layers.1.block_sparse_moe.experts.38.w2", "model.layers.1.block_sparse_moe.experts.39.w2", "model.layers.1.block_sparse_moe.experts.40.w2", "model.layers.1.block_sparse_moe.experts.41.w2", "model.layers.1.block_sparse_moe.experts.42.w2", "model.layers.1.block_sparse_moe.experts.43.w2", "model.layers.1.block_sparse_moe.experts.44.w2", "model.layers.1.block_sparse_moe.experts.45.w2", "model.layers.1.block_sparse_moe.experts.46.w2", "model.layers.1.block_sparse_moe.experts.47.w2", "model.layers.1.block_sparse_moe.experts.48.w2", "model.layers.1.block_sparse_moe.experts.49.w2", "model.layers.1.block_sparse_moe.experts.50.w2", "model.layers.1.block_sparse_moe.experts.51.w2", "model.layers.1.block_sparse_moe.experts.52.w2", "model.layers.1.block_sparse_moe.experts.53.w2", "model.layers.1.block_sparse_moe.experts.54.w2", "model.layers.1.block_sparse_moe.experts.55.w2", "model.layers.1.block_sparse_moe.experts.56.w2", "model.layers.1.block_sparse_moe.experts.57.w2", "model.layers.1.block_sparse_moe.experts.58.w2", "model.layers.1.block_sparse_moe.experts.59.w2", "model.layers.1.block_sparse_moe.experts.60.w2", "model.layers.1.block_sparse_moe.experts.61.w2", "model.layers.1.block_sparse_moe.experts.62.w2", "model.layers.1.block_sparse_moe.experts.63.w2", "model.layers.1.block_sparse_moe.experts.64.w2", "model.layers.1.block_sparse_moe.experts.65.w2", "model.layers.1.block_sparse_moe.experts.66.w2", "model.layers.1.block_sparse_moe.experts.67.w2", "model.layers.1.block_sparse_moe.experts.68.w2", "model.layers.1.block_sparse_moe.experts.69.w2", "model.layers.1.block_sparse_moe.experts.70.w2", "model.layers.1.block_sparse_moe.experts.71.w2", "model.layers.1.block_sparse_moe.experts.72.w2", "model.layers.1.block_sparse_moe.experts.73.w2", "model.layers.1.block_sparse_moe.experts.74.w2", "model.layers.1.block_sparse_moe.experts.75.w2", "model.layers.1.block_sparse_moe.experts.76.w2", "model.layers.1.block_sparse_moe.experts.77.w2", "model.layers.1.block_sparse_moe.experts.78.w2", "model.layers.1.block_sparse_moe.experts.79.w2", "model.layers.1.block_sparse_moe.experts.80.w2", "model.layers.1.block_sparse_moe.experts.81.w2", "model.layers.1.block_sparse_moe.experts.82.w2", "model.layers.1.block_sparse_moe.experts.83.w2", "model.layers.1.block_sparse_moe.experts.84.w2", "model.layers.1.block_sparse_moe.experts.85.w2", "model.layers.1.block_sparse_moe.experts.86.w2", "model.layers.1.block_sparse_moe.experts.87.w2", "model.layers.1.block_sparse_moe.experts.88.w2", "model.layers.1.block_sparse_moe.experts.89.w2", "model.layers.1.block_sparse_moe.experts.90.w2", "model.layers.1.block_sparse_moe.experts.91.w2", "model.layers.1.block_sparse_moe.experts.92.w2", "model.layers.1.block_sparse_moe.experts.93.w2", "model.layers.1.block_sparse_moe.experts.94.w2", "model.layers.1.block_sparse_moe.experts.95.w2", "model.layers.1.block_sparse_moe.experts.96.w2", "model.layers.1.block_sparse_moe.experts.97.w2", "model.layers.1.block_sparse_moe.experts.98.w2", "model.layers.1.block_sparse_moe.experts.99.w2", "model.layers.1.block_sparse_moe.experts.100.w2", "model.layers.1.block_sparse_moe.experts.101.w2", "model.layers.1.block_sparse_moe.experts.102.w2", "model.layers.1.block_sparse_moe.experts.103.w2", "model.layers.1.block_sparse_moe.experts.104.w2", "model.layers.1.block_sparse_moe.experts.105.w2", "model.layers.1.block_sparse_moe.experts.106.w2", "model.layers.1.block_sparse_moe.experts.107.w2", "model.layers.1.block_sparse_moe.experts.108.w2", "model.layers.1.block_sparse_moe.experts.109.w2", "model.layers.1.block_sparse_moe.experts.110.w2", "model.layers.1.block_sparse_moe.experts.111.w2", "model.layers.1.block_sparse_moe.experts.112.w2", "model.layers.1.block_sparse_moe.experts.113.w2", "model.layers.1.block_sparse_moe.experts.114.w2", "model.layers.1.block_sparse_moe.experts.115.w2", "model.layers.1.block_sparse_moe.experts.116.w2", "model.layers.1.block_sparse_moe.experts.117.w2", "model.layers.1.block_sparse_moe.experts.118.w2", "model.layers.1.block_sparse_moe.experts.119.w2", "model.layers.1.block_sparse_moe.experts.120.w2", "model.layers.1.block_sparse_moe.experts.121.w2", "model.layers.1.block_sparse_moe.experts.122.w2", "model.layers.1.block_sparse_moe.experts.123.w2", "model.layers.1.block_sparse_moe.experts.124.w2", "model.layers.1.block_sparse_moe.experts.125.w2", "model.layers.1.block_sparse_moe.experts.126.w2", "model.layers.1.block_sparse_moe.experts.127.w2", "model.layers.1.block_sparse_moe.experts.128.w2", "model.layers.1.block_sparse_moe.experts.129.w2", "model.layers.1.block_sparse_moe.experts.130.w2", "model.layers.1.block_sparse_moe.experts.131.w2", "model.layers.1.block_sparse_moe.experts.132.w2", "model.layers.1.block_sparse_moe.experts.133.w2", "model.layers.1.block_sparse_moe.experts.134.w2", "model.layers.1.block_sparse_moe.experts.135.w2", "model.layers.1.block_sparse_moe.experts.136.w2", "model.layers.1.block_sparse_moe.experts.137.w2", "model.layers.1.block_sparse_moe.experts.138.w2", "model.layers.1.block_sparse_moe.experts.139.w2", "model.layers.1.block_sparse_moe.experts.140.w2", "model.layers.1.block_sparse_moe.experts.141.w2", "model.layers.1.block_sparse_moe.experts.142.w2", "model.layers.1.block_sparse_moe.experts.143.w2", "model.layers.1.block_sparse_moe.experts.144.w2", "model.layers.1.block_sparse_moe.experts.145.w2", "model.layers.1.block_sparse_moe.experts.146.w2", "model.layers.1.block_sparse_moe.experts.147.w2", "model.layers.1.block_sparse_moe.experts.148.w2", "model.layers.1.block_sparse_moe.experts.149.w2", "model.layers.1.block_sparse_moe.experts.150.w2", "model.layers.1.block_sparse_moe.experts.151.w2", "model.layers.1.block_sparse_moe.experts.152.w2", "model.layers.1.block_sparse_moe.experts.153.w2", "model.layers.1.block_sparse_moe.experts.154.w2", "model.layers.1.block_sparse_moe.experts.155.w2", "model.layers.1.block_sparse_moe.experts.156.w2", "model.layers.1.block_sparse_moe.experts.157.w2", "model.layers.1.block_sparse_moe.experts.158.w2", "model.layers.1.block_sparse_moe.experts.159.w2", "model.layers.1.block_sparse_moe.experts.160.w2", "model.layers.1.block_sparse_moe.experts.161.w2", "model.layers.1.block_sparse_moe.experts.162.w2", "model.layers.1.block_sparse_moe.experts.163.w2", "model.layers.1.block_sparse_moe.experts.164.w2", "model.layers.1.block_sparse_moe.experts.165.w2", "model.layers.1.block_sparse_moe.experts.166.w2", "model.layers.1.block_sparse_moe.experts.167.w2", "model.layers.1.block_sparse_moe.experts.168.w2", "model.layers.1.block_sparse_moe.experts.169.w2", "model.layers.1.block_sparse_moe.experts.170.w2", "model.layers.1.block_sparse_moe.experts.171.w2", "model.layers.1.block_sparse_moe.experts.172.w2", "model.layers.1.block_sparse_moe.experts.173.w2", "model.layers.1.block_sparse_moe.experts.174.w2", "model.layers.1.block_sparse_moe.experts.175.w2", "model.layers.1.block_sparse_moe.experts.176.w2", "model.layers.1.block_sparse_moe.experts.177.w2", "model.layers.1.block_sparse_moe.experts.178.w2", "model.layers.1.block_sparse_moe.experts.179.w2", "model.layers.1.block_sparse_moe.experts.180.w2", "model.layers.1.block_sparse_moe.experts.181.w2", "model.layers.1.block_sparse_moe.experts.182.w2", "model.layers.1.block_sparse_moe.experts.183.w2", "model.layers.1.block_sparse_moe.experts.184.w2", "model.layers.1.block_sparse_moe.experts.185.w2", "model.layers.1.block_sparse_moe.experts.186.w2", "model.layers.1.block_sparse_moe.experts.187.w2", "model.layers.1.block_sparse_moe.experts.188.w2", "model.layers.1.block_sparse_moe.experts.189.w2", "model.layers.1.block_sparse_moe.experts.190.w2", "model.layers.1.block_sparse_moe.experts.191.w2", "model.layers.1.block_sparse_moe.experts.192.w2", "model.layers.1.block_sparse_moe.experts.193.w2", "model.layers.1.block_sparse_moe.experts.194.w2", "model.layers.1.block_sparse_moe.experts.195.w2", "model.layers.1.block_sparse_moe.experts.196.w2", "model.layers.1.block_sparse_moe.experts.197.w2", "model.layers.1.block_sparse_moe.experts.198.w2", "model.layers.1.block_sparse_moe.experts.199.w2", "model.layers.1.block_sparse_moe.experts.200.w2", "model.layers.1.block_sparse_moe.experts.201.w2", "model.layers.1.block_sparse_moe.experts.202.w2", "model.layers.1.block_sparse_moe.experts.203.w2", "model.layers.1.block_sparse_moe.experts.204.w2", "model.layers.1.block_sparse_moe.experts.205.w2", "model.layers.1.block_sparse_moe.experts.206.w2", "model.layers.1.block_sparse_moe.experts.207.w2", "model.layers.1.block_sparse_moe.experts.208.w2", "model.layers.1.block_sparse_moe.experts.209.w2", "model.layers.1.block_sparse_moe.experts.210.w2", "model.layers.1.block_sparse_moe.experts.211.w2", "model.layers.1.block_sparse_moe.experts.212.w2", "model.layers.1.block_sparse_moe.experts.213.w2", "model.layers.1.block_sparse_moe.experts.214.w2", "model.layers.1.block_sparse_moe.experts.215.w2", "model.layers.1.block_sparse_moe.experts.216.w2", "model.layers.1.block_sparse_moe.experts.217.w2", "model.layers.1.block_sparse_moe.experts.218.w2", "model.layers.1.block_sparse_moe.experts.219.w2", "model.layers.1.block_sparse_moe.experts.220.w2", "model.layers.1.block_sparse_moe.experts.221.w2", "model.layers.1.block_sparse_moe.experts.222.w2", "model.layers.1.block_sparse_moe.experts.223.w2", "model.layers.1.block_sparse_moe.experts.224.w2", "model.layers.1.block_sparse_moe.experts.225.w2", "model.layers.1.block_sparse_moe.experts.226.w2", "model.layers.1.block_sparse_moe.experts.227.w2", "model.layers.1.block_sparse_moe.experts.228.w2", "model.layers.1.block_sparse_moe.experts.229.w2", "model.layers.1.block_sparse_moe.experts.230.w2", "model.layers.1.block_sparse_moe.experts.231.w2", "model.layers.1.block_sparse_moe.experts.232.w2", "model.layers.1.block_sparse_moe.experts.233.w2", "model.layers.1.block_sparse_moe.experts.234.w2", "model.layers.1.block_sparse_moe.experts.235.w2", "model.layers.1.block_sparse_moe.experts.236.w2", "model.layers.1.block_sparse_moe.experts.237.w2", "model.layers.1.block_sparse_moe.experts.238.w2", "model.layers.1.block_sparse_moe.experts.239.w2", "model.layers.1.block_sparse_moe.experts.240.w2", "model.layers.1.block_sparse_moe.experts.241.w2", "model.layers.1.block_sparse_moe.experts.242.w2", "model.layers.1.block_sparse_moe.experts.243.w2", "model.layers.1.block_sparse_moe.experts.244.w2", "model.layers.1.block_sparse_moe.experts.245.w2", "model.layers.1.block_sparse_moe.experts.246.w2", "model.layers.1.block_sparse_moe.experts.247.w2", "model.layers.1.block_sparse_moe.experts.248.w2", "model.layers.1.block_sparse_moe.experts.249.w2", "model.layers.1.block_sparse_moe.experts.250.w2", "model.layers.1.block_sparse_moe.experts.251.w2", "model.layers.1.block_sparse_moe.experts.252.w2", "model.layers.1.block_sparse_moe.experts.253.w2", "model.layers.1.block_sparse_moe.experts.254.w2", "model.layers.1.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.010604437440633774, "dbits": 3623878656 } ] }, { "idx": 4, "layers": [ "model.layers.2.self_attn.q_proj", "model.layers.2.self_attn.k_proj", "model.layers.2.self_attn.v_proj", "model.layers.2.self_attn.o_proj" ], "candidates": [ { "dkld": -0.006069594249129284, "dbits": 44040192 } ] }, { "idx": 5, "layers": [ "model.layers.2.block_sparse_moe.experts.0.w1", "model.layers.2.block_sparse_moe.experts.1.w1", "model.layers.2.block_sparse_moe.experts.2.w1", "model.layers.2.block_sparse_moe.experts.3.w1", "model.layers.2.block_sparse_moe.experts.4.w1", "model.layers.2.block_sparse_moe.experts.5.w1", "model.layers.2.block_sparse_moe.experts.6.w1", "model.layers.2.block_sparse_moe.experts.7.w1", "model.layers.2.block_sparse_moe.experts.8.w1", "model.layers.2.block_sparse_moe.experts.9.w1", "model.layers.2.block_sparse_moe.experts.10.w1", "model.layers.2.block_sparse_moe.experts.11.w1", "model.layers.2.block_sparse_moe.experts.12.w1", "model.layers.2.block_sparse_moe.experts.13.w1", "model.layers.2.block_sparse_moe.experts.14.w1", "model.layers.2.block_sparse_moe.experts.15.w1", "model.layers.2.block_sparse_moe.experts.16.w1", "model.layers.2.block_sparse_moe.experts.17.w1", "model.layers.2.block_sparse_moe.experts.18.w1", "model.layers.2.block_sparse_moe.experts.19.w1", "model.layers.2.block_sparse_moe.experts.20.w1", "model.layers.2.block_sparse_moe.experts.21.w1", "model.layers.2.block_sparse_moe.experts.22.w1", "model.layers.2.block_sparse_moe.experts.23.w1", "model.layers.2.block_sparse_moe.experts.24.w1", "model.layers.2.block_sparse_moe.experts.25.w1", "model.layers.2.block_sparse_moe.experts.26.w1", "model.layers.2.block_sparse_moe.experts.27.w1", "model.layers.2.block_sparse_moe.experts.28.w1", "model.layers.2.block_sparse_moe.experts.29.w1", "model.layers.2.block_sparse_moe.experts.30.w1", "model.layers.2.block_sparse_moe.experts.31.w1", "model.layers.2.block_sparse_moe.experts.32.w1", "model.layers.2.block_sparse_moe.experts.33.w1", "model.layers.2.block_sparse_moe.experts.34.w1", "model.layers.2.block_sparse_moe.experts.35.w1", "model.layers.2.block_sparse_moe.experts.36.w1", "model.layers.2.block_sparse_moe.experts.37.w1", "model.layers.2.block_sparse_moe.experts.38.w1", "model.layers.2.block_sparse_moe.experts.39.w1", "model.layers.2.block_sparse_moe.experts.40.w1", "model.layers.2.block_sparse_moe.experts.41.w1", "model.layers.2.block_sparse_moe.experts.42.w1", "model.layers.2.block_sparse_moe.experts.43.w1", "model.layers.2.block_sparse_moe.experts.44.w1", "model.layers.2.block_sparse_moe.experts.45.w1", "model.layers.2.block_sparse_moe.experts.46.w1", "model.layers.2.block_sparse_moe.experts.47.w1", "model.layers.2.block_sparse_moe.experts.48.w1", "model.layers.2.block_sparse_moe.experts.49.w1", "model.layers.2.block_sparse_moe.experts.50.w1", "model.layers.2.block_sparse_moe.experts.51.w1", "model.layers.2.block_sparse_moe.experts.52.w1", "model.layers.2.block_sparse_moe.experts.53.w1", "model.layers.2.block_sparse_moe.experts.54.w1", "model.layers.2.block_sparse_moe.experts.55.w1", "model.layers.2.block_sparse_moe.experts.56.w1", "model.layers.2.block_sparse_moe.experts.57.w1", "model.layers.2.block_sparse_moe.experts.58.w1", "model.layers.2.block_sparse_moe.experts.59.w1", "model.layers.2.block_sparse_moe.experts.60.w1", "model.layers.2.block_sparse_moe.experts.61.w1", "model.layers.2.block_sparse_moe.experts.62.w1", "model.layers.2.block_sparse_moe.experts.63.w1", "model.layers.2.block_sparse_moe.experts.64.w1", "model.layers.2.block_sparse_moe.experts.65.w1", "model.layers.2.block_sparse_moe.experts.66.w1", "model.layers.2.block_sparse_moe.experts.67.w1", "model.layers.2.block_sparse_moe.experts.68.w1", "model.layers.2.block_sparse_moe.experts.69.w1", "model.layers.2.block_sparse_moe.experts.70.w1", "model.layers.2.block_sparse_moe.experts.71.w1", "model.layers.2.block_sparse_moe.experts.72.w1", "model.layers.2.block_sparse_moe.experts.73.w1", "model.layers.2.block_sparse_moe.experts.74.w1", "model.layers.2.block_sparse_moe.experts.75.w1", "model.layers.2.block_sparse_moe.experts.76.w1", "model.layers.2.block_sparse_moe.experts.77.w1", "model.layers.2.block_sparse_moe.experts.78.w1", "model.layers.2.block_sparse_moe.experts.79.w1", "model.layers.2.block_sparse_moe.experts.80.w1", "model.layers.2.block_sparse_moe.experts.81.w1", "model.layers.2.block_sparse_moe.experts.82.w1", "model.layers.2.block_sparse_moe.experts.83.w1", "model.layers.2.block_sparse_moe.experts.84.w1", "model.layers.2.block_sparse_moe.experts.85.w1", "model.layers.2.block_sparse_moe.experts.86.w1", "model.layers.2.block_sparse_moe.experts.87.w1", "model.layers.2.block_sparse_moe.experts.88.w1", "model.layers.2.block_sparse_moe.experts.89.w1", "model.layers.2.block_sparse_moe.experts.90.w1", "model.layers.2.block_sparse_moe.experts.91.w1", "model.layers.2.block_sparse_moe.experts.92.w1", "model.layers.2.block_sparse_moe.experts.93.w1", "model.layers.2.block_sparse_moe.experts.94.w1", "model.layers.2.block_sparse_moe.experts.95.w1", "model.layers.2.block_sparse_moe.experts.96.w1", "model.layers.2.block_sparse_moe.experts.97.w1", "model.layers.2.block_sparse_moe.experts.98.w1", "model.layers.2.block_sparse_moe.experts.99.w1", "model.layers.2.block_sparse_moe.experts.100.w1", "model.layers.2.block_sparse_moe.experts.101.w1", "model.layers.2.block_sparse_moe.experts.102.w1", "model.layers.2.block_sparse_moe.experts.103.w1", "model.layers.2.block_sparse_moe.experts.104.w1", "model.layers.2.block_sparse_moe.experts.105.w1", "model.layers.2.block_sparse_moe.experts.106.w1", "model.layers.2.block_sparse_moe.experts.107.w1", "model.layers.2.block_sparse_moe.experts.108.w1", "model.layers.2.block_sparse_moe.experts.109.w1", "model.layers.2.block_sparse_moe.experts.110.w1", "model.layers.2.block_sparse_moe.experts.111.w1", "model.layers.2.block_sparse_moe.experts.112.w1", "model.layers.2.block_sparse_moe.experts.113.w1", "model.layers.2.block_sparse_moe.experts.114.w1", "model.layers.2.block_sparse_moe.experts.115.w1", "model.layers.2.block_sparse_moe.experts.116.w1", "model.layers.2.block_sparse_moe.experts.117.w1", "model.layers.2.block_sparse_moe.experts.118.w1", "model.layers.2.block_sparse_moe.experts.119.w1", "model.layers.2.block_sparse_moe.experts.120.w1", "model.layers.2.block_sparse_moe.experts.121.w1", "model.layers.2.block_sparse_moe.experts.122.w1", "model.layers.2.block_sparse_moe.experts.123.w1", "model.layers.2.block_sparse_moe.experts.124.w1", "model.layers.2.block_sparse_moe.experts.125.w1", "model.layers.2.block_sparse_moe.experts.126.w1", "model.layers.2.block_sparse_moe.experts.127.w1", "model.layers.2.block_sparse_moe.experts.128.w1", "model.layers.2.block_sparse_moe.experts.129.w1", "model.layers.2.block_sparse_moe.experts.130.w1", "model.layers.2.block_sparse_moe.experts.131.w1", "model.layers.2.block_sparse_moe.experts.132.w1", "model.layers.2.block_sparse_moe.experts.133.w1", "model.layers.2.block_sparse_moe.experts.134.w1", "model.layers.2.block_sparse_moe.experts.135.w1", "model.layers.2.block_sparse_moe.experts.136.w1", "model.layers.2.block_sparse_moe.experts.137.w1", "model.layers.2.block_sparse_moe.experts.138.w1", "model.layers.2.block_sparse_moe.experts.139.w1", "model.layers.2.block_sparse_moe.experts.140.w1", "model.layers.2.block_sparse_moe.experts.141.w1", "model.layers.2.block_sparse_moe.experts.142.w1", "model.layers.2.block_sparse_moe.experts.143.w1", "model.layers.2.block_sparse_moe.experts.144.w1", "model.layers.2.block_sparse_moe.experts.145.w1", "model.layers.2.block_sparse_moe.experts.146.w1", "model.layers.2.block_sparse_moe.experts.147.w1", "model.layers.2.block_sparse_moe.experts.148.w1", "model.layers.2.block_sparse_moe.experts.149.w1", "model.layers.2.block_sparse_moe.experts.150.w1", "model.layers.2.block_sparse_moe.experts.151.w1", "model.layers.2.block_sparse_moe.experts.152.w1", "model.layers.2.block_sparse_moe.experts.153.w1", "model.layers.2.block_sparse_moe.experts.154.w1", "model.layers.2.block_sparse_moe.experts.155.w1", "model.layers.2.block_sparse_moe.experts.156.w1", "model.layers.2.block_sparse_moe.experts.157.w1", "model.layers.2.block_sparse_moe.experts.158.w1", "model.layers.2.block_sparse_moe.experts.159.w1", "model.layers.2.block_sparse_moe.experts.160.w1", "model.layers.2.block_sparse_moe.experts.161.w1", "model.layers.2.block_sparse_moe.experts.162.w1", "model.layers.2.block_sparse_moe.experts.163.w1", "model.layers.2.block_sparse_moe.experts.164.w1", "model.layers.2.block_sparse_moe.experts.165.w1", "model.layers.2.block_sparse_moe.experts.166.w1", "model.layers.2.block_sparse_moe.experts.167.w1", "model.layers.2.block_sparse_moe.experts.168.w1", "model.layers.2.block_sparse_moe.experts.169.w1", "model.layers.2.block_sparse_moe.experts.170.w1", "model.layers.2.block_sparse_moe.experts.171.w1", "model.layers.2.block_sparse_moe.experts.172.w1", "model.layers.2.block_sparse_moe.experts.173.w1", "model.layers.2.block_sparse_moe.experts.174.w1", "model.layers.2.block_sparse_moe.experts.175.w1", "model.layers.2.block_sparse_moe.experts.176.w1", "model.layers.2.block_sparse_moe.experts.177.w1", "model.layers.2.block_sparse_moe.experts.178.w1", "model.layers.2.block_sparse_moe.experts.179.w1", "model.layers.2.block_sparse_moe.experts.180.w1", "model.layers.2.block_sparse_moe.experts.181.w1", "model.layers.2.block_sparse_moe.experts.182.w1", "model.layers.2.block_sparse_moe.experts.183.w1", "model.layers.2.block_sparse_moe.experts.184.w1", "model.layers.2.block_sparse_moe.experts.185.w1", "model.layers.2.block_sparse_moe.experts.186.w1", "model.layers.2.block_sparse_moe.experts.187.w1", "model.layers.2.block_sparse_moe.experts.188.w1", "model.layers.2.block_sparse_moe.experts.189.w1", "model.layers.2.block_sparse_moe.experts.190.w1", "model.layers.2.block_sparse_moe.experts.191.w1", "model.layers.2.block_sparse_moe.experts.192.w1", "model.layers.2.block_sparse_moe.experts.193.w1", "model.layers.2.block_sparse_moe.experts.194.w1", "model.layers.2.block_sparse_moe.experts.195.w1", "model.layers.2.block_sparse_moe.experts.196.w1", "model.layers.2.block_sparse_moe.experts.197.w1", "model.layers.2.block_sparse_moe.experts.198.w1", "model.layers.2.block_sparse_moe.experts.199.w1", "model.layers.2.block_sparse_moe.experts.200.w1", "model.layers.2.block_sparse_moe.experts.201.w1", "model.layers.2.block_sparse_moe.experts.202.w1", "model.layers.2.block_sparse_moe.experts.203.w1", "model.layers.2.block_sparse_moe.experts.204.w1", "model.layers.2.block_sparse_moe.experts.205.w1", "model.layers.2.block_sparse_moe.experts.206.w1", "model.layers.2.block_sparse_moe.experts.207.w1", "model.layers.2.block_sparse_moe.experts.208.w1", "model.layers.2.block_sparse_moe.experts.209.w1", "model.layers.2.block_sparse_moe.experts.210.w1", "model.layers.2.block_sparse_moe.experts.211.w1", "model.layers.2.block_sparse_moe.experts.212.w1", "model.layers.2.block_sparse_moe.experts.213.w1", "model.layers.2.block_sparse_moe.experts.214.w1", "model.layers.2.block_sparse_moe.experts.215.w1", "model.layers.2.block_sparse_moe.experts.216.w1", "model.layers.2.block_sparse_moe.experts.217.w1", "model.layers.2.block_sparse_moe.experts.218.w1", "model.layers.2.block_sparse_moe.experts.219.w1", "model.layers.2.block_sparse_moe.experts.220.w1", "model.layers.2.block_sparse_moe.experts.221.w1", "model.layers.2.block_sparse_moe.experts.222.w1", "model.layers.2.block_sparse_moe.experts.223.w1", "model.layers.2.block_sparse_moe.experts.224.w1", "model.layers.2.block_sparse_moe.experts.225.w1", "model.layers.2.block_sparse_moe.experts.226.w1", "model.layers.2.block_sparse_moe.experts.227.w1", "model.layers.2.block_sparse_moe.experts.228.w1", "model.layers.2.block_sparse_moe.experts.229.w1", "model.layers.2.block_sparse_moe.experts.230.w1", "model.layers.2.block_sparse_moe.experts.231.w1", "model.layers.2.block_sparse_moe.experts.232.w1", "model.layers.2.block_sparse_moe.experts.233.w1", "model.layers.2.block_sparse_moe.experts.234.w1", "model.layers.2.block_sparse_moe.experts.235.w1", "model.layers.2.block_sparse_moe.experts.236.w1", "model.layers.2.block_sparse_moe.experts.237.w1", "model.layers.2.block_sparse_moe.experts.238.w1", "model.layers.2.block_sparse_moe.experts.239.w1", "model.layers.2.block_sparse_moe.experts.240.w1", "model.layers.2.block_sparse_moe.experts.241.w1", "model.layers.2.block_sparse_moe.experts.242.w1", "model.layers.2.block_sparse_moe.experts.243.w1", "model.layers.2.block_sparse_moe.experts.244.w1", "model.layers.2.block_sparse_moe.experts.245.w1", "model.layers.2.block_sparse_moe.experts.246.w1", "model.layers.2.block_sparse_moe.experts.247.w1", "model.layers.2.block_sparse_moe.experts.248.w1", "model.layers.2.block_sparse_moe.experts.249.w1", "model.layers.2.block_sparse_moe.experts.250.w1", "model.layers.2.block_sparse_moe.experts.251.w1", "model.layers.2.block_sparse_moe.experts.252.w1", "model.layers.2.block_sparse_moe.experts.253.w1", "model.layers.2.block_sparse_moe.experts.254.w1", "model.layers.2.block_sparse_moe.experts.255.w1", "model.layers.2.block_sparse_moe.experts.0.w3", "model.layers.2.block_sparse_moe.experts.1.w3", "model.layers.2.block_sparse_moe.experts.2.w3", "model.layers.2.block_sparse_moe.experts.3.w3", "model.layers.2.block_sparse_moe.experts.4.w3", "model.layers.2.block_sparse_moe.experts.5.w3", "model.layers.2.block_sparse_moe.experts.6.w3", "model.layers.2.block_sparse_moe.experts.7.w3", "model.layers.2.block_sparse_moe.experts.8.w3", "model.layers.2.block_sparse_moe.experts.9.w3", "model.layers.2.block_sparse_moe.experts.10.w3", "model.layers.2.block_sparse_moe.experts.11.w3", "model.layers.2.block_sparse_moe.experts.12.w3", "model.layers.2.block_sparse_moe.experts.13.w3", "model.layers.2.block_sparse_moe.experts.14.w3", "model.layers.2.block_sparse_moe.experts.15.w3", "model.layers.2.block_sparse_moe.experts.16.w3", "model.layers.2.block_sparse_moe.experts.17.w3", "model.layers.2.block_sparse_moe.experts.18.w3", "model.layers.2.block_sparse_moe.experts.19.w3", "model.layers.2.block_sparse_moe.experts.20.w3", "model.layers.2.block_sparse_moe.experts.21.w3", "model.layers.2.block_sparse_moe.experts.22.w3", "model.layers.2.block_sparse_moe.experts.23.w3", "model.layers.2.block_sparse_moe.experts.24.w3", "model.layers.2.block_sparse_moe.experts.25.w3", "model.layers.2.block_sparse_moe.experts.26.w3", "model.layers.2.block_sparse_moe.experts.27.w3", "model.layers.2.block_sparse_moe.experts.28.w3", "model.layers.2.block_sparse_moe.experts.29.w3", "model.layers.2.block_sparse_moe.experts.30.w3", "model.layers.2.block_sparse_moe.experts.31.w3", "model.layers.2.block_sparse_moe.experts.32.w3", "model.layers.2.block_sparse_moe.experts.33.w3", "model.layers.2.block_sparse_moe.experts.34.w3", "model.layers.2.block_sparse_moe.experts.35.w3", "model.layers.2.block_sparse_moe.experts.36.w3", "model.layers.2.block_sparse_moe.experts.37.w3", "model.layers.2.block_sparse_moe.experts.38.w3", "model.layers.2.block_sparse_moe.experts.39.w3", "model.layers.2.block_sparse_moe.experts.40.w3", "model.layers.2.block_sparse_moe.experts.41.w3", "model.layers.2.block_sparse_moe.experts.42.w3", "model.layers.2.block_sparse_moe.experts.43.w3", "model.layers.2.block_sparse_moe.experts.44.w3", "model.layers.2.block_sparse_moe.experts.45.w3", "model.layers.2.block_sparse_moe.experts.46.w3", "model.layers.2.block_sparse_moe.experts.47.w3", "model.layers.2.block_sparse_moe.experts.48.w3", "model.layers.2.block_sparse_moe.experts.49.w3", "model.layers.2.block_sparse_moe.experts.50.w3", "model.layers.2.block_sparse_moe.experts.51.w3", "model.layers.2.block_sparse_moe.experts.52.w3", "model.layers.2.block_sparse_moe.experts.53.w3", "model.layers.2.block_sparse_moe.experts.54.w3", "model.layers.2.block_sparse_moe.experts.55.w3", "model.layers.2.block_sparse_moe.experts.56.w3", "model.layers.2.block_sparse_moe.experts.57.w3", "model.layers.2.block_sparse_moe.experts.58.w3", "model.layers.2.block_sparse_moe.experts.59.w3", "model.layers.2.block_sparse_moe.experts.60.w3", "model.layers.2.block_sparse_moe.experts.61.w3", "model.layers.2.block_sparse_moe.experts.62.w3", "model.layers.2.block_sparse_moe.experts.63.w3", "model.layers.2.block_sparse_moe.experts.64.w3", "model.layers.2.block_sparse_moe.experts.65.w3", "model.layers.2.block_sparse_moe.experts.66.w3", "model.layers.2.block_sparse_moe.experts.67.w3", "model.layers.2.block_sparse_moe.experts.68.w3", "model.layers.2.block_sparse_moe.experts.69.w3", "model.layers.2.block_sparse_moe.experts.70.w3", "model.layers.2.block_sparse_moe.experts.71.w3", "model.layers.2.block_sparse_moe.experts.72.w3", "model.layers.2.block_sparse_moe.experts.73.w3", "model.layers.2.block_sparse_moe.experts.74.w3", "model.layers.2.block_sparse_moe.experts.75.w3", "model.layers.2.block_sparse_moe.experts.76.w3", "model.layers.2.block_sparse_moe.experts.77.w3", "model.layers.2.block_sparse_moe.experts.78.w3", "model.layers.2.block_sparse_moe.experts.79.w3", "model.layers.2.block_sparse_moe.experts.80.w3", "model.layers.2.block_sparse_moe.experts.81.w3", "model.layers.2.block_sparse_moe.experts.82.w3", "model.layers.2.block_sparse_moe.experts.83.w3", "model.layers.2.block_sparse_moe.experts.84.w3", "model.layers.2.block_sparse_moe.experts.85.w3", "model.layers.2.block_sparse_moe.experts.86.w3", "model.layers.2.block_sparse_moe.experts.87.w3", "model.layers.2.block_sparse_moe.experts.88.w3", "model.layers.2.block_sparse_moe.experts.89.w3", "model.layers.2.block_sparse_moe.experts.90.w3", "model.layers.2.block_sparse_moe.experts.91.w3", "model.layers.2.block_sparse_moe.experts.92.w3", "model.layers.2.block_sparse_moe.experts.93.w3", "model.layers.2.block_sparse_moe.experts.94.w3", "model.layers.2.block_sparse_moe.experts.95.w3", "model.layers.2.block_sparse_moe.experts.96.w3", "model.layers.2.block_sparse_moe.experts.97.w3", "model.layers.2.block_sparse_moe.experts.98.w3", "model.layers.2.block_sparse_moe.experts.99.w3", "model.layers.2.block_sparse_moe.experts.100.w3", "model.layers.2.block_sparse_moe.experts.101.w3", "model.layers.2.block_sparse_moe.experts.102.w3", "model.layers.2.block_sparse_moe.experts.103.w3", "model.layers.2.block_sparse_moe.experts.104.w3", "model.layers.2.block_sparse_moe.experts.105.w3", "model.layers.2.block_sparse_moe.experts.106.w3", "model.layers.2.block_sparse_moe.experts.107.w3", "model.layers.2.block_sparse_moe.experts.108.w3", "model.layers.2.block_sparse_moe.experts.109.w3", "model.layers.2.block_sparse_moe.experts.110.w3", "model.layers.2.block_sparse_moe.experts.111.w3", "model.layers.2.block_sparse_moe.experts.112.w3", "model.layers.2.block_sparse_moe.experts.113.w3", "model.layers.2.block_sparse_moe.experts.114.w3", "model.layers.2.block_sparse_moe.experts.115.w3", "model.layers.2.block_sparse_moe.experts.116.w3", "model.layers.2.block_sparse_moe.experts.117.w3", "model.layers.2.block_sparse_moe.experts.118.w3", "model.layers.2.block_sparse_moe.experts.119.w3", "model.layers.2.block_sparse_moe.experts.120.w3", "model.layers.2.block_sparse_moe.experts.121.w3", "model.layers.2.block_sparse_moe.experts.122.w3", "model.layers.2.block_sparse_moe.experts.123.w3", "model.layers.2.block_sparse_moe.experts.124.w3", "model.layers.2.block_sparse_moe.experts.125.w3", "model.layers.2.block_sparse_moe.experts.126.w3", "model.layers.2.block_sparse_moe.experts.127.w3", "model.layers.2.block_sparse_moe.experts.128.w3", "model.layers.2.block_sparse_moe.experts.129.w3", "model.layers.2.block_sparse_moe.experts.130.w3", "model.layers.2.block_sparse_moe.experts.131.w3", "model.layers.2.block_sparse_moe.experts.132.w3", "model.layers.2.block_sparse_moe.experts.133.w3", "model.layers.2.block_sparse_moe.experts.134.w3", "model.layers.2.block_sparse_moe.experts.135.w3", "model.layers.2.block_sparse_moe.experts.136.w3", "model.layers.2.block_sparse_moe.experts.137.w3", "model.layers.2.block_sparse_moe.experts.138.w3", "model.layers.2.block_sparse_moe.experts.139.w3", "model.layers.2.block_sparse_moe.experts.140.w3", "model.layers.2.block_sparse_moe.experts.141.w3", "model.layers.2.block_sparse_moe.experts.142.w3", "model.layers.2.block_sparse_moe.experts.143.w3", "model.layers.2.block_sparse_moe.experts.144.w3", "model.layers.2.block_sparse_moe.experts.145.w3", "model.layers.2.block_sparse_moe.experts.146.w3", "model.layers.2.block_sparse_moe.experts.147.w3", "model.layers.2.block_sparse_moe.experts.148.w3", "model.layers.2.block_sparse_moe.experts.149.w3", "model.layers.2.block_sparse_moe.experts.150.w3", "model.layers.2.block_sparse_moe.experts.151.w3", "model.layers.2.block_sparse_moe.experts.152.w3", "model.layers.2.block_sparse_moe.experts.153.w3", "model.layers.2.block_sparse_moe.experts.154.w3", "model.layers.2.block_sparse_moe.experts.155.w3", "model.layers.2.block_sparse_moe.experts.156.w3", "model.layers.2.block_sparse_moe.experts.157.w3", "model.layers.2.block_sparse_moe.experts.158.w3", "model.layers.2.block_sparse_moe.experts.159.w3", "model.layers.2.block_sparse_moe.experts.160.w3", "model.layers.2.block_sparse_moe.experts.161.w3", "model.layers.2.block_sparse_moe.experts.162.w3", "model.layers.2.block_sparse_moe.experts.163.w3", "model.layers.2.block_sparse_moe.experts.164.w3", "model.layers.2.block_sparse_moe.experts.165.w3", "model.layers.2.block_sparse_moe.experts.166.w3", "model.layers.2.block_sparse_moe.experts.167.w3", "model.layers.2.block_sparse_moe.experts.168.w3", "model.layers.2.block_sparse_moe.experts.169.w3", "model.layers.2.block_sparse_moe.experts.170.w3", "model.layers.2.block_sparse_moe.experts.171.w3", "model.layers.2.block_sparse_moe.experts.172.w3", "model.layers.2.block_sparse_moe.experts.173.w3", "model.layers.2.block_sparse_moe.experts.174.w3", "model.layers.2.block_sparse_moe.experts.175.w3", "model.layers.2.block_sparse_moe.experts.176.w3", "model.layers.2.block_sparse_moe.experts.177.w3", "model.layers.2.block_sparse_moe.experts.178.w3", "model.layers.2.block_sparse_moe.experts.179.w3", "model.layers.2.block_sparse_moe.experts.180.w3", "model.layers.2.block_sparse_moe.experts.181.w3", "model.layers.2.block_sparse_moe.experts.182.w3", "model.layers.2.block_sparse_moe.experts.183.w3", "model.layers.2.block_sparse_moe.experts.184.w3", "model.layers.2.block_sparse_moe.experts.185.w3", "model.layers.2.block_sparse_moe.experts.186.w3", "model.layers.2.block_sparse_moe.experts.187.w3", "model.layers.2.block_sparse_moe.experts.188.w3", "model.layers.2.block_sparse_moe.experts.189.w3", "model.layers.2.block_sparse_moe.experts.190.w3", "model.layers.2.block_sparse_moe.experts.191.w3", "model.layers.2.block_sparse_moe.experts.192.w3", "model.layers.2.block_sparse_moe.experts.193.w3", "model.layers.2.block_sparse_moe.experts.194.w3", "model.layers.2.block_sparse_moe.experts.195.w3", "model.layers.2.block_sparse_moe.experts.196.w3", "model.layers.2.block_sparse_moe.experts.197.w3", "model.layers.2.block_sparse_moe.experts.198.w3", "model.layers.2.block_sparse_moe.experts.199.w3", "model.layers.2.block_sparse_moe.experts.200.w3", "model.layers.2.block_sparse_moe.experts.201.w3", "model.layers.2.block_sparse_moe.experts.202.w3", "model.layers.2.block_sparse_moe.experts.203.w3", "model.layers.2.block_sparse_moe.experts.204.w3", "model.layers.2.block_sparse_moe.experts.205.w3", "model.layers.2.block_sparse_moe.experts.206.w3", "model.layers.2.block_sparse_moe.experts.207.w3", "model.layers.2.block_sparse_moe.experts.208.w3", "model.layers.2.block_sparse_moe.experts.209.w3", "model.layers.2.block_sparse_moe.experts.210.w3", "model.layers.2.block_sparse_moe.experts.211.w3", "model.layers.2.block_sparse_moe.experts.212.w3", "model.layers.2.block_sparse_moe.experts.213.w3", "model.layers.2.block_sparse_moe.experts.214.w3", "model.layers.2.block_sparse_moe.experts.215.w3", "model.layers.2.block_sparse_moe.experts.216.w3", "model.layers.2.block_sparse_moe.experts.217.w3", "model.layers.2.block_sparse_moe.experts.218.w3", "model.layers.2.block_sparse_moe.experts.219.w3", "model.layers.2.block_sparse_moe.experts.220.w3", "model.layers.2.block_sparse_moe.experts.221.w3", "model.layers.2.block_sparse_moe.experts.222.w3", "model.layers.2.block_sparse_moe.experts.223.w3", "model.layers.2.block_sparse_moe.experts.224.w3", "model.layers.2.block_sparse_moe.experts.225.w3", "model.layers.2.block_sparse_moe.experts.226.w3", "model.layers.2.block_sparse_moe.experts.227.w3", "model.layers.2.block_sparse_moe.experts.228.w3", "model.layers.2.block_sparse_moe.experts.229.w3", "model.layers.2.block_sparse_moe.experts.230.w3", "model.layers.2.block_sparse_moe.experts.231.w3", "model.layers.2.block_sparse_moe.experts.232.w3", "model.layers.2.block_sparse_moe.experts.233.w3", "model.layers.2.block_sparse_moe.experts.234.w3", "model.layers.2.block_sparse_moe.experts.235.w3", "model.layers.2.block_sparse_moe.experts.236.w3", "model.layers.2.block_sparse_moe.experts.237.w3", "model.layers.2.block_sparse_moe.experts.238.w3", "model.layers.2.block_sparse_moe.experts.239.w3", "model.layers.2.block_sparse_moe.experts.240.w3", "model.layers.2.block_sparse_moe.experts.241.w3", "model.layers.2.block_sparse_moe.experts.242.w3", "model.layers.2.block_sparse_moe.experts.243.w3", "model.layers.2.block_sparse_moe.experts.244.w3", "model.layers.2.block_sparse_moe.experts.245.w3", "model.layers.2.block_sparse_moe.experts.246.w3", "model.layers.2.block_sparse_moe.experts.247.w3", "model.layers.2.block_sparse_moe.experts.248.w3", "model.layers.2.block_sparse_moe.experts.249.w3", "model.layers.2.block_sparse_moe.experts.250.w3", "model.layers.2.block_sparse_moe.experts.251.w3", "model.layers.2.block_sparse_moe.experts.252.w3", "model.layers.2.block_sparse_moe.experts.253.w3", "model.layers.2.block_sparse_moe.experts.254.w3", "model.layers.2.block_sparse_moe.experts.255.w3", "model.layers.2.block_sparse_moe.experts.0.w2", "model.layers.2.block_sparse_moe.experts.1.w2", "model.layers.2.block_sparse_moe.experts.2.w2", "model.layers.2.block_sparse_moe.experts.3.w2", "model.layers.2.block_sparse_moe.experts.4.w2", "model.layers.2.block_sparse_moe.experts.5.w2", "model.layers.2.block_sparse_moe.experts.6.w2", "model.layers.2.block_sparse_moe.experts.7.w2", "model.layers.2.block_sparse_moe.experts.8.w2", "model.layers.2.block_sparse_moe.experts.9.w2", "model.layers.2.block_sparse_moe.experts.10.w2", "model.layers.2.block_sparse_moe.experts.11.w2", "model.layers.2.block_sparse_moe.experts.12.w2", "model.layers.2.block_sparse_moe.experts.13.w2", "model.layers.2.block_sparse_moe.experts.14.w2", "model.layers.2.block_sparse_moe.experts.15.w2", "model.layers.2.block_sparse_moe.experts.16.w2", "model.layers.2.block_sparse_moe.experts.17.w2", "model.layers.2.block_sparse_moe.experts.18.w2", "model.layers.2.block_sparse_moe.experts.19.w2", "model.layers.2.block_sparse_moe.experts.20.w2", "model.layers.2.block_sparse_moe.experts.21.w2", "model.layers.2.block_sparse_moe.experts.22.w2", "model.layers.2.block_sparse_moe.experts.23.w2", "model.layers.2.block_sparse_moe.experts.24.w2", "model.layers.2.block_sparse_moe.experts.25.w2", "model.layers.2.block_sparse_moe.experts.26.w2", "model.layers.2.block_sparse_moe.experts.27.w2", "model.layers.2.block_sparse_moe.experts.28.w2", "model.layers.2.block_sparse_moe.experts.29.w2", "model.layers.2.block_sparse_moe.experts.30.w2", "model.layers.2.block_sparse_moe.experts.31.w2", "model.layers.2.block_sparse_moe.experts.32.w2", "model.layers.2.block_sparse_moe.experts.33.w2", "model.layers.2.block_sparse_moe.experts.34.w2", "model.layers.2.block_sparse_moe.experts.35.w2", "model.layers.2.block_sparse_moe.experts.36.w2", "model.layers.2.block_sparse_moe.experts.37.w2", "model.layers.2.block_sparse_moe.experts.38.w2", "model.layers.2.block_sparse_moe.experts.39.w2", "model.layers.2.block_sparse_moe.experts.40.w2", "model.layers.2.block_sparse_moe.experts.41.w2", "model.layers.2.block_sparse_moe.experts.42.w2", "model.layers.2.block_sparse_moe.experts.43.w2", "model.layers.2.block_sparse_moe.experts.44.w2", "model.layers.2.block_sparse_moe.experts.45.w2", "model.layers.2.block_sparse_moe.experts.46.w2", "model.layers.2.block_sparse_moe.experts.47.w2", "model.layers.2.block_sparse_moe.experts.48.w2", "model.layers.2.block_sparse_moe.experts.49.w2", "model.layers.2.block_sparse_moe.experts.50.w2", "model.layers.2.block_sparse_moe.experts.51.w2", "model.layers.2.block_sparse_moe.experts.52.w2", "model.layers.2.block_sparse_moe.experts.53.w2", "model.layers.2.block_sparse_moe.experts.54.w2", "model.layers.2.block_sparse_moe.experts.55.w2", "model.layers.2.block_sparse_moe.experts.56.w2", "model.layers.2.block_sparse_moe.experts.57.w2", "model.layers.2.block_sparse_moe.experts.58.w2", "model.layers.2.block_sparse_moe.experts.59.w2", "model.layers.2.block_sparse_moe.experts.60.w2", "model.layers.2.block_sparse_moe.experts.61.w2", "model.layers.2.block_sparse_moe.experts.62.w2", "model.layers.2.block_sparse_moe.experts.63.w2", "model.layers.2.block_sparse_moe.experts.64.w2", "model.layers.2.block_sparse_moe.experts.65.w2", "model.layers.2.block_sparse_moe.experts.66.w2", "model.layers.2.block_sparse_moe.experts.67.w2", "model.layers.2.block_sparse_moe.experts.68.w2", "model.layers.2.block_sparse_moe.experts.69.w2", "model.layers.2.block_sparse_moe.experts.70.w2", "model.layers.2.block_sparse_moe.experts.71.w2", "model.layers.2.block_sparse_moe.experts.72.w2", "model.layers.2.block_sparse_moe.experts.73.w2", "model.layers.2.block_sparse_moe.experts.74.w2", "model.layers.2.block_sparse_moe.experts.75.w2", "model.layers.2.block_sparse_moe.experts.76.w2", "model.layers.2.block_sparse_moe.experts.77.w2", "model.layers.2.block_sparse_moe.experts.78.w2", "model.layers.2.block_sparse_moe.experts.79.w2", "model.layers.2.block_sparse_moe.experts.80.w2", "model.layers.2.block_sparse_moe.experts.81.w2", "model.layers.2.block_sparse_moe.experts.82.w2", "model.layers.2.block_sparse_moe.experts.83.w2", "model.layers.2.block_sparse_moe.experts.84.w2", "model.layers.2.block_sparse_moe.experts.85.w2", "model.layers.2.block_sparse_moe.experts.86.w2", "model.layers.2.block_sparse_moe.experts.87.w2", "model.layers.2.block_sparse_moe.experts.88.w2", "model.layers.2.block_sparse_moe.experts.89.w2", "model.layers.2.block_sparse_moe.experts.90.w2", "model.layers.2.block_sparse_moe.experts.91.w2", "model.layers.2.block_sparse_moe.experts.92.w2", "model.layers.2.block_sparse_moe.experts.93.w2", "model.layers.2.block_sparse_moe.experts.94.w2", "model.layers.2.block_sparse_moe.experts.95.w2", "model.layers.2.block_sparse_moe.experts.96.w2", "model.layers.2.block_sparse_moe.experts.97.w2", "model.layers.2.block_sparse_moe.experts.98.w2", "model.layers.2.block_sparse_moe.experts.99.w2", "model.layers.2.block_sparse_moe.experts.100.w2", "model.layers.2.block_sparse_moe.experts.101.w2", "model.layers.2.block_sparse_moe.experts.102.w2", "model.layers.2.block_sparse_moe.experts.103.w2", "model.layers.2.block_sparse_moe.experts.104.w2", "model.layers.2.block_sparse_moe.experts.105.w2", "model.layers.2.block_sparse_moe.experts.106.w2", "model.layers.2.block_sparse_moe.experts.107.w2", "model.layers.2.block_sparse_moe.experts.108.w2", "model.layers.2.block_sparse_moe.experts.109.w2", "model.layers.2.block_sparse_moe.experts.110.w2", "model.layers.2.block_sparse_moe.experts.111.w2", "model.layers.2.block_sparse_moe.experts.112.w2", "model.layers.2.block_sparse_moe.experts.113.w2", "model.layers.2.block_sparse_moe.experts.114.w2", "model.layers.2.block_sparse_moe.experts.115.w2", "model.layers.2.block_sparse_moe.experts.116.w2", "model.layers.2.block_sparse_moe.experts.117.w2", "model.layers.2.block_sparse_moe.experts.118.w2", "model.layers.2.block_sparse_moe.experts.119.w2", "model.layers.2.block_sparse_moe.experts.120.w2", "model.layers.2.block_sparse_moe.experts.121.w2", "model.layers.2.block_sparse_moe.experts.122.w2", "model.layers.2.block_sparse_moe.experts.123.w2", "model.layers.2.block_sparse_moe.experts.124.w2", "model.layers.2.block_sparse_moe.experts.125.w2", "model.layers.2.block_sparse_moe.experts.126.w2", "model.layers.2.block_sparse_moe.experts.127.w2", "model.layers.2.block_sparse_moe.experts.128.w2", "model.layers.2.block_sparse_moe.experts.129.w2", "model.layers.2.block_sparse_moe.experts.130.w2", "model.layers.2.block_sparse_moe.experts.131.w2", "model.layers.2.block_sparse_moe.experts.132.w2", "model.layers.2.block_sparse_moe.experts.133.w2", "model.layers.2.block_sparse_moe.experts.134.w2", "model.layers.2.block_sparse_moe.experts.135.w2", "model.layers.2.block_sparse_moe.experts.136.w2", "model.layers.2.block_sparse_moe.experts.137.w2", "model.layers.2.block_sparse_moe.experts.138.w2", "model.layers.2.block_sparse_moe.experts.139.w2", "model.layers.2.block_sparse_moe.experts.140.w2", "model.layers.2.block_sparse_moe.experts.141.w2", "model.layers.2.block_sparse_moe.experts.142.w2", "model.layers.2.block_sparse_moe.experts.143.w2", "model.layers.2.block_sparse_moe.experts.144.w2", "model.layers.2.block_sparse_moe.experts.145.w2", "model.layers.2.block_sparse_moe.experts.146.w2", "model.layers.2.block_sparse_moe.experts.147.w2", "model.layers.2.block_sparse_moe.experts.148.w2", "model.layers.2.block_sparse_moe.experts.149.w2", "model.layers.2.block_sparse_moe.experts.150.w2", "model.layers.2.block_sparse_moe.experts.151.w2", "model.layers.2.block_sparse_moe.experts.152.w2", "model.layers.2.block_sparse_moe.experts.153.w2", "model.layers.2.block_sparse_moe.experts.154.w2", "model.layers.2.block_sparse_moe.experts.155.w2", "model.layers.2.block_sparse_moe.experts.156.w2", "model.layers.2.block_sparse_moe.experts.157.w2", "model.layers.2.block_sparse_moe.experts.158.w2", "model.layers.2.block_sparse_moe.experts.159.w2", "model.layers.2.block_sparse_moe.experts.160.w2", "model.layers.2.block_sparse_moe.experts.161.w2", "model.layers.2.block_sparse_moe.experts.162.w2", "model.layers.2.block_sparse_moe.experts.163.w2", "model.layers.2.block_sparse_moe.experts.164.w2", "model.layers.2.block_sparse_moe.experts.165.w2", "model.layers.2.block_sparse_moe.experts.166.w2", "model.layers.2.block_sparse_moe.experts.167.w2", "model.layers.2.block_sparse_moe.experts.168.w2", "model.layers.2.block_sparse_moe.experts.169.w2", "model.layers.2.block_sparse_moe.experts.170.w2", "model.layers.2.block_sparse_moe.experts.171.w2", "model.layers.2.block_sparse_moe.experts.172.w2", "model.layers.2.block_sparse_moe.experts.173.w2", "model.layers.2.block_sparse_moe.experts.174.w2", "model.layers.2.block_sparse_moe.experts.175.w2", "model.layers.2.block_sparse_moe.experts.176.w2", "model.layers.2.block_sparse_moe.experts.177.w2", "model.layers.2.block_sparse_moe.experts.178.w2", "model.layers.2.block_sparse_moe.experts.179.w2", "model.layers.2.block_sparse_moe.experts.180.w2", "model.layers.2.block_sparse_moe.experts.181.w2", "model.layers.2.block_sparse_moe.experts.182.w2", "model.layers.2.block_sparse_moe.experts.183.w2", "model.layers.2.block_sparse_moe.experts.184.w2", "model.layers.2.block_sparse_moe.experts.185.w2", "model.layers.2.block_sparse_moe.experts.186.w2", "model.layers.2.block_sparse_moe.experts.187.w2", "model.layers.2.block_sparse_moe.experts.188.w2", "model.layers.2.block_sparse_moe.experts.189.w2", "model.layers.2.block_sparse_moe.experts.190.w2", "model.layers.2.block_sparse_moe.experts.191.w2", "model.layers.2.block_sparse_moe.experts.192.w2", "model.layers.2.block_sparse_moe.experts.193.w2", "model.layers.2.block_sparse_moe.experts.194.w2", "model.layers.2.block_sparse_moe.experts.195.w2", "model.layers.2.block_sparse_moe.experts.196.w2", "model.layers.2.block_sparse_moe.experts.197.w2", "model.layers.2.block_sparse_moe.experts.198.w2", "model.layers.2.block_sparse_moe.experts.199.w2", "model.layers.2.block_sparse_moe.experts.200.w2", "model.layers.2.block_sparse_moe.experts.201.w2", "model.layers.2.block_sparse_moe.experts.202.w2", "model.layers.2.block_sparse_moe.experts.203.w2", "model.layers.2.block_sparse_moe.experts.204.w2", "model.layers.2.block_sparse_moe.experts.205.w2", "model.layers.2.block_sparse_moe.experts.206.w2", "model.layers.2.block_sparse_moe.experts.207.w2", "model.layers.2.block_sparse_moe.experts.208.w2", "model.layers.2.block_sparse_moe.experts.209.w2", "model.layers.2.block_sparse_moe.experts.210.w2", "model.layers.2.block_sparse_moe.experts.211.w2", "model.layers.2.block_sparse_moe.experts.212.w2", "model.layers.2.block_sparse_moe.experts.213.w2", "model.layers.2.block_sparse_moe.experts.214.w2", "model.layers.2.block_sparse_moe.experts.215.w2", "model.layers.2.block_sparse_moe.experts.216.w2", "model.layers.2.block_sparse_moe.experts.217.w2", "model.layers.2.block_sparse_moe.experts.218.w2", "model.layers.2.block_sparse_moe.experts.219.w2", "model.layers.2.block_sparse_moe.experts.220.w2", "model.layers.2.block_sparse_moe.experts.221.w2", "model.layers.2.block_sparse_moe.experts.222.w2", "model.layers.2.block_sparse_moe.experts.223.w2", "model.layers.2.block_sparse_moe.experts.224.w2", "model.layers.2.block_sparse_moe.experts.225.w2", "model.layers.2.block_sparse_moe.experts.226.w2", "model.layers.2.block_sparse_moe.experts.227.w2", "model.layers.2.block_sparse_moe.experts.228.w2", "model.layers.2.block_sparse_moe.experts.229.w2", "model.layers.2.block_sparse_moe.experts.230.w2", "model.layers.2.block_sparse_moe.experts.231.w2", "model.layers.2.block_sparse_moe.experts.232.w2", "model.layers.2.block_sparse_moe.experts.233.w2", "model.layers.2.block_sparse_moe.experts.234.w2", "model.layers.2.block_sparse_moe.experts.235.w2", "model.layers.2.block_sparse_moe.experts.236.w2", "model.layers.2.block_sparse_moe.experts.237.w2", "model.layers.2.block_sparse_moe.experts.238.w2", "model.layers.2.block_sparse_moe.experts.239.w2", "model.layers.2.block_sparse_moe.experts.240.w2", "model.layers.2.block_sparse_moe.experts.241.w2", "model.layers.2.block_sparse_moe.experts.242.w2", "model.layers.2.block_sparse_moe.experts.243.w2", "model.layers.2.block_sparse_moe.experts.244.w2", "model.layers.2.block_sparse_moe.experts.245.w2", "model.layers.2.block_sparse_moe.experts.246.w2", "model.layers.2.block_sparse_moe.experts.247.w2", "model.layers.2.block_sparse_moe.experts.248.w2", "model.layers.2.block_sparse_moe.experts.249.w2", "model.layers.2.block_sparse_moe.experts.250.w2", "model.layers.2.block_sparse_moe.experts.251.w2", "model.layers.2.block_sparse_moe.experts.252.w2", "model.layers.2.block_sparse_moe.experts.253.w2", "model.layers.2.block_sparse_moe.experts.254.w2", "model.layers.2.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0029872145503759273, "dbits": 3623878656 } ] }, { "idx": 6, "layers": [ "model.layers.3.self_attn.q_proj", "model.layers.3.self_attn.k_proj", "model.layers.3.self_attn.v_proj", "model.layers.3.self_attn.o_proj" ], "candidates": [ { "dkld": 0.012346872314810742, "dbits": 44040192 } ] }, { "idx": 7, "layers": [ "model.layers.3.block_sparse_moe.experts.0.w1", "model.layers.3.block_sparse_moe.experts.1.w1", "model.layers.3.block_sparse_moe.experts.2.w1", "model.layers.3.block_sparse_moe.experts.3.w1", "model.layers.3.block_sparse_moe.experts.4.w1", "model.layers.3.block_sparse_moe.experts.5.w1", "model.layers.3.block_sparse_moe.experts.6.w1", "model.layers.3.block_sparse_moe.experts.7.w1", "model.layers.3.block_sparse_moe.experts.8.w1", "model.layers.3.block_sparse_moe.experts.9.w1", "model.layers.3.block_sparse_moe.experts.10.w1", "model.layers.3.block_sparse_moe.experts.11.w1", "model.layers.3.block_sparse_moe.experts.12.w1", "model.layers.3.block_sparse_moe.experts.13.w1", "model.layers.3.block_sparse_moe.experts.14.w1", "model.layers.3.block_sparse_moe.experts.15.w1", "model.layers.3.block_sparse_moe.experts.16.w1", "model.layers.3.block_sparse_moe.experts.17.w1", "model.layers.3.block_sparse_moe.experts.18.w1", "model.layers.3.block_sparse_moe.experts.19.w1", "model.layers.3.block_sparse_moe.experts.20.w1", "model.layers.3.block_sparse_moe.experts.21.w1", "model.layers.3.block_sparse_moe.experts.22.w1", "model.layers.3.block_sparse_moe.experts.23.w1", "model.layers.3.block_sparse_moe.experts.24.w1", "model.layers.3.block_sparse_moe.experts.25.w1", "model.layers.3.block_sparse_moe.experts.26.w1", "model.layers.3.block_sparse_moe.experts.27.w1", "model.layers.3.block_sparse_moe.experts.28.w1", "model.layers.3.block_sparse_moe.experts.29.w1", "model.layers.3.block_sparse_moe.experts.30.w1", "model.layers.3.block_sparse_moe.experts.31.w1", "model.layers.3.block_sparse_moe.experts.32.w1", "model.layers.3.block_sparse_moe.experts.33.w1", "model.layers.3.block_sparse_moe.experts.34.w1", "model.layers.3.block_sparse_moe.experts.35.w1", "model.layers.3.block_sparse_moe.experts.36.w1", "model.layers.3.block_sparse_moe.experts.37.w1", "model.layers.3.block_sparse_moe.experts.38.w1", "model.layers.3.block_sparse_moe.experts.39.w1", "model.layers.3.block_sparse_moe.experts.40.w1", "model.layers.3.block_sparse_moe.experts.41.w1", "model.layers.3.block_sparse_moe.experts.42.w1", "model.layers.3.block_sparse_moe.experts.43.w1", "model.layers.3.block_sparse_moe.experts.44.w1", "model.layers.3.block_sparse_moe.experts.45.w1", "model.layers.3.block_sparse_moe.experts.46.w1", "model.layers.3.block_sparse_moe.experts.47.w1", "model.layers.3.block_sparse_moe.experts.48.w1", "model.layers.3.block_sparse_moe.experts.49.w1", "model.layers.3.block_sparse_moe.experts.50.w1", "model.layers.3.block_sparse_moe.experts.51.w1", "model.layers.3.block_sparse_moe.experts.52.w1", "model.layers.3.block_sparse_moe.experts.53.w1", "model.layers.3.block_sparse_moe.experts.54.w1", "model.layers.3.block_sparse_moe.experts.55.w1", "model.layers.3.block_sparse_moe.experts.56.w1", "model.layers.3.block_sparse_moe.experts.57.w1", "model.layers.3.block_sparse_moe.experts.58.w1", "model.layers.3.block_sparse_moe.experts.59.w1", "model.layers.3.block_sparse_moe.experts.60.w1", "model.layers.3.block_sparse_moe.experts.61.w1", "model.layers.3.block_sparse_moe.experts.62.w1", "model.layers.3.block_sparse_moe.experts.63.w1", "model.layers.3.block_sparse_moe.experts.64.w1", "model.layers.3.block_sparse_moe.experts.65.w1", "model.layers.3.block_sparse_moe.experts.66.w1", "model.layers.3.block_sparse_moe.experts.67.w1", "model.layers.3.block_sparse_moe.experts.68.w1", "model.layers.3.block_sparse_moe.experts.69.w1", "model.layers.3.block_sparse_moe.experts.70.w1", "model.layers.3.block_sparse_moe.experts.71.w1", "model.layers.3.block_sparse_moe.experts.72.w1", "model.layers.3.block_sparse_moe.experts.73.w1", "model.layers.3.block_sparse_moe.experts.74.w1", "model.layers.3.block_sparse_moe.experts.75.w1", "model.layers.3.block_sparse_moe.experts.76.w1", "model.layers.3.block_sparse_moe.experts.77.w1", "model.layers.3.block_sparse_moe.experts.78.w1", "model.layers.3.block_sparse_moe.experts.79.w1", "model.layers.3.block_sparse_moe.experts.80.w1", "model.layers.3.block_sparse_moe.experts.81.w1", "model.layers.3.block_sparse_moe.experts.82.w1", "model.layers.3.block_sparse_moe.experts.83.w1", "model.layers.3.block_sparse_moe.experts.84.w1", "model.layers.3.block_sparse_moe.experts.85.w1", "model.layers.3.block_sparse_moe.experts.86.w1", "model.layers.3.block_sparse_moe.experts.87.w1", "model.layers.3.block_sparse_moe.experts.88.w1", "model.layers.3.block_sparse_moe.experts.89.w1", "model.layers.3.block_sparse_moe.experts.90.w1", "model.layers.3.block_sparse_moe.experts.91.w1", "model.layers.3.block_sparse_moe.experts.92.w1", "model.layers.3.block_sparse_moe.experts.93.w1", "model.layers.3.block_sparse_moe.experts.94.w1", "model.layers.3.block_sparse_moe.experts.95.w1", "model.layers.3.block_sparse_moe.experts.96.w1", "model.layers.3.block_sparse_moe.experts.97.w1", "model.layers.3.block_sparse_moe.experts.98.w1", "model.layers.3.block_sparse_moe.experts.99.w1", "model.layers.3.block_sparse_moe.experts.100.w1", "model.layers.3.block_sparse_moe.experts.101.w1", "model.layers.3.block_sparse_moe.experts.102.w1", "model.layers.3.block_sparse_moe.experts.103.w1", "model.layers.3.block_sparse_moe.experts.104.w1", "model.layers.3.block_sparse_moe.experts.105.w1", "model.layers.3.block_sparse_moe.experts.106.w1", "model.layers.3.block_sparse_moe.experts.107.w1", "model.layers.3.block_sparse_moe.experts.108.w1", "model.layers.3.block_sparse_moe.experts.109.w1", "model.layers.3.block_sparse_moe.experts.110.w1", "model.layers.3.block_sparse_moe.experts.111.w1", "model.layers.3.block_sparse_moe.experts.112.w1", "model.layers.3.block_sparse_moe.experts.113.w1", "model.layers.3.block_sparse_moe.experts.114.w1", "model.layers.3.block_sparse_moe.experts.115.w1", "model.layers.3.block_sparse_moe.experts.116.w1", "model.layers.3.block_sparse_moe.experts.117.w1", "model.layers.3.block_sparse_moe.experts.118.w1", "model.layers.3.block_sparse_moe.experts.119.w1", "model.layers.3.block_sparse_moe.experts.120.w1", "model.layers.3.block_sparse_moe.experts.121.w1", "model.layers.3.block_sparse_moe.experts.122.w1", "model.layers.3.block_sparse_moe.experts.123.w1", "model.layers.3.block_sparse_moe.experts.124.w1", "model.layers.3.block_sparse_moe.experts.125.w1", "model.layers.3.block_sparse_moe.experts.126.w1", "model.layers.3.block_sparse_moe.experts.127.w1", "model.layers.3.block_sparse_moe.experts.128.w1", "model.layers.3.block_sparse_moe.experts.129.w1", "model.layers.3.block_sparse_moe.experts.130.w1", "model.layers.3.block_sparse_moe.experts.131.w1", "model.layers.3.block_sparse_moe.experts.132.w1", "model.layers.3.block_sparse_moe.experts.133.w1", "model.layers.3.block_sparse_moe.experts.134.w1", "model.layers.3.block_sparse_moe.experts.135.w1", "model.layers.3.block_sparse_moe.experts.136.w1", "model.layers.3.block_sparse_moe.experts.137.w1", "model.layers.3.block_sparse_moe.experts.138.w1", "model.layers.3.block_sparse_moe.experts.139.w1", "model.layers.3.block_sparse_moe.experts.140.w1", "model.layers.3.block_sparse_moe.experts.141.w1", "model.layers.3.block_sparse_moe.experts.142.w1", "model.layers.3.block_sparse_moe.experts.143.w1", "model.layers.3.block_sparse_moe.experts.144.w1", "model.layers.3.block_sparse_moe.experts.145.w1", "model.layers.3.block_sparse_moe.experts.146.w1", "model.layers.3.block_sparse_moe.experts.147.w1", "model.layers.3.block_sparse_moe.experts.148.w1", "model.layers.3.block_sparse_moe.experts.149.w1", "model.layers.3.block_sparse_moe.experts.150.w1", "model.layers.3.block_sparse_moe.experts.151.w1", "model.layers.3.block_sparse_moe.experts.152.w1", "model.layers.3.block_sparse_moe.experts.153.w1", "model.layers.3.block_sparse_moe.experts.154.w1", "model.layers.3.block_sparse_moe.experts.155.w1", "model.layers.3.block_sparse_moe.experts.156.w1", "model.layers.3.block_sparse_moe.experts.157.w1", "model.layers.3.block_sparse_moe.experts.158.w1", "model.layers.3.block_sparse_moe.experts.159.w1", "model.layers.3.block_sparse_moe.experts.160.w1", "model.layers.3.block_sparse_moe.experts.161.w1", "model.layers.3.block_sparse_moe.experts.162.w1", "model.layers.3.block_sparse_moe.experts.163.w1", "model.layers.3.block_sparse_moe.experts.164.w1", "model.layers.3.block_sparse_moe.experts.165.w1", "model.layers.3.block_sparse_moe.experts.166.w1", "model.layers.3.block_sparse_moe.experts.167.w1", "model.layers.3.block_sparse_moe.experts.168.w1", "model.layers.3.block_sparse_moe.experts.169.w1", "model.layers.3.block_sparse_moe.experts.170.w1", "model.layers.3.block_sparse_moe.experts.171.w1", "model.layers.3.block_sparse_moe.experts.172.w1", "model.layers.3.block_sparse_moe.experts.173.w1", "model.layers.3.block_sparse_moe.experts.174.w1", "model.layers.3.block_sparse_moe.experts.175.w1", "model.layers.3.block_sparse_moe.experts.176.w1", "model.layers.3.block_sparse_moe.experts.177.w1", "model.layers.3.block_sparse_moe.experts.178.w1", "model.layers.3.block_sparse_moe.experts.179.w1", "model.layers.3.block_sparse_moe.experts.180.w1", "model.layers.3.block_sparse_moe.experts.181.w1", "model.layers.3.block_sparse_moe.experts.182.w1", "model.layers.3.block_sparse_moe.experts.183.w1", "model.layers.3.block_sparse_moe.experts.184.w1", "model.layers.3.block_sparse_moe.experts.185.w1", "model.layers.3.block_sparse_moe.experts.186.w1", "model.layers.3.block_sparse_moe.experts.187.w1", "model.layers.3.block_sparse_moe.experts.188.w1", "model.layers.3.block_sparse_moe.experts.189.w1", "model.layers.3.block_sparse_moe.experts.190.w1", "model.layers.3.block_sparse_moe.experts.191.w1", "model.layers.3.block_sparse_moe.experts.192.w1", "model.layers.3.block_sparse_moe.experts.193.w1", "model.layers.3.block_sparse_moe.experts.194.w1", "model.layers.3.block_sparse_moe.experts.195.w1", "model.layers.3.block_sparse_moe.experts.196.w1", "model.layers.3.block_sparse_moe.experts.197.w1", "model.layers.3.block_sparse_moe.experts.198.w1", "model.layers.3.block_sparse_moe.experts.199.w1", "model.layers.3.block_sparse_moe.experts.200.w1", "model.layers.3.block_sparse_moe.experts.201.w1", "model.layers.3.block_sparse_moe.experts.202.w1", "model.layers.3.block_sparse_moe.experts.203.w1", "model.layers.3.block_sparse_moe.experts.204.w1", "model.layers.3.block_sparse_moe.experts.205.w1", "model.layers.3.block_sparse_moe.experts.206.w1", "model.layers.3.block_sparse_moe.experts.207.w1", "model.layers.3.block_sparse_moe.experts.208.w1", "model.layers.3.block_sparse_moe.experts.209.w1", "model.layers.3.block_sparse_moe.experts.210.w1", "model.layers.3.block_sparse_moe.experts.211.w1", "model.layers.3.block_sparse_moe.experts.212.w1", "model.layers.3.block_sparse_moe.experts.213.w1", "model.layers.3.block_sparse_moe.experts.214.w1", "model.layers.3.block_sparse_moe.experts.215.w1", "model.layers.3.block_sparse_moe.experts.216.w1", "model.layers.3.block_sparse_moe.experts.217.w1", "model.layers.3.block_sparse_moe.experts.218.w1", "model.layers.3.block_sparse_moe.experts.219.w1", "model.layers.3.block_sparse_moe.experts.220.w1", "model.layers.3.block_sparse_moe.experts.221.w1", "model.layers.3.block_sparse_moe.experts.222.w1", "model.layers.3.block_sparse_moe.experts.223.w1", "model.layers.3.block_sparse_moe.experts.224.w1", "model.layers.3.block_sparse_moe.experts.225.w1", "model.layers.3.block_sparse_moe.experts.226.w1", "model.layers.3.block_sparse_moe.experts.227.w1", "model.layers.3.block_sparse_moe.experts.228.w1", "model.layers.3.block_sparse_moe.experts.229.w1", "model.layers.3.block_sparse_moe.experts.230.w1", "model.layers.3.block_sparse_moe.experts.231.w1", "model.layers.3.block_sparse_moe.experts.232.w1", "model.layers.3.block_sparse_moe.experts.233.w1", "model.layers.3.block_sparse_moe.experts.234.w1", "model.layers.3.block_sparse_moe.experts.235.w1", "model.layers.3.block_sparse_moe.experts.236.w1", "model.layers.3.block_sparse_moe.experts.237.w1", "model.layers.3.block_sparse_moe.experts.238.w1", "model.layers.3.block_sparse_moe.experts.239.w1", "model.layers.3.block_sparse_moe.experts.240.w1", "model.layers.3.block_sparse_moe.experts.241.w1", "model.layers.3.block_sparse_moe.experts.242.w1", "model.layers.3.block_sparse_moe.experts.243.w1", "model.layers.3.block_sparse_moe.experts.244.w1", "model.layers.3.block_sparse_moe.experts.245.w1", "model.layers.3.block_sparse_moe.experts.246.w1", "model.layers.3.block_sparse_moe.experts.247.w1", "model.layers.3.block_sparse_moe.experts.248.w1", "model.layers.3.block_sparse_moe.experts.249.w1", "model.layers.3.block_sparse_moe.experts.250.w1", "model.layers.3.block_sparse_moe.experts.251.w1", "model.layers.3.block_sparse_moe.experts.252.w1", "model.layers.3.block_sparse_moe.experts.253.w1", "model.layers.3.block_sparse_moe.experts.254.w1", "model.layers.3.block_sparse_moe.experts.255.w1", "model.layers.3.block_sparse_moe.experts.0.w3", "model.layers.3.block_sparse_moe.experts.1.w3", "model.layers.3.block_sparse_moe.experts.2.w3", "model.layers.3.block_sparse_moe.experts.3.w3", "model.layers.3.block_sparse_moe.experts.4.w3", "model.layers.3.block_sparse_moe.experts.5.w3", "model.layers.3.block_sparse_moe.experts.6.w3", "model.layers.3.block_sparse_moe.experts.7.w3", "model.layers.3.block_sparse_moe.experts.8.w3", "model.layers.3.block_sparse_moe.experts.9.w3", "model.layers.3.block_sparse_moe.experts.10.w3", "model.layers.3.block_sparse_moe.experts.11.w3", "model.layers.3.block_sparse_moe.experts.12.w3", "model.layers.3.block_sparse_moe.experts.13.w3", "model.layers.3.block_sparse_moe.experts.14.w3", "model.layers.3.block_sparse_moe.experts.15.w3", "model.layers.3.block_sparse_moe.experts.16.w3", "model.layers.3.block_sparse_moe.experts.17.w3", "model.layers.3.block_sparse_moe.experts.18.w3", "model.layers.3.block_sparse_moe.experts.19.w3", "model.layers.3.block_sparse_moe.experts.20.w3", "model.layers.3.block_sparse_moe.experts.21.w3", "model.layers.3.block_sparse_moe.experts.22.w3", "model.layers.3.block_sparse_moe.experts.23.w3", "model.layers.3.block_sparse_moe.experts.24.w3", "model.layers.3.block_sparse_moe.experts.25.w3", "model.layers.3.block_sparse_moe.experts.26.w3", "model.layers.3.block_sparse_moe.experts.27.w3", "model.layers.3.block_sparse_moe.experts.28.w3", "model.layers.3.block_sparse_moe.experts.29.w3", "model.layers.3.block_sparse_moe.experts.30.w3", "model.layers.3.block_sparse_moe.experts.31.w3", "model.layers.3.block_sparse_moe.experts.32.w3", "model.layers.3.block_sparse_moe.experts.33.w3", "model.layers.3.block_sparse_moe.experts.34.w3", "model.layers.3.block_sparse_moe.experts.35.w3", "model.layers.3.block_sparse_moe.experts.36.w3", "model.layers.3.block_sparse_moe.experts.37.w3", "model.layers.3.block_sparse_moe.experts.38.w3", "model.layers.3.block_sparse_moe.experts.39.w3", "model.layers.3.block_sparse_moe.experts.40.w3", "model.layers.3.block_sparse_moe.experts.41.w3", "model.layers.3.block_sparse_moe.experts.42.w3", "model.layers.3.block_sparse_moe.experts.43.w3", "model.layers.3.block_sparse_moe.experts.44.w3", "model.layers.3.block_sparse_moe.experts.45.w3", "model.layers.3.block_sparse_moe.experts.46.w3", "model.layers.3.block_sparse_moe.experts.47.w3", "model.layers.3.block_sparse_moe.experts.48.w3", "model.layers.3.block_sparse_moe.experts.49.w3", "model.layers.3.block_sparse_moe.experts.50.w3", "model.layers.3.block_sparse_moe.experts.51.w3", "model.layers.3.block_sparse_moe.experts.52.w3", "model.layers.3.block_sparse_moe.experts.53.w3", "model.layers.3.block_sparse_moe.experts.54.w3", "model.layers.3.block_sparse_moe.experts.55.w3", "model.layers.3.block_sparse_moe.experts.56.w3", "model.layers.3.block_sparse_moe.experts.57.w3", "model.layers.3.block_sparse_moe.experts.58.w3", "model.layers.3.block_sparse_moe.experts.59.w3", "model.layers.3.block_sparse_moe.experts.60.w3", "model.layers.3.block_sparse_moe.experts.61.w3", "model.layers.3.block_sparse_moe.experts.62.w3", "model.layers.3.block_sparse_moe.experts.63.w3", "model.layers.3.block_sparse_moe.experts.64.w3", "model.layers.3.block_sparse_moe.experts.65.w3", "model.layers.3.block_sparse_moe.experts.66.w3", "model.layers.3.block_sparse_moe.experts.67.w3", "model.layers.3.block_sparse_moe.experts.68.w3", "model.layers.3.block_sparse_moe.experts.69.w3", "model.layers.3.block_sparse_moe.experts.70.w3", "model.layers.3.block_sparse_moe.experts.71.w3", "model.layers.3.block_sparse_moe.experts.72.w3", "model.layers.3.block_sparse_moe.experts.73.w3", "model.layers.3.block_sparse_moe.experts.74.w3", "model.layers.3.block_sparse_moe.experts.75.w3", "model.layers.3.block_sparse_moe.experts.76.w3", "model.layers.3.block_sparse_moe.experts.77.w3", "model.layers.3.block_sparse_moe.experts.78.w3", "model.layers.3.block_sparse_moe.experts.79.w3", "model.layers.3.block_sparse_moe.experts.80.w3", "model.layers.3.block_sparse_moe.experts.81.w3", "model.layers.3.block_sparse_moe.experts.82.w3", "model.layers.3.block_sparse_moe.experts.83.w3", "model.layers.3.block_sparse_moe.experts.84.w3", "model.layers.3.block_sparse_moe.experts.85.w3", "model.layers.3.block_sparse_moe.experts.86.w3", "model.layers.3.block_sparse_moe.experts.87.w3", "model.layers.3.block_sparse_moe.experts.88.w3", "model.layers.3.block_sparse_moe.experts.89.w3", "model.layers.3.block_sparse_moe.experts.90.w3", "model.layers.3.block_sparse_moe.experts.91.w3", "model.layers.3.block_sparse_moe.experts.92.w3", "model.layers.3.block_sparse_moe.experts.93.w3", "model.layers.3.block_sparse_moe.experts.94.w3", "model.layers.3.block_sparse_moe.experts.95.w3", "model.layers.3.block_sparse_moe.experts.96.w3", "model.layers.3.block_sparse_moe.experts.97.w3", "model.layers.3.block_sparse_moe.experts.98.w3", "model.layers.3.block_sparse_moe.experts.99.w3", "model.layers.3.block_sparse_moe.experts.100.w3", "model.layers.3.block_sparse_moe.experts.101.w3", "model.layers.3.block_sparse_moe.experts.102.w3", "model.layers.3.block_sparse_moe.experts.103.w3", "model.layers.3.block_sparse_moe.experts.104.w3", "model.layers.3.block_sparse_moe.experts.105.w3", "model.layers.3.block_sparse_moe.experts.106.w3", "model.layers.3.block_sparse_moe.experts.107.w3", "model.layers.3.block_sparse_moe.experts.108.w3", "model.layers.3.block_sparse_moe.experts.109.w3", "model.layers.3.block_sparse_moe.experts.110.w3", "model.layers.3.block_sparse_moe.experts.111.w3", "model.layers.3.block_sparse_moe.experts.112.w3", "model.layers.3.block_sparse_moe.experts.113.w3", "model.layers.3.block_sparse_moe.experts.114.w3", "model.layers.3.block_sparse_moe.experts.115.w3", "model.layers.3.block_sparse_moe.experts.116.w3", "model.layers.3.block_sparse_moe.experts.117.w3", "model.layers.3.block_sparse_moe.experts.118.w3", "model.layers.3.block_sparse_moe.experts.119.w3", "model.layers.3.block_sparse_moe.experts.120.w3", "model.layers.3.block_sparse_moe.experts.121.w3", "model.layers.3.block_sparse_moe.experts.122.w3", "model.layers.3.block_sparse_moe.experts.123.w3", "model.layers.3.block_sparse_moe.experts.124.w3", "model.layers.3.block_sparse_moe.experts.125.w3", "model.layers.3.block_sparse_moe.experts.126.w3", "model.layers.3.block_sparse_moe.experts.127.w3", "model.layers.3.block_sparse_moe.experts.128.w3", "model.layers.3.block_sparse_moe.experts.129.w3", "model.layers.3.block_sparse_moe.experts.130.w3", "model.layers.3.block_sparse_moe.experts.131.w3", "model.layers.3.block_sparse_moe.experts.132.w3", "model.layers.3.block_sparse_moe.experts.133.w3", "model.layers.3.block_sparse_moe.experts.134.w3", "model.layers.3.block_sparse_moe.experts.135.w3", "model.layers.3.block_sparse_moe.experts.136.w3", "model.layers.3.block_sparse_moe.experts.137.w3", "model.layers.3.block_sparse_moe.experts.138.w3", "model.layers.3.block_sparse_moe.experts.139.w3", "model.layers.3.block_sparse_moe.experts.140.w3", "model.layers.3.block_sparse_moe.experts.141.w3", "model.layers.3.block_sparse_moe.experts.142.w3", "model.layers.3.block_sparse_moe.experts.143.w3", "model.layers.3.block_sparse_moe.experts.144.w3", "model.layers.3.block_sparse_moe.experts.145.w3", "model.layers.3.block_sparse_moe.experts.146.w3", "model.layers.3.block_sparse_moe.experts.147.w3", "model.layers.3.block_sparse_moe.experts.148.w3", "model.layers.3.block_sparse_moe.experts.149.w3", "model.layers.3.block_sparse_moe.experts.150.w3", "model.layers.3.block_sparse_moe.experts.151.w3", "model.layers.3.block_sparse_moe.experts.152.w3", "model.layers.3.block_sparse_moe.experts.153.w3", "model.layers.3.block_sparse_moe.experts.154.w3", "model.layers.3.block_sparse_moe.experts.155.w3", "model.layers.3.block_sparse_moe.experts.156.w3", "model.layers.3.block_sparse_moe.experts.157.w3", "model.layers.3.block_sparse_moe.experts.158.w3", "model.layers.3.block_sparse_moe.experts.159.w3", "model.layers.3.block_sparse_moe.experts.160.w3", "model.layers.3.block_sparse_moe.experts.161.w3", "model.layers.3.block_sparse_moe.experts.162.w3", "model.layers.3.block_sparse_moe.experts.163.w3", "model.layers.3.block_sparse_moe.experts.164.w3", "model.layers.3.block_sparse_moe.experts.165.w3", "model.layers.3.block_sparse_moe.experts.166.w3", "model.layers.3.block_sparse_moe.experts.167.w3", "model.layers.3.block_sparse_moe.experts.168.w3", "model.layers.3.block_sparse_moe.experts.169.w3", "model.layers.3.block_sparse_moe.experts.170.w3", "model.layers.3.block_sparse_moe.experts.171.w3", "model.layers.3.block_sparse_moe.experts.172.w3", "model.layers.3.block_sparse_moe.experts.173.w3", "model.layers.3.block_sparse_moe.experts.174.w3", "model.layers.3.block_sparse_moe.experts.175.w3", "model.layers.3.block_sparse_moe.experts.176.w3", "model.layers.3.block_sparse_moe.experts.177.w3", "model.layers.3.block_sparse_moe.experts.178.w3", "model.layers.3.block_sparse_moe.experts.179.w3", "model.layers.3.block_sparse_moe.experts.180.w3", "model.layers.3.block_sparse_moe.experts.181.w3", "model.layers.3.block_sparse_moe.experts.182.w3", "model.layers.3.block_sparse_moe.experts.183.w3", "model.layers.3.block_sparse_moe.experts.184.w3", "model.layers.3.block_sparse_moe.experts.185.w3", "model.layers.3.block_sparse_moe.experts.186.w3", "model.layers.3.block_sparse_moe.experts.187.w3", "model.layers.3.block_sparse_moe.experts.188.w3", "model.layers.3.block_sparse_moe.experts.189.w3", "model.layers.3.block_sparse_moe.experts.190.w3", "model.layers.3.block_sparse_moe.experts.191.w3", "model.layers.3.block_sparse_moe.experts.192.w3", "model.layers.3.block_sparse_moe.experts.193.w3", "model.layers.3.block_sparse_moe.experts.194.w3", "model.layers.3.block_sparse_moe.experts.195.w3", "model.layers.3.block_sparse_moe.experts.196.w3", "model.layers.3.block_sparse_moe.experts.197.w3", "model.layers.3.block_sparse_moe.experts.198.w3", "model.layers.3.block_sparse_moe.experts.199.w3", "model.layers.3.block_sparse_moe.experts.200.w3", "model.layers.3.block_sparse_moe.experts.201.w3", "model.layers.3.block_sparse_moe.experts.202.w3", "model.layers.3.block_sparse_moe.experts.203.w3", "model.layers.3.block_sparse_moe.experts.204.w3", "model.layers.3.block_sparse_moe.experts.205.w3", "model.layers.3.block_sparse_moe.experts.206.w3", "model.layers.3.block_sparse_moe.experts.207.w3", "model.layers.3.block_sparse_moe.experts.208.w3", "model.layers.3.block_sparse_moe.experts.209.w3", "model.layers.3.block_sparse_moe.experts.210.w3", "model.layers.3.block_sparse_moe.experts.211.w3", "model.layers.3.block_sparse_moe.experts.212.w3", "model.layers.3.block_sparse_moe.experts.213.w3", "model.layers.3.block_sparse_moe.experts.214.w3", "model.layers.3.block_sparse_moe.experts.215.w3", "model.layers.3.block_sparse_moe.experts.216.w3", "model.layers.3.block_sparse_moe.experts.217.w3", "model.layers.3.block_sparse_moe.experts.218.w3", "model.layers.3.block_sparse_moe.experts.219.w3", "model.layers.3.block_sparse_moe.experts.220.w3", "model.layers.3.block_sparse_moe.experts.221.w3", "model.layers.3.block_sparse_moe.experts.222.w3", "model.layers.3.block_sparse_moe.experts.223.w3", "model.layers.3.block_sparse_moe.experts.224.w3", "model.layers.3.block_sparse_moe.experts.225.w3", "model.layers.3.block_sparse_moe.experts.226.w3", "model.layers.3.block_sparse_moe.experts.227.w3", "model.layers.3.block_sparse_moe.experts.228.w3", "model.layers.3.block_sparse_moe.experts.229.w3", "model.layers.3.block_sparse_moe.experts.230.w3", "model.layers.3.block_sparse_moe.experts.231.w3", "model.layers.3.block_sparse_moe.experts.232.w3", "model.layers.3.block_sparse_moe.experts.233.w3", "model.layers.3.block_sparse_moe.experts.234.w3", "model.layers.3.block_sparse_moe.experts.235.w3", "model.layers.3.block_sparse_moe.experts.236.w3", "model.layers.3.block_sparse_moe.experts.237.w3", "model.layers.3.block_sparse_moe.experts.238.w3", "model.layers.3.block_sparse_moe.experts.239.w3", "model.layers.3.block_sparse_moe.experts.240.w3", "model.layers.3.block_sparse_moe.experts.241.w3", "model.layers.3.block_sparse_moe.experts.242.w3", "model.layers.3.block_sparse_moe.experts.243.w3", "model.layers.3.block_sparse_moe.experts.244.w3", "model.layers.3.block_sparse_moe.experts.245.w3", "model.layers.3.block_sparse_moe.experts.246.w3", "model.layers.3.block_sparse_moe.experts.247.w3", "model.layers.3.block_sparse_moe.experts.248.w3", "model.layers.3.block_sparse_moe.experts.249.w3", "model.layers.3.block_sparse_moe.experts.250.w3", "model.layers.3.block_sparse_moe.experts.251.w3", "model.layers.3.block_sparse_moe.experts.252.w3", "model.layers.3.block_sparse_moe.experts.253.w3", "model.layers.3.block_sparse_moe.experts.254.w3", "model.layers.3.block_sparse_moe.experts.255.w3", "model.layers.3.block_sparse_moe.experts.0.w2", "model.layers.3.block_sparse_moe.experts.1.w2", "model.layers.3.block_sparse_moe.experts.2.w2", "model.layers.3.block_sparse_moe.experts.3.w2", "model.layers.3.block_sparse_moe.experts.4.w2", "model.layers.3.block_sparse_moe.experts.5.w2", "model.layers.3.block_sparse_moe.experts.6.w2", "model.layers.3.block_sparse_moe.experts.7.w2", "model.layers.3.block_sparse_moe.experts.8.w2", "model.layers.3.block_sparse_moe.experts.9.w2", "model.layers.3.block_sparse_moe.experts.10.w2", "model.layers.3.block_sparse_moe.experts.11.w2", "model.layers.3.block_sparse_moe.experts.12.w2", "model.layers.3.block_sparse_moe.experts.13.w2", "model.layers.3.block_sparse_moe.experts.14.w2", "model.layers.3.block_sparse_moe.experts.15.w2", "model.layers.3.block_sparse_moe.experts.16.w2", "model.layers.3.block_sparse_moe.experts.17.w2", "model.layers.3.block_sparse_moe.experts.18.w2", "model.layers.3.block_sparse_moe.experts.19.w2", "model.layers.3.block_sparse_moe.experts.20.w2", "model.layers.3.block_sparse_moe.experts.21.w2", "model.layers.3.block_sparse_moe.experts.22.w2", "model.layers.3.block_sparse_moe.experts.23.w2", "model.layers.3.block_sparse_moe.experts.24.w2", "model.layers.3.block_sparse_moe.experts.25.w2", "model.layers.3.block_sparse_moe.experts.26.w2", "model.layers.3.block_sparse_moe.experts.27.w2", "model.layers.3.block_sparse_moe.experts.28.w2", "model.layers.3.block_sparse_moe.experts.29.w2", "model.layers.3.block_sparse_moe.experts.30.w2", "model.layers.3.block_sparse_moe.experts.31.w2", "model.layers.3.block_sparse_moe.experts.32.w2", "model.layers.3.block_sparse_moe.experts.33.w2", "model.layers.3.block_sparse_moe.experts.34.w2", "model.layers.3.block_sparse_moe.experts.35.w2", "model.layers.3.block_sparse_moe.experts.36.w2", "model.layers.3.block_sparse_moe.experts.37.w2", "model.layers.3.block_sparse_moe.experts.38.w2", "model.layers.3.block_sparse_moe.experts.39.w2", "model.layers.3.block_sparse_moe.experts.40.w2", "model.layers.3.block_sparse_moe.experts.41.w2", "model.layers.3.block_sparse_moe.experts.42.w2", "model.layers.3.block_sparse_moe.experts.43.w2", "model.layers.3.block_sparse_moe.experts.44.w2", "model.layers.3.block_sparse_moe.experts.45.w2", "model.layers.3.block_sparse_moe.experts.46.w2", "model.layers.3.block_sparse_moe.experts.47.w2", "model.layers.3.block_sparse_moe.experts.48.w2", "model.layers.3.block_sparse_moe.experts.49.w2", "model.layers.3.block_sparse_moe.experts.50.w2", "model.layers.3.block_sparse_moe.experts.51.w2", "model.layers.3.block_sparse_moe.experts.52.w2", "model.layers.3.block_sparse_moe.experts.53.w2", "model.layers.3.block_sparse_moe.experts.54.w2", "model.layers.3.block_sparse_moe.experts.55.w2", "model.layers.3.block_sparse_moe.experts.56.w2", "model.layers.3.block_sparse_moe.experts.57.w2", "model.layers.3.block_sparse_moe.experts.58.w2", "model.layers.3.block_sparse_moe.experts.59.w2", "model.layers.3.block_sparse_moe.experts.60.w2", "model.layers.3.block_sparse_moe.experts.61.w2", "model.layers.3.block_sparse_moe.experts.62.w2", "model.layers.3.block_sparse_moe.experts.63.w2", "model.layers.3.block_sparse_moe.experts.64.w2", "model.layers.3.block_sparse_moe.experts.65.w2", "model.layers.3.block_sparse_moe.experts.66.w2", "model.layers.3.block_sparse_moe.experts.67.w2", "model.layers.3.block_sparse_moe.experts.68.w2", "model.layers.3.block_sparse_moe.experts.69.w2", "model.layers.3.block_sparse_moe.experts.70.w2", "model.layers.3.block_sparse_moe.experts.71.w2", "model.layers.3.block_sparse_moe.experts.72.w2", "model.layers.3.block_sparse_moe.experts.73.w2", "model.layers.3.block_sparse_moe.experts.74.w2", "model.layers.3.block_sparse_moe.experts.75.w2", "model.layers.3.block_sparse_moe.experts.76.w2", "model.layers.3.block_sparse_moe.experts.77.w2", "model.layers.3.block_sparse_moe.experts.78.w2", "model.layers.3.block_sparse_moe.experts.79.w2", "model.layers.3.block_sparse_moe.experts.80.w2", "model.layers.3.block_sparse_moe.experts.81.w2", "model.layers.3.block_sparse_moe.experts.82.w2", "model.layers.3.block_sparse_moe.experts.83.w2", "model.layers.3.block_sparse_moe.experts.84.w2", "model.layers.3.block_sparse_moe.experts.85.w2", "model.layers.3.block_sparse_moe.experts.86.w2", "model.layers.3.block_sparse_moe.experts.87.w2", "model.layers.3.block_sparse_moe.experts.88.w2", "model.layers.3.block_sparse_moe.experts.89.w2", "model.layers.3.block_sparse_moe.experts.90.w2", "model.layers.3.block_sparse_moe.experts.91.w2", "model.layers.3.block_sparse_moe.experts.92.w2", "model.layers.3.block_sparse_moe.experts.93.w2", "model.layers.3.block_sparse_moe.experts.94.w2", "model.layers.3.block_sparse_moe.experts.95.w2", "model.layers.3.block_sparse_moe.experts.96.w2", "model.layers.3.block_sparse_moe.experts.97.w2", "model.layers.3.block_sparse_moe.experts.98.w2", "model.layers.3.block_sparse_moe.experts.99.w2", "model.layers.3.block_sparse_moe.experts.100.w2", "model.layers.3.block_sparse_moe.experts.101.w2", "model.layers.3.block_sparse_moe.experts.102.w2", "model.layers.3.block_sparse_moe.experts.103.w2", "model.layers.3.block_sparse_moe.experts.104.w2", "model.layers.3.block_sparse_moe.experts.105.w2", "model.layers.3.block_sparse_moe.experts.106.w2", "model.layers.3.block_sparse_moe.experts.107.w2", "model.layers.3.block_sparse_moe.experts.108.w2", "model.layers.3.block_sparse_moe.experts.109.w2", "model.layers.3.block_sparse_moe.experts.110.w2", "model.layers.3.block_sparse_moe.experts.111.w2", "model.layers.3.block_sparse_moe.experts.112.w2", "model.layers.3.block_sparse_moe.experts.113.w2", "model.layers.3.block_sparse_moe.experts.114.w2", "model.layers.3.block_sparse_moe.experts.115.w2", "model.layers.3.block_sparse_moe.experts.116.w2", "model.layers.3.block_sparse_moe.experts.117.w2", "model.layers.3.block_sparse_moe.experts.118.w2", "model.layers.3.block_sparse_moe.experts.119.w2", "model.layers.3.block_sparse_moe.experts.120.w2", "model.layers.3.block_sparse_moe.experts.121.w2", "model.layers.3.block_sparse_moe.experts.122.w2", "model.layers.3.block_sparse_moe.experts.123.w2", "model.layers.3.block_sparse_moe.experts.124.w2", "model.layers.3.block_sparse_moe.experts.125.w2", "model.layers.3.block_sparse_moe.experts.126.w2", "model.layers.3.block_sparse_moe.experts.127.w2", "model.layers.3.block_sparse_moe.experts.128.w2", "model.layers.3.block_sparse_moe.experts.129.w2", "model.layers.3.block_sparse_moe.experts.130.w2", "model.layers.3.block_sparse_moe.experts.131.w2", "model.layers.3.block_sparse_moe.experts.132.w2", "model.layers.3.block_sparse_moe.experts.133.w2", "model.layers.3.block_sparse_moe.experts.134.w2", "model.layers.3.block_sparse_moe.experts.135.w2", "model.layers.3.block_sparse_moe.experts.136.w2", "model.layers.3.block_sparse_moe.experts.137.w2", "model.layers.3.block_sparse_moe.experts.138.w2", "model.layers.3.block_sparse_moe.experts.139.w2", "model.layers.3.block_sparse_moe.experts.140.w2", "model.layers.3.block_sparse_moe.experts.141.w2", "model.layers.3.block_sparse_moe.experts.142.w2", "model.layers.3.block_sparse_moe.experts.143.w2", "model.layers.3.block_sparse_moe.experts.144.w2", "model.layers.3.block_sparse_moe.experts.145.w2", "model.layers.3.block_sparse_moe.experts.146.w2", "model.layers.3.block_sparse_moe.experts.147.w2", "model.layers.3.block_sparse_moe.experts.148.w2", "model.layers.3.block_sparse_moe.experts.149.w2", "model.layers.3.block_sparse_moe.experts.150.w2", "model.layers.3.block_sparse_moe.experts.151.w2", "model.layers.3.block_sparse_moe.experts.152.w2", "model.layers.3.block_sparse_moe.experts.153.w2", "model.layers.3.block_sparse_moe.experts.154.w2", "model.layers.3.block_sparse_moe.experts.155.w2", "model.layers.3.block_sparse_moe.experts.156.w2", "model.layers.3.block_sparse_moe.experts.157.w2", "model.layers.3.block_sparse_moe.experts.158.w2", "model.layers.3.block_sparse_moe.experts.159.w2", "model.layers.3.block_sparse_moe.experts.160.w2", "model.layers.3.block_sparse_moe.experts.161.w2", "model.layers.3.block_sparse_moe.experts.162.w2", "model.layers.3.block_sparse_moe.experts.163.w2", "model.layers.3.block_sparse_moe.experts.164.w2", "model.layers.3.block_sparse_moe.experts.165.w2", "model.layers.3.block_sparse_moe.experts.166.w2", "model.layers.3.block_sparse_moe.experts.167.w2", "model.layers.3.block_sparse_moe.experts.168.w2", "model.layers.3.block_sparse_moe.experts.169.w2", "model.layers.3.block_sparse_moe.experts.170.w2", "model.layers.3.block_sparse_moe.experts.171.w2", "model.layers.3.block_sparse_moe.experts.172.w2", "model.layers.3.block_sparse_moe.experts.173.w2", "model.layers.3.block_sparse_moe.experts.174.w2", "model.layers.3.block_sparse_moe.experts.175.w2", "model.layers.3.block_sparse_moe.experts.176.w2", "model.layers.3.block_sparse_moe.experts.177.w2", "model.layers.3.block_sparse_moe.experts.178.w2", "model.layers.3.block_sparse_moe.experts.179.w2", "model.layers.3.block_sparse_moe.experts.180.w2", "model.layers.3.block_sparse_moe.experts.181.w2", "model.layers.3.block_sparse_moe.experts.182.w2", "model.layers.3.block_sparse_moe.experts.183.w2", "model.layers.3.block_sparse_moe.experts.184.w2", "model.layers.3.block_sparse_moe.experts.185.w2", "model.layers.3.block_sparse_moe.experts.186.w2", "model.layers.3.block_sparse_moe.experts.187.w2", "model.layers.3.block_sparse_moe.experts.188.w2", "model.layers.3.block_sparse_moe.experts.189.w2", "model.layers.3.block_sparse_moe.experts.190.w2", "model.layers.3.block_sparse_moe.experts.191.w2", "model.layers.3.block_sparse_moe.experts.192.w2", "model.layers.3.block_sparse_moe.experts.193.w2", "model.layers.3.block_sparse_moe.experts.194.w2", "model.layers.3.block_sparse_moe.experts.195.w2", "model.layers.3.block_sparse_moe.experts.196.w2", "model.layers.3.block_sparse_moe.experts.197.w2", "model.layers.3.block_sparse_moe.experts.198.w2", "model.layers.3.block_sparse_moe.experts.199.w2", "model.layers.3.block_sparse_moe.experts.200.w2", "model.layers.3.block_sparse_moe.experts.201.w2", "model.layers.3.block_sparse_moe.experts.202.w2", "model.layers.3.block_sparse_moe.experts.203.w2", "model.layers.3.block_sparse_moe.experts.204.w2", "model.layers.3.block_sparse_moe.experts.205.w2", "model.layers.3.block_sparse_moe.experts.206.w2", "model.layers.3.block_sparse_moe.experts.207.w2", "model.layers.3.block_sparse_moe.experts.208.w2", "model.layers.3.block_sparse_moe.experts.209.w2", "model.layers.3.block_sparse_moe.experts.210.w2", "model.layers.3.block_sparse_moe.experts.211.w2", "model.layers.3.block_sparse_moe.experts.212.w2", "model.layers.3.block_sparse_moe.experts.213.w2", "model.layers.3.block_sparse_moe.experts.214.w2", "model.layers.3.block_sparse_moe.experts.215.w2", "model.layers.3.block_sparse_moe.experts.216.w2", "model.layers.3.block_sparse_moe.experts.217.w2", "model.layers.3.block_sparse_moe.experts.218.w2", "model.layers.3.block_sparse_moe.experts.219.w2", "model.layers.3.block_sparse_moe.experts.220.w2", "model.layers.3.block_sparse_moe.experts.221.w2", "model.layers.3.block_sparse_moe.experts.222.w2", "model.layers.3.block_sparse_moe.experts.223.w2", "model.layers.3.block_sparse_moe.experts.224.w2", "model.layers.3.block_sparse_moe.experts.225.w2", "model.layers.3.block_sparse_moe.experts.226.w2", "model.layers.3.block_sparse_moe.experts.227.w2", "model.layers.3.block_sparse_moe.experts.228.w2", "model.layers.3.block_sparse_moe.experts.229.w2", "model.layers.3.block_sparse_moe.experts.230.w2", "model.layers.3.block_sparse_moe.experts.231.w2", "model.layers.3.block_sparse_moe.experts.232.w2", "model.layers.3.block_sparse_moe.experts.233.w2", "model.layers.3.block_sparse_moe.experts.234.w2", "model.layers.3.block_sparse_moe.experts.235.w2", "model.layers.3.block_sparse_moe.experts.236.w2", "model.layers.3.block_sparse_moe.experts.237.w2", "model.layers.3.block_sparse_moe.experts.238.w2", "model.layers.3.block_sparse_moe.experts.239.w2", "model.layers.3.block_sparse_moe.experts.240.w2", "model.layers.3.block_sparse_moe.experts.241.w2", "model.layers.3.block_sparse_moe.experts.242.w2", "model.layers.3.block_sparse_moe.experts.243.w2", "model.layers.3.block_sparse_moe.experts.244.w2", "model.layers.3.block_sparse_moe.experts.245.w2", "model.layers.3.block_sparse_moe.experts.246.w2", "model.layers.3.block_sparse_moe.experts.247.w2", "model.layers.3.block_sparse_moe.experts.248.w2", "model.layers.3.block_sparse_moe.experts.249.w2", "model.layers.3.block_sparse_moe.experts.250.w2", "model.layers.3.block_sparse_moe.experts.251.w2", "model.layers.3.block_sparse_moe.experts.252.w2", "model.layers.3.block_sparse_moe.experts.253.w2", "model.layers.3.block_sparse_moe.experts.254.w2", "model.layers.3.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.010562162101268757, "dbits": 3623878656 } ] }, { "idx": 8, "layers": [ "model.layers.4.self_attn.q_proj", "model.layers.4.self_attn.k_proj", "model.layers.4.self_attn.v_proj", "model.layers.4.self_attn.o_proj" ], "candidates": [ { "dkld": -0.008409646898508039, "dbits": 44040192 } ] }, { "idx": 9, "layers": [ "model.layers.4.block_sparse_moe.experts.0.w1", "model.layers.4.block_sparse_moe.experts.1.w1", "model.layers.4.block_sparse_moe.experts.2.w1", "model.layers.4.block_sparse_moe.experts.3.w1", "model.layers.4.block_sparse_moe.experts.4.w1", "model.layers.4.block_sparse_moe.experts.5.w1", "model.layers.4.block_sparse_moe.experts.6.w1", "model.layers.4.block_sparse_moe.experts.7.w1", "model.layers.4.block_sparse_moe.experts.8.w1", "model.layers.4.block_sparse_moe.experts.9.w1", "model.layers.4.block_sparse_moe.experts.10.w1", "model.layers.4.block_sparse_moe.experts.11.w1", "model.layers.4.block_sparse_moe.experts.12.w1", "model.layers.4.block_sparse_moe.experts.13.w1", "model.layers.4.block_sparse_moe.experts.14.w1", "model.layers.4.block_sparse_moe.experts.15.w1", "model.layers.4.block_sparse_moe.experts.16.w1", "model.layers.4.block_sparse_moe.experts.17.w1", "model.layers.4.block_sparse_moe.experts.18.w1", "model.layers.4.block_sparse_moe.experts.19.w1", "model.layers.4.block_sparse_moe.experts.20.w1", "model.layers.4.block_sparse_moe.experts.21.w1", "model.layers.4.block_sparse_moe.experts.22.w1", "model.layers.4.block_sparse_moe.experts.23.w1", "model.layers.4.block_sparse_moe.experts.24.w1", "model.layers.4.block_sparse_moe.experts.25.w1", "model.layers.4.block_sparse_moe.experts.26.w1", "model.layers.4.block_sparse_moe.experts.27.w1", "model.layers.4.block_sparse_moe.experts.28.w1", "model.layers.4.block_sparse_moe.experts.29.w1", "model.layers.4.block_sparse_moe.experts.30.w1", "model.layers.4.block_sparse_moe.experts.31.w1", "model.layers.4.block_sparse_moe.experts.32.w1", "model.layers.4.block_sparse_moe.experts.33.w1", "model.layers.4.block_sparse_moe.experts.34.w1", "model.layers.4.block_sparse_moe.experts.35.w1", "model.layers.4.block_sparse_moe.experts.36.w1", "model.layers.4.block_sparse_moe.experts.37.w1", "model.layers.4.block_sparse_moe.experts.38.w1", "model.layers.4.block_sparse_moe.experts.39.w1", "model.layers.4.block_sparse_moe.experts.40.w1", "model.layers.4.block_sparse_moe.experts.41.w1", "model.layers.4.block_sparse_moe.experts.42.w1", "model.layers.4.block_sparse_moe.experts.43.w1", "model.layers.4.block_sparse_moe.experts.44.w1", "model.layers.4.block_sparse_moe.experts.45.w1", "model.layers.4.block_sparse_moe.experts.46.w1", "model.layers.4.block_sparse_moe.experts.47.w1", "model.layers.4.block_sparse_moe.experts.48.w1", "model.layers.4.block_sparse_moe.experts.49.w1", "model.layers.4.block_sparse_moe.experts.50.w1", "model.layers.4.block_sparse_moe.experts.51.w1", "model.layers.4.block_sparse_moe.experts.52.w1", "model.layers.4.block_sparse_moe.experts.53.w1", "model.layers.4.block_sparse_moe.experts.54.w1", "model.layers.4.block_sparse_moe.experts.55.w1", "model.layers.4.block_sparse_moe.experts.56.w1", "model.layers.4.block_sparse_moe.experts.57.w1", "model.layers.4.block_sparse_moe.experts.58.w1", "model.layers.4.block_sparse_moe.experts.59.w1", "model.layers.4.block_sparse_moe.experts.60.w1", "model.layers.4.block_sparse_moe.experts.61.w1", "model.layers.4.block_sparse_moe.experts.62.w1", "model.layers.4.block_sparse_moe.experts.63.w1", "model.layers.4.block_sparse_moe.experts.64.w1", "model.layers.4.block_sparse_moe.experts.65.w1", "model.layers.4.block_sparse_moe.experts.66.w1", "model.layers.4.block_sparse_moe.experts.67.w1", "model.layers.4.block_sparse_moe.experts.68.w1", "model.layers.4.block_sparse_moe.experts.69.w1", "model.layers.4.block_sparse_moe.experts.70.w1", "model.layers.4.block_sparse_moe.experts.71.w1", "model.layers.4.block_sparse_moe.experts.72.w1", "model.layers.4.block_sparse_moe.experts.73.w1", "model.layers.4.block_sparse_moe.experts.74.w1", "model.layers.4.block_sparse_moe.experts.75.w1", "model.layers.4.block_sparse_moe.experts.76.w1", "model.layers.4.block_sparse_moe.experts.77.w1", "model.layers.4.block_sparse_moe.experts.78.w1", "model.layers.4.block_sparse_moe.experts.79.w1", "model.layers.4.block_sparse_moe.experts.80.w1", "model.layers.4.block_sparse_moe.experts.81.w1", "model.layers.4.block_sparse_moe.experts.82.w1", "model.layers.4.block_sparse_moe.experts.83.w1", "model.layers.4.block_sparse_moe.experts.84.w1", "model.layers.4.block_sparse_moe.experts.85.w1", "model.layers.4.block_sparse_moe.experts.86.w1", "model.layers.4.block_sparse_moe.experts.87.w1", "model.layers.4.block_sparse_moe.experts.88.w1", "model.layers.4.block_sparse_moe.experts.89.w1", "model.layers.4.block_sparse_moe.experts.90.w1", "model.layers.4.block_sparse_moe.experts.91.w1", "model.layers.4.block_sparse_moe.experts.92.w1", "model.layers.4.block_sparse_moe.experts.93.w1", "model.layers.4.block_sparse_moe.experts.94.w1", "model.layers.4.block_sparse_moe.experts.95.w1", "model.layers.4.block_sparse_moe.experts.96.w1", "model.layers.4.block_sparse_moe.experts.97.w1", "model.layers.4.block_sparse_moe.experts.98.w1", "model.layers.4.block_sparse_moe.experts.99.w1", "model.layers.4.block_sparse_moe.experts.100.w1", "model.layers.4.block_sparse_moe.experts.101.w1", "model.layers.4.block_sparse_moe.experts.102.w1", "model.layers.4.block_sparse_moe.experts.103.w1", "model.layers.4.block_sparse_moe.experts.104.w1", "model.layers.4.block_sparse_moe.experts.105.w1", "model.layers.4.block_sparse_moe.experts.106.w1", "model.layers.4.block_sparse_moe.experts.107.w1", "model.layers.4.block_sparse_moe.experts.108.w1", "model.layers.4.block_sparse_moe.experts.109.w1", "model.layers.4.block_sparse_moe.experts.110.w1", "model.layers.4.block_sparse_moe.experts.111.w1", "model.layers.4.block_sparse_moe.experts.112.w1", "model.layers.4.block_sparse_moe.experts.113.w1", "model.layers.4.block_sparse_moe.experts.114.w1", "model.layers.4.block_sparse_moe.experts.115.w1", "model.layers.4.block_sparse_moe.experts.116.w1", "model.layers.4.block_sparse_moe.experts.117.w1", "model.layers.4.block_sparse_moe.experts.118.w1", "model.layers.4.block_sparse_moe.experts.119.w1", "model.layers.4.block_sparse_moe.experts.120.w1", "model.layers.4.block_sparse_moe.experts.121.w1", "model.layers.4.block_sparse_moe.experts.122.w1", "model.layers.4.block_sparse_moe.experts.123.w1", "model.layers.4.block_sparse_moe.experts.124.w1", "model.layers.4.block_sparse_moe.experts.125.w1", "model.layers.4.block_sparse_moe.experts.126.w1", "model.layers.4.block_sparse_moe.experts.127.w1", "model.layers.4.block_sparse_moe.experts.128.w1", "model.layers.4.block_sparse_moe.experts.129.w1", "model.layers.4.block_sparse_moe.experts.130.w1", "model.layers.4.block_sparse_moe.experts.131.w1", "model.layers.4.block_sparse_moe.experts.132.w1", "model.layers.4.block_sparse_moe.experts.133.w1", "model.layers.4.block_sparse_moe.experts.134.w1", "model.layers.4.block_sparse_moe.experts.135.w1", "model.layers.4.block_sparse_moe.experts.136.w1", "model.layers.4.block_sparse_moe.experts.137.w1", "model.layers.4.block_sparse_moe.experts.138.w1", "model.layers.4.block_sparse_moe.experts.139.w1", "model.layers.4.block_sparse_moe.experts.140.w1", "model.layers.4.block_sparse_moe.experts.141.w1", "model.layers.4.block_sparse_moe.experts.142.w1", "model.layers.4.block_sparse_moe.experts.143.w1", "model.layers.4.block_sparse_moe.experts.144.w1", "model.layers.4.block_sparse_moe.experts.145.w1", "model.layers.4.block_sparse_moe.experts.146.w1", "model.layers.4.block_sparse_moe.experts.147.w1", "model.layers.4.block_sparse_moe.experts.148.w1", "model.layers.4.block_sparse_moe.experts.149.w1", "model.layers.4.block_sparse_moe.experts.150.w1", "model.layers.4.block_sparse_moe.experts.151.w1", "model.layers.4.block_sparse_moe.experts.152.w1", "model.layers.4.block_sparse_moe.experts.153.w1", "model.layers.4.block_sparse_moe.experts.154.w1", "model.layers.4.block_sparse_moe.experts.155.w1", "model.layers.4.block_sparse_moe.experts.156.w1", "model.layers.4.block_sparse_moe.experts.157.w1", "model.layers.4.block_sparse_moe.experts.158.w1", "model.layers.4.block_sparse_moe.experts.159.w1", "model.layers.4.block_sparse_moe.experts.160.w1", "model.layers.4.block_sparse_moe.experts.161.w1", "model.layers.4.block_sparse_moe.experts.162.w1", "model.layers.4.block_sparse_moe.experts.163.w1", "model.layers.4.block_sparse_moe.experts.164.w1", "model.layers.4.block_sparse_moe.experts.165.w1", "model.layers.4.block_sparse_moe.experts.166.w1", "model.layers.4.block_sparse_moe.experts.167.w1", "model.layers.4.block_sparse_moe.experts.168.w1", "model.layers.4.block_sparse_moe.experts.169.w1", "model.layers.4.block_sparse_moe.experts.170.w1", "model.layers.4.block_sparse_moe.experts.171.w1", "model.layers.4.block_sparse_moe.experts.172.w1", "model.layers.4.block_sparse_moe.experts.173.w1", "model.layers.4.block_sparse_moe.experts.174.w1", "model.layers.4.block_sparse_moe.experts.175.w1", "model.layers.4.block_sparse_moe.experts.176.w1", "model.layers.4.block_sparse_moe.experts.177.w1", "model.layers.4.block_sparse_moe.experts.178.w1", "model.layers.4.block_sparse_moe.experts.179.w1", "model.layers.4.block_sparse_moe.experts.180.w1", "model.layers.4.block_sparse_moe.experts.181.w1", "model.layers.4.block_sparse_moe.experts.182.w1", "model.layers.4.block_sparse_moe.experts.183.w1", "model.layers.4.block_sparse_moe.experts.184.w1", "model.layers.4.block_sparse_moe.experts.185.w1", "model.layers.4.block_sparse_moe.experts.186.w1", "model.layers.4.block_sparse_moe.experts.187.w1", "model.layers.4.block_sparse_moe.experts.188.w1", "model.layers.4.block_sparse_moe.experts.189.w1", "model.layers.4.block_sparse_moe.experts.190.w1", "model.layers.4.block_sparse_moe.experts.191.w1", "model.layers.4.block_sparse_moe.experts.192.w1", "model.layers.4.block_sparse_moe.experts.193.w1", "model.layers.4.block_sparse_moe.experts.194.w1", "model.layers.4.block_sparse_moe.experts.195.w1", "model.layers.4.block_sparse_moe.experts.196.w1", "model.layers.4.block_sparse_moe.experts.197.w1", "model.layers.4.block_sparse_moe.experts.198.w1", "model.layers.4.block_sparse_moe.experts.199.w1", "model.layers.4.block_sparse_moe.experts.200.w1", "model.layers.4.block_sparse_moe.experts.201.w1", "model.layers.4.block_sparse_moe.experts.202.w1", "model.layers.4.block_sparse_moe.experts.203.w1", "model.layers.4.block_sparse_moe.experts.204.w1", "model.layers.4.block_sparse_moe.experts.205.w1", "model.layers.4.block_sparse_moe.experts.206.w1", "model.layers.4.block_sparse_moe.experts.207.w1", "model.layers.4.block_sparse_moe.experts.208.w1", "model.layers.4.block_sparse_moe.experts.209.w1", "model.layers.4.block_sparse_moe.experts.210.w1", "model.layers.4.block_sparse_moe.experts.211.w1", "model.layers.4.block_sparse_moe.experts.212.w1", "model.layers.4.block_sparse_moe.experts.213.w1", "model.layers.4.block_sparse_moe.experts.214.w1", "model.layers.4.block_sparse_moe.experts.215.w1", "model.layers.4.block_sparse_moe.experts.216.w1", "model.layers.4.block_sparse_moe.experts.217.w1", "model.layers.4.block_sparse_moe.experts.218.w1", "model.layers.4.block_sparse_moe.experts.219.w1", "model.layers.4.block_sparse_moe.experts.220.w1", "model.layers.4.block_sparse_moe.experts.221.w1", "model.layers.4.block_sparse_moe.experts.222.w1", "model.layers.4.block_sparse_moe.experts.223.w1", "model.layers.4.block_sparse_moe.experts.224.w1", "model.layers.4.block_sparse_moe.experts.225.w1", "model.layers.4.block_sparse_moe.experts.226.w1", "model.layers.4.block_sparse_moe.experts.227.w1", "model.layers.4.block_sparse_moe.experts.228.w1", "model.layers.4.block_sparse_moe.experts.229.w1", "model.layers.4.block_sparse_moe.experts.230.w1", "model.layers.4.block_sparse_moe.experts.231.w1", "model.layers.4.block_sparse_moe.experts.232.w1", "model.layers.4.block_sparse_moe.experts.233.w1", "model.layers.4.block_sparse_moe.experts.234.w1", "model.layers.4.block_sparse_moe.experts.235.w1", "model.layers.4.block_sparse_moe.experts.236.w1", "model.layers.4.block_sparse_moe.experts.237.w1", "model.layers.4.block_sparse_moe.experts.238.w1", "model.layers.4.block_sparse_moe.experts.239.w1", "model.layers.4.block_sparse_moe.experts.240.w1", "model.layers.4.block_sparse_moe.experts.241.w1", "model.layers.4.block_sparse_moe.experts.242.w1", "model.layers.4.block_sparse_moe.experts.243.w1", "model.layers.4.block_sparse_moe.experts.244.w1", "model.layers.4.block_sparse_moe.experts.245.w1", "model.layers.4.block_sparse_moe.experts.246.w1", "model.layers.4.block_sparse_moe.experts.247.w1", "model.layers.4.block_sparse_moe.experts.248.w1", "model.layers.4.block_sparse_moe.experts.249.w1", "model.layers.4.block_sparse_moe.experts.250.w1", "model.layers.4.block_sparse_moe.experts.251.w1", "model.layers.4.block_sparse_moe.experts.252.w1", "model.layers.4.block_sparse_moe.experts.253.w1", "model.layers.4.block_sparse_moe.experts.254.w1", "model.layers.4.block_sparse_moe.experts.255.w1", "model.layers.4.block_sparse_moe.experts.0.w3", "model.layers.4.block_sparse_moe.experts.1.w3", "model.layers.4.block_sparse_moe.experts.2.w3", "model.layers.4.block_sparse_moe.experts.3.w3", "model.layers.4.block_sparse_moe.experts.4.w3", "model.layers.4.block_sparse_moe.experts.5.w3", "model.layers.4.block_sparse_moe.experts.6.w3", "model.layers.4.block_sparse_moe.experts.7.w3", "model.layers.4.block_sparse_moe.experts.8.w3", "model.layers.4.block_sparse_moe.experts.9.w3", "model.layers.4.block_sparse_moe.experts.10.w3", "model.layers.4.block_sparse_moe.experts.11.w3", "model.layers.4.block_sparse_moe.experts.12.w3", "model.layers.4.block_sparse_moe.experts.13.w3", "model.layers.4.block_sparse_moe.experts.14.w3", "model.layers.4.block_sparse_moe.experts.15.w3", "model.layers.4.block_sparse_moe.experts.16.w3", "model.layers.4.block_sparse_moe.experts.17.w3", "model.layers.4.block_sparse_moe.experts.18.w3", "model.layers.4.block_sparse_moe.experts.19.w3", "model.layers.4.block_sparse_moe.experts.20.w3", "model.layers.4.block_sparse_moe.experts.21.w3", "model.layers.4.block_sparse_moe.experts.22.w3", "model.layers.4.block_sparse_moe.experts.23.w3", "model.layers.4.block_sparse_moe.experts.24.w3", "model.layers.4.block_sparse_moe.experts.25.w3", "model.layers.4.block_sparse_moe.experts.26.w3", "model.layers.4.block_sparse_moe.experts.27.w3", "model.layers.4.block_sparse_moe.experts.28.w3", "model.layers.4.block_sparse_moe.experts.29.w3", "model.layers.4.block_sparse_moe.experts.30.w3", "model.layers.4.block_sparse_moe.experts.31.w3", "model.layers.4.block_sparse_moe.experts.32.w3", "model.layers.4.block_sparse_moe.experts.33.w3", "model.layers.4.block_sparse_moe.experts.34.w3", "model.layers.4.block_sparse_moe.experts.35.w3", "model.layers.4.block_sparse_moe.experts.36.w3", "model.layers.4.block_sparse_moe.experts.37.w3", "model.layers.4.block_sparse_moe.experts.38.w3", "model.layers.4.block_sparse_moe.experts.39.w3", "model.layers.4.block_sparse_moe.experts.40.w3", "model.layers.4.block_sparse_moe.experts.41.w3", "model.layers.4.block_sparse_moe.experts.42.w3", "model.layers.4.block_sparse_moe.experts.43.w3", "model.layers.4.block_sparse_moe.experts.44.w3", "model.layers.4.block_sparse_moe.experts.45.w3", "model.layers.4.block_sparse_moe.experts.46.w3", "model.layers.4.block_sparse_moe.experts.47.w3", "model.layers.4.block_sparse_moe.experts.48.w3", "model.layers.4.block_sparse_moe.experts.49.w3", "model.layers.4.block_sparse_moe.experts.50.w3", "model.layers.4.block_sparse_moe.experts.51.w3", "model.layers.4.block_sparse_moe.experts.52.w3", "model.layers.4.block_sparse_moe.experts.53.w3", "model.layers.4.block_sparse_moe.experts.54.w3", "model.layers.4.block_sparse_moe.experts.55.w3", "model.layers.4.block_sparse_moe.experts.56.w3", "model.layers.4.block_sparse_moe.experts.57.w3", "model.layers.4.block_sparse_moe.experts.58.w3", "model.layers.4.block_sparse_moe.experts.59.w3", "model.layers.4.block_sparse_moe.experts.60.w3", "model.layers.4.block_sparse_moe.experts.61.w3", "model.layers.4.block_sparse_moe.experts.62.w3", "model.layers.4.block_sparse_moe.experts.63.w3", "model.layers.4.block_sparse_moe.experts.64.w3", "model.layers.4.block_sparse_moe.experts.65.w3", "model.layers.4.block_sparse_moe.experts.66.w3", "model.layers.4.block_sparse_moe.experts.67.w3", "model.layers.4.block_sparse_moe.experts.68.w3", "model.layers.4.block_sparse_moe.experts.69.w3", "model.layers.4.block_sparse_moe.experts.70.w3", "model.layers.4.block_sparse_moe.experts.71.w3", "model.layers.4.block_sparse_moe.experts.72.w3", "model.layers.4.block_sparse_moe.experts.73.w3", "model.layers.4.block_sparse_moe.experts.74.w3", "model.layers.4.block_sparse_moe.experts.75.w3", "model.layers.4.block_sparse_moe.experts.76.w3", "model.layers.4.block_sparse_moe.experts.77.w3", "model.layers.4.block_sparse_moe.experts.78.w3", "model.layers.4.block_sparse_moe.experts.79.w3", "model.layers.4.block_sparse_moe.experts.80.w3", "model.layers.4.block_sparse_moe.experts.81.w3", "model.layers.4.block_sparse_moe.experts.82.w3", "model.layers.4.block_sparse_moe.experts.83.w3", "model.layers.4.block_sparse_moe.experts.84.w3", "model.layers.4.block_sparse_moe.experts.85.w3", "model.layers.4.block_sparse_moe.experts.86.w3", "model.layers.4.block_sparse_moe.experts.87.w3", "model.layers.4.block_sparse_moe.experts.88.w3", "model.layers.4.block_sparse_moe.experts.89.w3", "model.layers.4.block_sparse_moe.experts.90.w3", "model.layers.4.block_sparse_moe.experts.91.w3", "model.layers.4.block_sparse_moe.experts.92.w3", "model.layers.4.block_sparse_moe.experts.93.w3", "model.layers.4.block_sparse_moe.experts.94.w3", "model.layers.4.block_sparse_moe.experts.95.w3", "model.layers.4.block_sparse_moe.experts.96.w3", "model.layers.4.block_sparse_moe.experts.97.w3", "model.layers.4.block_sparse_moe.experts.98.w3", "model.layers.4.block_sparse_moe.experts.99.w3", "model.layers.4.block_sparse_moe.experts.100.w3", "model.layers.4.block_sparse_moe.experts.101.w3", "model.layers.4.block_sparse_moe.experts.102.w3", "model.layers.4.block_sparse_moe.experts.103.w3", "model.layers.4.block_sparse_moe.experts.104.w3", "model.layers.4.block_sparse_moe.experts.105.w3", "model.layers.4.block_sparse_moe.experts.106.w3", "model.layers.4.block_sparse_moe.experts.107.w3", "model.layers.4.block_sparse_moe.experts.108.w3", "model.layers.4.block_sparse_moe.experts.109.w3", "model.layers.4.block_sparse_moe.experts.110.w3", "model.layers.4.block_sparse_moe.experts.111.w3", "model.layers.4.block_sparse_moe.experts.112.w3", "model.layers.4.block_sparse_moe.experts.113.w3", "model.layers.4.block_sparse_moe.experts.114.w3", "model.layers.4.block_sparse_moe.experts.115.w3", "model.layers.4.block_sparse_moe.experts.116.w3", "model.layers.4.block_sparse_moe.experts.117.w3", "model.layers.4.block_sparse_moe.experts.118.w3", "model.layers.4.block_sparse_moe.experts.119.w3", "model.layers.4.block_sparse_moe.experts.120.w3", "model.layers.4.block_sparse_moe.experts.121.w3", "model.layers.4.block_sparse_moe.experts.122.w3", "model.layers.4.block_sparse_moe.experts.123.w3", "model.layers.4.block_sparse_moe.experts.124.w3", "model.layers.4.block_sparse_moe.experts.125.w3", "model.layers.4.block_sparse_moe.experts.126.w3", "model.layers.4.block_sparse_moe.experts.127.w3", "model.layers.4.block_sparse_moe.experts.128.w3", "model.layers.4.block_sparse_moe.experts.129.w3", "model.layers.4.block_sparse_moe.experts.130.w3", "model.layers.4.block_sparse_moe.experts.131.w3", "model.layers.4.block_sparse_moe.experts.132.w3", "model.layers.4.block_sparse_moe.experts.133.w3", "model.layers.4.block_sparse_moe.experts.134.w3", "model.layers.4.block_sparse_moe.experts.135.w3", "model.layers.4.block_sparse_moe.experts.136.w3", "model.layers.4.block_sparse_moe.experts.137.w3", "model.layers.4.block_sparse_moe.experts.138.w3", "model.layers.4.block_sparse_moe.experts.139.w3", "model.layers.4.block_sparse_moe.experts.140.w3", "model.layers.4.block_sparse_moe.experts.141.w3", "model.layers.4.block_sparse_moe.experts.142.w3", "model.layers.4.block_sparse_moe.experts.143.w3", "model.layers.4.block_sparse_moe.experts.144.w3", "model.layers.4.block_sparse_moe.experts.145.w3", "model.layers.4.block_sparse_moe.experts.146.w3", "model.layers.4.block_sparse_moe.experts.147.w3", "model.layers.4.block_sparse_moe.experts.148.w3", "model.layers.4.block_sparse_moe.experts.149.w3", "model.layers.4.block_sparse_moe.experts.150.w3", "model.layers.4.block_sparse_moe.experts.151.w3", "model.layers.4.block_sparse_moe.experts.152.w3", "model.layers.4.block_sparse_moe.experts.153.w3", "model.layers.4.block_sparse_moe.experts.154.w3", "model.layers.4.block_sparse_moe.experts.155.w3", "model.layers.4.block_sparse_moe.experts.156.w3", "model.layers.4.block_sparse_moe.experts.157.w3", "model.layers.4.block_sparse_moe.experts.158.w3", "model.layers.4.block_sparse_moe.experts.159.w3", "model.layers.4.block_sparse_moe.experts.160.w3", "model.layers.4.block_sparse_moe.experts.161.w3", "model.layers.4.block_sparse_moe.experts.162.w3", "model.layers.4.block_sparse_moe.experts.163.w3", "model.layers.4.block_sparse_moe.experts.164.w3", "model.layers.4.block_sparse_moe.experts.165.w3", "model.layers.4.block_sparse_moe.experts.166.w3", "model.layers.4.block_sparse_moe.experts.167.w3", "model.layers.4.block_sparse_moe.experts.168.w3", "model.layers.4.block_sparse_moe.experts.169.w3", "model.layers.4.block_sparse_moe.experts.170.w3", "model.layers.4.block_sparse_moe.experts.171.w3", "model.layers.4.block_sparse_moe.experts.172.w3", "model.layers.4.block_sparse_moe.experts.173.w3", "model.layers.4.block_sparse_moe.experts.174.w3", "model.layers.4.block_sparse_moe.experts.175.w3", "model.layers.4.block_sparse_moe.experts.176.w3", "model.layers.4.block_sparse_moe.experts.177.w3", "model.layers.4.block_sparse_moe.experts.178.w3", "model.layers.4.block_sparse_moe.experts.179.w3", "model.layers.4.block_sparse_moe.experts.180.w3", "model.layers.4.block_sparse_moe.experts.181.w3", "model.layers.4.block_sparse_moe.experts.182.w3", "model.layers.4.block_sparse_moe.experts.183.w3", "model.layers.4.block_sparse_moe.experts.184.w3", "model.layers.4.block_sparse_moe.experts.185.w3", "model.layers.4.block_sparse_moe.experts.186.w3", "model.layers.4.block_sparse_moe.experts.187.w3", "model.layers.4.block_sparse_moe.experts.188.w3", "model.layers.4.block_sparse_moe.experts.189.w3", "model.layers.4.block_sparse_moe.experts.190.w3", "model.layers.4.block_sparse_moe.experts.191.w3", "model.layers.4.block_sparse_moe.experts.192.w3", "model.layers.4.block_sparse_moe.experts.193.w3", "model.layers.4.block_sparse_moe.experts.194.w3", "model.layers.4.block_sparse_moe.experts.195.w3", "model.layers.4.block_sparse_moe.experts.196.w3", "model.layers.4.block_sparse_moe.experts.197.w3", "model.layers.4.block_sparse_moe.experts.198.w3", "model.layers.4.block_sparse_moe.experts.199.w3", "model.layers.4.block_sparse_moe.experts.200.w3", "model.layers.4.block_sparse_moe.experts.201.w3", "model.layers.4.block_sparse_moe.experts.202.w3", "model.layers.4.block_sparse_moe.experts.203.w3", "model.layers.4.block_sparse_moe.experts.204.w3", "model.layers.4.block_sparse_moe.experts.205.w3", "model.layers.4.block_sparse_moe.experts.206.w3", "model.layers.4.block_sparse_moe.experts.207.w3", "model.layers.4.block_sparse_moe.experts.208.w3", "model.layers.4.block_sparse_moe.experts.209.w3", "model.layers.4.block_sparse_moe.experts.210.w3", "model.layers.4.block_sparse_moe.experts.211.w3", "model.layers.4.block_sparse_moe.experts.212.w3", "model.layers.4.block_sparse_moe.experts.213.w3", "model.layers.4.block_sparse_moe.experts.214.w3", "model.layers.4.block_sparse_moe.experts.215.w3", "model.layers.4.block_sparse_moe.experts.216.w3", "model.layers.4.block_sparse_moe.experts.217.w3", "model.layers.4.block_sparse_moe.experts.218.w3", "model.layers.4.block_sparse_moe.experts.219.w3", "model.layers.4.block_sparse_moe.experts.220.w3", "model.layers.4.block_sparse_moe.experts.221.w3", "model.layers.4.block_sparse_moe.experts.222.w3", "model.layers.4.block_sparse_moe.experts.223.w3", "model.layers.4.block_sparse_moe.experts.224.w3", "model.layers.4.block_sparse_moe.experts.225.w3", "model.layers.4.block_sparse_moe.experts.226.w3", "model.layers.4.block_sparse_moe.experts.227.w3", "model.layers.4.block_sparse_moe.experts.228.w3", "model.layers.4.block_sparse_moe.experts.229.w3", "model.layers.4.block_sparse_moe.experts.230.w3", "model.layers.4.block_sparse_moe.experts.231.w3", "model.layers.4.block_sparse_moe.experts.232.w3", "model.layers.4.block_sparse_moe.experts.233.w3", "model.layers.4.block_sparse_moe.experts.234.w3", "model.layers.4.block_sparse_moe.experts.235.w3", "model.layers.4.block_sparse_moe.experts.236.w3", "model.layers.4.block_sparse_moe.experts.237.w3", "model.layers.4.block_sparse_moe.experts.238.w3", "model.layers.4.block_sparse_moe.experts.239.w3", "model.layers.4.block_sparse_moe.experts.240.w3", "model.layers.4.block_sparse_moe.experts.241.w3", "model.layers.4.block_sparse_moe.experts.242.w3", "model.layers.4.block_sparse_moe.experts.243.w3", "model.layers.4.block_sparse_moe.experts.244.w3", "model.layers.4.block_sparse_moe.experts.245.w3", "model.layers.4.block_sparse_moe.experts.246.w3", "model.layers.4.block_sparse_moe.experts.247.w3", "model.layers.4.block_sparse_moe.experts.248.w3", "model.layers.4.block_sparse_moe.experts.249.w3", "model.layers.4.block_sparse_moe.experts.250.w3", "model.layers.4.block_sparse_moe.experts.251.w3", "model.layers.4.block_sparse_moe.experts.252.w3", "model.layers.4.block_sparse_moe.experts.253.w3", "model.layers.4.block_sparse_moe.experts.254.w3", "model.layers.4.block_sparse_moe.experts.255.w3", "model.layers.4.block_sparse_moe.experts.0.w2", "model.layers.4.block_sparse_moe.experts.1.w2", "model.layers.4.block_sparse_moe.experts.2.w2", "model.layers.4.block_sparse_moe.experts.3.w2", "model.layers.4.block_sparse_moe.experts.4.w2", "model.layers.4.block_sparse_moe.experts.5.w2", "model.layers.4.block_sparse_moe.experts.6.w2", "model.layers.4.block_sparse_moe.experts.7.w2", "model.layers.4.block_sparse_moe.experts.8.w2", "model.layers.4.block_sparse_moe.experts.9.w2", "model.layers.4.block_sparse_moe.experts.10.w2", "model.layers.4.block_sparse_moe.experts.11.w2", "model.layers.4.block_sparse_moe.experts.12.w2", "model.layers.4.block_sparse_moe.experts.13.w2", "model.layers.4.block_sparse_moe.experts.14.w2", "model.layers.4.block_sparse_moe.experts.15.w2", "model.layers.4.block_sparse_moe.experts.16.w2", "model.layers.4.block_sparse_moe.experts.17.w2", "model.layers.4.block_sparse_moe.experts.18.w2", "model.layers.4.block_sparse_moe.experts.19.w2", "model.layers.4.block_sparse_moe.experts.20.w2", "model.layers.4.block_sparse_moe.experts.21.w2", "model.layers.4.block_sparse_moe.experts.22.w2", "model.layers.4.block_sparse_moe.experts.23.w2", "model.layers.4.block_sparse_moe.experts.24.w2", "model.layers.4.block_sparse_moe.experts.25.w2", "model.layers.4.block_sparse_moe.experts.26.w2", "model.layers.4.block_sparse_moe.experts.27.w2", "model.layers.4.block_sparse_moe.experts.28.w2", "model.layers.4.block_sparse_moe.experts.29.w2", "model.layers.4.block_sparse_moe.experts.30.w2", "model.layers.4.block_sparse_moe.experts.31.w2", "model.layers.4.block_sparse_moe.experts.32.w2", "model.layers.4.block_sparse_moe.experts.33.w2", "model.layers.4.block_sparse_moe.experts.34.w2", "model.layers.4.block_sparse_moe.experts.35.w2", "model.layers.4.block_sparse_moe.experts.36.w2", "model.layers.4.block_sparse_moe.experts.37.w2", "model.layers.4.block_sparse_moe.experts.38.w2", "model.layers.4.block_sparse_moe.experts.39.w2", "model.layers.4.block_sparse_moe.experts.40.w2", "model.layers.4.block_sparse_moe.experts.41.w2", "model.layers.4.block_sparse_moe.experts.42.w2", "model.layers.4.block_sparse_moe.experts.43.w2", "model.layers.4.block_sparse_moe.experts.44.w2", "model.layers.4.block_sparse_moe.experts.45.w2", "model.layers.4.block_sparse_moe.experts.46.w2", "model.layers.4.block_sparse_moe.experts.47.w2", "model.layers.4.block_sparse_moe.experts.48.w2", "model.layers.4.block_sparse_moe.experts.49.w2", "model.layers.4.block_sparse_moe.experts.50.w2", "model.layers.4.block_sparse_moe.experts.51.w2", "model.layers.4.block_sparse_moe.experts.52.w2", "model.layers.4.block_sparse_moe.experts.53.w2", "model.layers.4.block_sparse_moe.experts.54.w2", "model.layers.4.block_sparse_moe.experts.55.w2", "model.layers.4.block_sparse_moe.experts.56.w2", "model.layers.4.block_sparse_moe.experts.57.w2", "model.layers.4.block_sparse_moe.experts.58.w2", "model.layers.4.block_sparse_moe.experts.59.w2", "model.layers.4.block_sparse_moe.experts.60.w2", "model.layers.4.block_sparse_moe.experts.61.w2", "model.layers.4.block_sparse_moe.experts.62.w2", "model.layers.4.block_sparse_moe.experts.63.w2", "model.layers.4.block_sparse_moe.experts.64.w2", "model.layers.4.block_sparse_moe.experts.65.w2", "model.layers.4.block_sparse_moe.experts.66.w2", "model.layers.4.block_sparse_moe.experts.67.w2", "model.layers.4.block_sparse_moe.experts.68.w2", "model.layers.4.block_sparse_moe.experts.69.w2", "model.layers.4.block_sparse_moe.experts.70.w2", "model.layers.4.block_sparse_moe.experts.71.w2", "model.layers.4.block_sparse_moe.experts.72.w2", "model.layers.4.block_sparse_moe.experts.73.w2", "model.layers.4.block_sparse_moe.experts.74.w2", "model.layers.4.block_sparse_moe.experts.75.w2", "model.layers.4.block_sparse_moe.experts.76.w2", "model.layers.4.block_sparse_moe.experts.77.w2", "model.layers.4.block_sparse_moe.experts.78.w2", "model.layers.4.block_sparse_moe.experts.79.w2", "model.layers.4.block_sparse_moe.experts.80.w2", "model.layers.4.block_sparse_moe.experts.81.w2", "model.layers.4.block_sparse_moe.experts.82.w2", "model.layers.4.block_sparse_moe.experts.83.w2", "model.layers.4.block_sparse_moe.experts.84.w2", "model.layers.4.block_sparse_moe.experts.85.w2", "model.layers.4.block_sparse_moe.experts.86.w2", "model.layers.4.block_sparse_moe.experts.87.w2", "model.layers.4.block_sparse_moe.experts.88.w2", "model.layers.4.block_sparse_moe.experts.89.w2", "model.layers.4.block_sparse_moe.experts.90.w2", "model.layers.4.block_sparse_moe.experts.91.w2", "model.layers.4.block_sparse_moe.experts.92.w2", "model.layers.4.block_sparse_moe.experts.93.w2", "model.layers.4.block_sparse_moe.experts.94.w2", "model.layers.4.block_sparse_moe.experts.95.w2", "model.layers.4.block_sparse_moe.experts.96.w2", "model.layers.4.block_sparse_moe.experts.97.w2", "model.layers.4.block_sparse_moe.experts.98.w2", "model.layers.4.block_sparse_moe.experts.99.w2", "model.layers.4.block_sparse_moe.experts.100.w2", "model.layers.4.block_sparse_moe.experts.101.w2", "model.layers.4.block_sparse_moe.experts.102.w2", "model.layers.4.block_sparse_moe.experts.103.w2", "model.layers.4.block_sparse_moe.experts.104.w2", "model.layers.4.block_sparse_moe.experts.105.w2", "model.layers.4.block_sparse_moe.experts.106.w2", "model.layers.4.block_sparse_moe.experts.107.w2", "model.layers.4.block_sparse_moe.experts.108.w2", "model.layers.4.block_sparse_moe.experts.109.w2", "model.layers.4.block_sparse_moe.experts.110.w2", "model.layers.4.block_sparse_moe.experts.111.w2", "model.layers.4.block_sparse_moe.experts.112.w2", "model.layers.4.block_sparse_moe.experts.113.w2", "model.layers.4.block_sparse_moe.experts.114.w2", "model.layers.4.block_sparse_moe.experts.115.w2", "model.layers.4.block_sparse_moe.experts.116.w2", "model.layers.4.block_sparse_moe.experts.117.w2", "model.layers.4.block_sparse_moe.experts.118.w2", "model.layers.4.block_sparse_moe.experts.119.w2", "model.layers.4.block_sparse_moe.experts.120.w2", "model.layers.4.block_sparse_moe.experts.121.w2", "model.layers.4.block_sparse_moe.experts.122.w2", "model.layers.4.block_sparse_moe.experts.123.w2", "model.layers.4.block_sparse_moe.experts.124.w2", "model.layers.4.block_sparse_moe.experts.125.w2", "model.layers.4.block_sparse_moe.experts.126.w2", "model.layers.4.block_sparse_moe.experts.127.w2", "model.layers.4.block_sparse_moe.experts.128.w2", "model.layers.4.block_sparse_moe.experts.129.w2", "model.layers.4.block_sparse_moe.experts.130.w2", "model.layers.4.block_sparse_moe.experts.131.w2", "model.layers.4.block_sparse_moe.experts.132.w2", "model.layers.4.block_sparse_moe.experts.133.w2", "model.layers.4.block_sparse_moe.experts.134.w2", "model.layers.4.block_sparse_moe.experts.135.w2", "model.layers.4.block_sparse_moe.experts.136.w2", "model.layers.4.block_sparse_moe.experts.137.w2", "model.layers.4.block_sparse_moe.experts.138.w2", "model.layers.4.block_sparse_moe.experts.139.w2", "model.layers.4.block_sparse_moe.experts.140.w2", "model.layers.4.block_sparse_moe.experts.141.w2", "model.layers.4.block_sparse_moe.experts.142.w2", "model.layers.4.block_sparse_moe.experts.143.w2", "model.layers.4.block_sparse_moe.experts.144.w2", "model.layers.4.block_sparse_moe.experts.145.w2", "model.layers.4.block_sparse_moe.experts.146.w2", "model.layers.4.block_sparse_moe.experts.147.w2", "model.layers.4.block_sparse_moe.experts.148.w2", "model.layers.4.block_sparse_moe.experts.149.w2", "model.layers.4.block_sparse_moe.experts.150.w2", "model.layers.4.block_sparse_moe.experts.151.w2", "model.layers.4.block_sparse_moe.experts.152.w2", "model.layers.4.block_sparse_moe.experts.153.w2", "model.layers.4.block_sparse_moe.experts.154.w2", "model.layers.4.block_sparse_moe.experts.155.w2", "model.layers.4.block_sparse_moe.experts.156.w2", "model.layers.4.block_sparse_moe.experts.157.w2", "model.layers.4.block_sparse_moe.experts.158.w2", "model.layers.4.block_sparse_moe.experts.159.w2", "model.layers.4.block_sparse_moe.experts.160.w2", "model.layers.4.block_sparse_moe.experts.161.w2", "model.layers.4.block_sparse_moe.experts.162.w2", "model.layers.4.block_sparse_moe.experts.163.w2", "model.layers.4.block_sparse_moe.experts.164.w2", "model.layers.4.block_sparse_moe.experts.165.w2", "model.layers.4.block_sparse_moe.experts.166.w2", "model.layers.4.block_sparse_moe.experts.167.w2", "model.layers.4.block_sparse_moe.experts.168.w2", "model.layers.4.block_sparse_moe.experts.169.w2", "model.layers.4.block_sparse_moe.experts.170.w2", "model.layers.4.block_sparse_moe.experts.171.w2", "model.layers.4.block_sparse_moe.experts.172.w2", "model.layers.4.block_sparse_moe.experts.173.w2", "model.layers.4.block_sparse_moe.experts.174.w2", "model.layers.4.block_sparse_moe.experts.175.w2", "model.layers.4.block_sparse_moe.experts.176.w2", "model.layers.4.block_sparse_moe.experts.177.w2", "model.layers.4.block_sparse_moe.experts.178.w2", "model.layers.4.block_sparse_moe.experts.179.w2", "model.layers.4.block_sparse_moe.experts.180.w2", "model.layers.4.block_sparse_moe.experts.181.w2", "model.layers.4.block_sparse_moe.experts.182.w2", "model.layers.4.block_sparse_moe.experts.183.w2", "model.layers.4.block_sparse_moe.experts.184.w2", "model.layers.4.block_sparse_moe.experts.185.w2", "model.layers.4.block_sparse_moe.experts.186.w2", "model.layers.4.block_sparse_moe.experts.187.w2", "model.layers.4.block_sparse_moe.experts.188.w2", "model.layers.4.block_sparse_moe.experts.189.w2", "model.layers.4.block_sparse_moe.experts.190.w2", "model.layers.4.block_sparse_moe.experts.191.w2", "model.layers.4.block_sparse_moe.experts.192.w2", "model.layers.4.block_sparse_moe.experts.193.w2", "model.layers.4.block_sparse_moe.experts.194.w2", "model.layers.4.block_sparse_moe.experts.195.w2", "model.layers.4.block_sparse_moe.experts.196.w2", "model.layers.4.block_sparse_moe.experts.197.w2", "model.layers.4.block_sparse_moe.experts.198.w2", "model.layers.4.block_sparse_moe.experts.199.w2", "model.layers.4.block_sparse_moe.experts.200.w2", "model.layers.4.block_sparse_moe.experts.201.w2", "model.layers.4.block_sparse_moe.experts.202.w2", "model.layers.4.block_sparse_moe.experts.203.w2", "model.layers.4.block_sparse_moe.experts.204.w2", "model.layers.4.block_sparse_moe.experts.205.w2", "model.layers.4.block_sparse_moe.experts.206.w2", "model.layers.4.block_sparse_moe.experts.207.w2", "model.layers.4.block_sparse_moe.experts.208.w2", "model.layers.4.block_sparse_moe.experts.209.w2", "model.layers.4.block_sparse_moe.experts.210.w2", "model.layers.4.block_sparse_moe.experts.211.w2", "model.layers.4.block_sparse_moe.experts.212.w2", "model.layers.4.block_sparse_moe.experts.213.w2", "model.layers.4.block_sparse_moe.experts.214.w2", "model.layers.4.block_sparse_moe.experts.215.w2", "model.layers.4.block_sparse_moe.experts.216.w2", "model.layers.4.block_sparse_moe.experts.217.w2", "model.layers.4.block_sparse_moe.experts.218.w2", "model.layers.4.block_sparse_moe.experts.219.w2", "model.layers.4.block_sparse_moe.experts.220.w2", "model.layers.4.block_sparse_moe.experts.221.w2", "model.layers.4.block_sparse_moe.experts.222.w2", "model.layers.4.block_sparse_moe.experts.223.w2", "model.layers.4.block_sparse_moe.experts.224.w2", "model.layers.4.block_sparse_moe.experts.225.w2", "model.layers.4.block_sparse_moe.experts.226.w2", "model.layers.4.block_sparse_moe.experts.227.w2", "model.layers.4.block_sparse_moe.experts.228.w2", "model.layers.4.block_sparse_moe.experts.229.w2", "model.layers.4.block_sparse_moe.experts.230.w2", "model.layers.4.block_sparse_moe.experts.231.w2", "model.layers.4.block_sparse_moe.experts.232.w2", "model.layers.4.block_sparse_moe.experts.233.w2", "model.layers.4.block_sparse_moe.experts.234.w2", "model.layers.4.block_sparse_moe.experts.235.w2", "model.layers.4.block_sparse_moe.experts.236.w2", "model.layers.4.block_sparse_moe.experts.237.w2", "model.layers.4.block_sparse_moe.experts.238.w2", "model.layers.4.block_sparse_moe.experts.239.w2", "model.layers.4.block_sparse_moe.experts.240.w2", "model.layers.4.block_sparse_moe.experts.241.w2", "model.layers.4.block_sparse_moe.experts.242.w2", "model.layers.4.block_sparse_moe.experts.243.w2", "model.layers.4.block_sparse_moe.experts.244.w2", "model.layers.4.block_sparse_moe.experts.245.w2", "model.layers.4.block_sparse_moe.experts.246.w2", "model.layers.4.block_sparse_moe.experts.247.w2", "model.layers.4.block_sparse_moe.experts.248.w2", "model.layers.4.block_sparse_moe.experts.249.w2", "model.layers.4.block_sparse_moe.experts.250.w2", "model.layers.4.block_sparse_moe.experts.251.w2", "model.layers.4.block_sparse_moe.experts.252.w2", "model.layers.4.block_sparse_moe.experts.253.w2", "model.layers.4.block_sparse_moe.experts.254.w2", "model.layers.4.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.016441536508500554, "dbits": 3623878656 } ] }, { "idx": 10, "layers": [ "model.layers.5.self_attn.q_proj", "model.layers.5.self_attn.k_proj", "model.layers.5.self_attn.v_proj", "model.layers.5.self_attn.o_proj" ], "candidates": [ { "dkld": 0.004483678564429272, "dbits": 44040192 } ] }, { "idx": 11, "layers": [ "model.layers.5.block_sparse_moe.experts.0.w1", "model.layers.5.block_sparse_moe.experts.1.w1", "model.layers.5.block_sparse_moe.experts.2.w1", "model.layers.5.block_sparse_moe.experts.3.w1", "model.layers.5.block_sparse_moe.experts.4.w1", "model.layers.5.block_sparse_moe.experts.5.w1", "model.layers.5.block_sparse_moe.experts.6.w1", "model.layers.5.block_sparse_moe.experts.7.w1", "model.layers.5.block_sparse_moe.experts.8.w1", "model.layers.5.block_sparse_moe.experts.9.w1", "model.layers.5.block_sparse_moe.experts.10.w1", "model.layers.5.block_sparse_moe.experts.11.w1", "model.layers.5.block_sparse_moe.experts.12.w1", "model.layers.5.block_sparse_moe.experts.13.w1", "model.layers.5.block_sparse_moe.experts.14.w1", "model.layers.5.block_sparse_moe.experts.15.w1", "model.layers.5.block_sparse_moe.experts.16.w1", "model.layers.5.block_sparse_moe.experts.17.w1", "model.layers.5.block_sparse_moe.experts.18.w1", "model.layers.5.block_sparse_moe.experts.19.w1", "model.layers.5.block_sparse_moe.experts.20.w1", "model.layers.5.block_sparse_moe.experts.21.w1", "model.layers.5.block_sparse_moe.experts.22.w1", "model.layers.5.block_sparse_moe.experts.23.w1", "model.layers.5.block_sparse_moe.experts.24.w1", "model.layers.5.block_sparse_moe.experts.25.w1", "model.layers.5.block_sparse_moe.experts.26.w1", "model.layers.5.block_sparse_moe.experts.27.w1", "model.layers.5.block_sparse_moe.experts.28.w1", "model.layers.5.block_sparse_moe.experts.29.w1", "model.layers.5.block_sparse_moe.experts.30.w1", "model.layers.5.block_sparse_moe.experts.31.w1", "model.layers.5.block_sparse_moe.experts.32.w1", "model.layers.5.block_sparse_moe.experts.33.w1", "model.layers.5.block_sparse_moe.experts.34.w1", "model.layers.5.block_sparse_moe.experts.35.w1", "model.layers.5.block_sparse_moe.experts.36.w1", "model.layers.5.block_sparse_moe.experts.37.w1", "model.layers.5.block_sparse_moe.experts.38.w1", "model.layers.5.block_sparse_moe.experts.39.w1", "model.layers.5.block_sparse_moe.experts.40.w1", "model.layers.5.block_sparse_moe.experts.41.w1", "model.layers.5.block_sparse_moe.experts.42.w1", "model.layers.5.block_sparse_moe.experts.43.w1", "model.layers.5.block_sparse_moe.experts.44.w1", "model.layers.5.block_sparse_moe.experts.45.w1", "model.layers.5.block_sparse_moe.experts.46.w1", "model.layers.5.block_sparse_moe.experts.47.w1", "model.layers.5.block_sparse_moe.experts.48.w1", "model.layers.5.block_sparse_moe.experts.49.w1", "model.layers.5.block_sparse_moe.experts.50.w1", "model.layers.5.block_sparse_moe.experts.51.w1", "model.layers.5.block_sparse_moe.experts.52.w1", "model.layers.5.block_sparse_moe.experts.53.w1", "model.layers.5.block_sparse_moe.experts.54.w1", "model.layers.5.block_sparse_moe.experts.55.w1", "model.layers.5.block_sparse_moe.experts.56.w1", "model.layers.5.block_sparse_moe.experts.57.w1", "model.layers.5.block_sparse_moe.experts.58.w1", "model.layers.5.block_sparse_moe.experts.59.w1", "model.layers.5.block_sparse_moe.experts.60.w1", "model.layers.5.block_sparse_moe.experts.61.w1", "model.layers.5.block_sparse_moe.experts.62.w1", "model.layers.5.block_sparse_moe.experts.63.w1", "model.layers.5.block_sparse_moe.experts.64.w1", "model.layers.5.block_sparse_moe.experts.65.w1", "model.layers.5.block_sparse_moe.experts.66.w1", "model.layers.5.block_sparse_moe.experts.67.w1", "model.layers.5.block_sparse_moe.experts.68.w1", "model.layers.5.block_sparse_moe.experts.69.w1", "model.layers.5.block_sparse_moe.experts.70.w1", "model.layers.5.block_sparse_moe.experts.71.w1", "model.layers.5.block_sparse_moe.experts.72.w1", "model.layers.5.block_sparse_moe.experts.73.w1", "model.layers.5.block_sparse_moe.experts.74.w1", "model.layers.5.block_sparse_moe.experts.75.w1", "model.layers.5.block_sparse_moe.experts.76.w1", "model.layers.5.block_sparse_moe.experts.77.w1", "model.layers.5.block_sparse_moe.experts.78.w1", "model.layers.5.block_sparse_moe.experts.79.w1", "model.layers.5.block_sparse_moe.experts.80.w1", "model.layers.5.block_sparse_moe.experts.81.w1", "model.layers.5.block_sparse_moe.experts.82.w1", "model.layers.5.block_sparse_moe.experts.83.w1", "model.layers.5.block_sparse_moe.experts.84.w1", "model.layers.5.block_sparse_moe.experts.85.w1", "model.layers.5.block_sparse_moe.experts.86.w1", "model.layers.5.block_sparse_moe.experts.87.w1", "model.layers.5.block_sparse_moe.experts.88.w1", "model.layers.5.block_sparse_moe.experts.89.w1", "model.layers.5.block_sparse_moe.experts.90.w1", "model.layers.5.block_sparse_moe.experts.91.w1", "model.layers.5.block_sparse_moe.experts.92.w1", "model.layers.5.block_sparse_moe.experts.93.w1", "model.layers.5.block_sparse_moe.experts.94.w1", "model.layers.5.block_sparse_moe.experts.95.w1", "model.layers.5.block_sparse_moe.experts.96.w1", "model.layers.5.block_sparse_moe.experts.97.w1", "model.layers.5.block_sparse_moe.experts.98.w1", "model.layers.5.block_sparse_moe.experts.99.w1", "model.layers.5.block_sparse_moe.experts.100.w1", "model.layers.5.block_sparse_moe.experts.101.w1", "model.layers.5.block_sparse_moe.experts.102.w1", "model.layers.5.block_sparse_moe.experts.103.w1", "model.layers.5.block_sparse_moe.experts.104.w1", "model.layers.5.block_sparse_moe.experts.105.w1", "model.layers.5.block_sparse_moe.experts.106.w1", "model.layers.5.block_sparse_moe.experts.107.w1", "model.layers.5.block_sparse_moe.experts.108.w1", "model.layers.5.block_sparse_moe.experts.109.w1", "model.layers.5.block_sparse_moe.experts.110.w1", "model.layers.5.block_sparse_moe.experts.111.w1", "model.layers.5.block_sparse_moe.experts.112.w1", "model.layers.5.block_sparse_moe.experts.113.w1", "model.layers.5.block_sparse_moe.experts.114.w1", "model.layers.5.block_sparse_moe.experts.115.w1", "model.layers.5.block_sparse_moe.experts.116.w1", "model.layers.5.block_sparse_moe.experts.117.w1", "model.layers.5.block_sparse_moe.experts.118.w1", "model.layers.5.block_sparse_moe.experts.119.w1", "model.layers.5.block_sparse_moe.experts.120.w1", "model.layers.5.block_sparse_moe.experts.121.w1", "model.layers.5.block_sparse_moe.experts.122.w1", "model.layers.5.block_sparse_moe.experts.123.w1", "model.layers.5.block_sparse_moe.experts.124.w1", "model.layers.5.block_sparse_moe.experts.125.w1", "model.layers.5.block_sparse_moe.experts.126.w1", "model.layers.5.block_sparse_moe.experts.127.w1", "model.layers.5.block_sparse_moe.experts.128.w1", "model.layers.5.block_sparse_moe.experts.129.w1", "model.layers.5.block_sparse_moe.experts.130.w1", "model.layers.5.block_sparse_moe.experts.131.w1", "model.layers.5.block_sparse_moe.experts.132.w1", "model.layers.5.block_sparse_moe.experts.133.w1", "model.layers.5.block_sparse_moe.experts.134.w1", "model.layers.5.block_sparse_moe.experts.135.w1", "model.layers.5.block_sparse_moe.experts.136.w1", "model.layers.5.block_sparse_moe.experts.137.w1", "model.layers.5.block_sparse_moe.experts.138.w1", "model.layers.5.block_sparse_moe.experts.139.w1", "model.layers.5.block_sparse_moe.experts.140.w1", "model.layers.5.block_sparse_moe.experts.141.w1", "model.layers.5.block_sparse_moe.experts.142.w1", "model.layers.5.block_sparse_moe.experts.143.w1", "model.layers.5.block_sparse_moe.experts.144.w1", "model.layers.5.block_sparse_moe.experts.145.w1", "model.layers.5.block_sparse_moe.experts.146.w1", "model.layers.5.block_sparse_moe.experts.147.w1", "model.layers.5.block_sparse_moe.experts.148.w1", "model.layers.5.block_sparse_moe.experts.149.w1", "model.layers.5.block_sparse_moe.experts.150.w1", "model.layers.5.block_sparse_moe.experts.151.w1", "model.layers.5.block_sparse_moe.experts.152.w1", "model.layers.5.block_sparse_moe.experts.153.w1", "model.layers.5.block_sparse_moe.experts.154.w1", "model.layers.5.block_sparse_moe.experts.155.w1", "model.layers.5.block_sparse_moe.experts.156.w1", "model.layers.5.block_sparse_moe.experts.157.w1", "model.layers.5.block_sparse_moe.experts.158.w1", "model.layers.5.block_sparse_moe.experts.159.w1", "model.layers.5.block_sparse_moe.experts.160.w1", "model.layers.5.block_sparse_moe.experts.161.w1", "model.layers.5.block_sparse_moe.experts.162.w1", "model.layers.5.block_sparse_moe.experts.163.w1", "model.layers.5.block_sparse_moe.experts.164.w1", "model.layers.5.block_sparse_moe.experts.165.w1", "model.layers.5.block_sparse_moe.experts.166.w1", "model.layers.5.block_sparse_moe.experts.167.w1", "model.layers.5.block_sparse_moe.experts.168.w1", "model.layers.5.block_sparse_moe.experts.169.w1", "model.layers.5.block_sparse_moe.experts.170.w1", "model.layers.5.block_sparse_moe.experts.171.w1", "model.layers.5.block_sparse_moe.experts.172.w1", "model.layers.5.block_sparse_moe.experts.173.w1", "model.layers.5.block_sparse_moe.experts.174.w1", "model.layers.5.block_sparse_moe.experts.175.w1", "model.layers.5.block_sparse_moe.experts.176.w1", "model.layers.5.block_sparse_moe.experts.177.w1", "model.layers.5.block_sparse_moe.experts.178.w1", "model.layers.5.block_sparse_moe.experts.179.w1", "model.layers.5.block_sparse_moe.experts.180.w1", "model.layers.5.block_sparse_moe.experts.181.w1", "model.layers.5.block_sparse_moe.experts.182.w1", "model.layers.5.block_sparse_moe.experts.183.w1", "model.layers.5.block_sparse_moe.experts.184.w1", "model.layers.5.block_sparse_moe.experts.185.w1", "model.layers.5.block_sparse_moe.experts.186.w1", "model.layers.5.block_sparse_moe.experts.187.w1", "model.layers.5.block_sparse_moe.experts.188.w1", "model.layers.5.block_sparse_moe.experts.189.w1", "model.layers.5.block_sparse_moe.experts.190.w1", "model.layers.5.block_sparse_moe.experts.191.w1", "model.layers.5.block_sparse_moe.experts.192.w1", "model.layers.5.block_sparse_moe.experts.193.w1", "model.layers.5.block_sparse_moe.experts.194.w1", "model.layers.5.block_sparse_moe.experts.195.w1", "model.layers.5.block_sparse_moe.experts.196.w1", "model.layers.5.block_sparse_moe.experts.197.w1", "model.layers.5.block_sparse_moe.experts.198.w1", "model.layers.5.block_sparse_moe.experts.199.w1", "model.layers.5.block_sparse_moe.experts.200.w1", "model.layers.5.block_sparse_moe.experts.201.w1", "model.layers.5.block_sparse_moe.experts.202.w1", "model.layers.5.block_sparse_moe.experts.203.w1", "model.layers.5.block_sparse_moe.experts.204.w1", "model.layers.5.block_sparse_moe.experts.205.w1", "model.layers.5.block_sparse_moe.experts.206.w1", "model.layers.5.block_sparse_moe.experts.207.w1", "model.layers.5.block_sparse_moe.experts.208.w1", "model.layers.5.block_sparse_moe.experts.209.w1", "model.layers.5.block_sparse_moe.experts.210.w1", "model.layers.5.block_sparse_moe.experts.211.w1", "model.layers.5.block_sparse_moe.experts.212.w1", "model.layers.5.block_sparse_moe.experts.213.w1", "model.layers.5.block_sparse_moe.experts.214.w1", "model.layers.5.block_sparse_moe.experts.215.w1", "model.layers.5.block_sparse_moe.experts.216.w1", "model.layers.5.block_sparse_moe.experts.217.w1", "model.layers.5.block_sparse_moe.experts.218.w1", "model.layers.5.block_sparse_moe.experts.219.w1", "model.layers.5.block_sparse_moe.experts.220.w1", "model.layers.5.block_sparse_moe.experts.221.w1", "model.layers.5.block_sparse_moe.experts.222.w1", "model.layers.5.block_sparse_moe.experts.223.w1", "model.layers.5.block_sparse_moe.experts.224.w1", "model.layers.5.block_sparse_moe.experts.225.w1", "model.layers.5.block_sparse_moe.experts.226.w1", "model.layers.5.block_sparse_moe.experts.227.w1", "model.layers.5.block_sparse_moe.experts.228.w1", "model.layers.5.block_sparse_moe.experts.229.w1", "model.layers.5.block_sparse_moe.experts.230.w1", "model.layers.5.block_sparse_moe.experts.231.w1", "model.layers.5.block_sparse_moe.experts.232.w1", "model.layers.5.block_sparse_moe.experts.233.w1", "model.layers.5.block_sparse_moe.experts.234.w1", "model.layers.5.block_sparse_moe.experts.235.w1", "model.layers.5.block_sparse_moe.experts.236.w1", "model.layers.5.block_sparse_moe.experts.237.w1", "model.layers.5.block_sparse_moe.experts.238.w1", "model.layers.5.block_sparse_moe.experts.239.w1", "model.layers.5.block_sparse_moe.experts.240.w1", "model.layers.5.block_sparse_moe.experts.241.w1", "model.layers.5.block_sparse_moe.experts.242.w1", "model.layers.5.block_sparse_moe.experts.243.w1", "model.layers.5.block_sparse_moe.experts.244.w1", "model.layers.5.block_sparse_moe.experts.245.w1", "model.layers.5.block_sparse_moe.experts.246.w1", "model.layers.5.block_sparse_moe.experts.247.w1", "model.layers.5.block_sparse_moe.experts.248.w1", "model.layers.5.block_sparse_moe.experts.249.w1", "model.layers.5.block_sparse_moe.experts.250.w1", "model.layers.5.block_sparse_moe.experts.251.w1", "model.layers.5.block_sparse_moe.experts.252.w1", "model.layers.5.block_sparse_moe.experts.253.w1", "model.layers.5.block_sparse_moe.experts.254.w1", "model.layers.5.block_sparse_moe.experts.255.w1", "model.layers.5.block_sparse_moe.experts.0.w3", "model.layers.5.block_sparse_moe.experts.1.w3", "model.layers.5.block_sparse_moe.experts.2.w3", "model.layers.5.block_sparse_moe.experts.3.w3", "model.layers.5.block_sparse_moe.experts.4.w3", "model.layers.5.block_sparse_moe.experts.5.w3", "model.layers.5.block_sparse_moe.experts.6.w3", "model.layers.5.block_sparse_moe.experts.7.w3", "model.layers.5.block_sparse_moe.experts.8.w3", "model.layers.5.block_sparse_moe.experts.9.w3", "model.layers.5.block_sparse_moe.experts.10.w3", "model.layers.5.block_sparse_moe.experts.11.w3", "model.layers.5.block_sparse_moe.experts.12.w3", "model.layers.5.block_sparse_moe.experts.13.w3", "model.layers.5.block_sparse_moe.experts.14.w3", "model.layers.5.block_sparse_moe.experts.15.w3", "model.layers.5.block_sparse_moe.experts.16.w3", "model.layers.5.block_sparse_moe.experts.17.w3", "model.layers.5.block_sparse_moe.experts.18.w3", "model.layers.5.block_sparse_moe.experts.19.w3", "model.layers.5.block_sparse_moe.experts.20.w3", "model.layers.5.block_sparse_moe.experts.21.w3", "model.layers.5.block_sparse_moe.experts.22.w3", "model.layers.5.block_sparse_moe.experts.23.w3", "model.layers.5.block_sparse_moe.experts.24.w3", "model.layers.5.block_sparse_moe.experts.25.w3", "model.layers.5.block_sparse_moe.experts.26.w3", "model.layers.5.block_sparse_moe.experts.27.w3", "model.layers.5.block_sparse_moe.experts.28.w3", "model.layers.5.block_sparse_moe.experts.29.w3", "model.layers.5.block_sparse_moe.experts.30.w3", "model.layers.5.block_sparse_moe.experts.31.w3", "model.layers.5.block_sparse_moe.experts.32.w3", "model.layers.5.block_sparse_moe.experts.33.w3", "model.layers.5.block_sparse_moe.experts.34.w3", "model.layers.5.block_sparse_moe.experts.35.w3", "model.layers.5.block_sparse_moe.experts.36.w3", "model.layers.5.block_sparse_moe.experts.37.w3", "model.layers.5.block_sparse_moe.experts.38.w3", "model.layers.5.block_sparse_moe.experts.39.w3", "model.layers.5.block_sparse_moe.experts.40.w3", "model.layers.5.block_sparse_moe.experts.41.w3", "model.layers.5.block_sparse_moe.experts.42.w3", "model.layers.5.block_sparse_moe.experts.43.w3", "model.layers.5.block_sparse_moe.experts.44.w3", "model.layers.5.block_sparse_moe.experts.45.w3", "model.layers.5.block_sparse_moe.experts.46.w3", "model.layers.5.block_sparse_moe.experts.47.w3", "model.layers.5.block_sparse_moe.experts.48.w3", "model.layers.5.block_sparse_moe.experts.49.w3", "model.layers.5.block_sparse_moe.experts.50.w3", "model.layers.5.block_sparse_moe.experts.51.w3", "model.layers.5.block_sparse_moe.experts.52.w3", "model.layers.5.block_sparse_moe.experts.53.w3", "model.layers.5.block_sparse_moe.experts.54.w3", "model.layers.5.block_sparse_moe.experts.55.w3", "model.layers.5.block_sparse_moe.experts.56.w3", "model.layers.5.block_sparse_moe.experts.57.w3", "model.layers.5.block_sparse_moe.experts.58.w3", "model.layers.5.block_sparse_moe.experts.59.w3", "model.layers.5.block_sparse_moe.experts.60.w3", "model.layers.5.block_sparse_moe.experts.61.w3", "model.layers.5.block_sparse_moe.experts.62.w3", "model.layers.5.block_sparse_moe.experts.63.w3", "model.layers.5.block_sparse_moe.experts.64.w3", "model.layers.5.block_sparse_moe.experts.65.w3", "model.layers.5.block_sparse_moe.experts.66.w3", "model.layers.5.block_sparse_moe.experts.67.w3", "model.layers.5.block_sparse_moe.experts.68.w3", "model.layers.5.block_sparse_moe.experts.69.w3", "model.layers.5.block_sparse_moe.experts.70.w3", "model.layers.5.block_sparse_moe.experts.71.w3", "model.layers.5.block_sparse_moe.experts.72.w3", "model.layers.5.block_sparse_moe.experts.73.w3", "model.layers.5.block_sparse_moe.experts.74.w3", "model.layers.5.block_sparse_moe.experts.75.w3", "model.layers.5.block_sparse_moe.experts.76.w3", "model.layers.5.block_sparse_moe.experts.77.w3", "model.layers.5.block_sparse_moe.experts.78.w3", "model.layers.5.block_sparse_moe.experts.79.w3", "model.layers.5.block_sparse_moe.experts.80.w3", "model.layers.5.block_sparse_moe.experts.81.w3", "model.layers.5.block_sparse_moe.experts.82.w3", "model.layers.5.block_sparse_moe.experts.83.w3", "model.layers.5.block_sparse_moe.experts.84.w3", "model.layers.5.block_sparse_moe.experts.85.w3", "model.layers.5.block_sparse_moe.experts.86.w3", "model.layers.5.block_sparse_moe.experts.87.w3", "model.layers.5.block_sparse_moe.experts.88.w3", "model.layers.5.block_sparse_moe.experts.89.w3", "model.layers.5.block_sparse_moe.experts.90.w3", "model.layers.5.block_sparse_moe.experts.91.w3", "model.layers.5.block_sparse_moe.experts.92.w3", "model.layers.5.block_sparse_moe.experts.93.w3", "model.layers.5.block_sparse_moe.experts.94.w3", "model.layers.5.block_sparse_moe.experts.95.w3", "model.layers.5.block_sparse_moe.experts.96.w3", "model.layers.5.block_sparse_moe.experts.97.w3", "model.layers.5.block_sparse_moe.experts.98.w3", "model.layers.5.block_sparse_moe.experts.99.w3", "model.layers.5.block_sparse_moe.experts.100.w3", "model.layers.5.block_sparse_moe.experts.101.w3", "model.layers.5.block_sparse_moe.experts.102.w3", "model.layers.5.block_sparse_moe.experts.103.w3", "model.layers.5.block_sparse_moe.experts.104.w3", "model.layers.5.block_sparse_moe.experts.105.w3", "model.layers.5.block_sparse_moe.experts.106.w3", "model.layers.5.block_sparse_moe.experts.107.w3", "model.layers.5.block_sparse_moe.experts.108.w3", "model.layers.5.block_sparse_moe.experts.109.w3", "model.layers.5.block_sparse_moe.experts.110.w3", "model.layers.5.block_sparse_moe.experts.111.w3", "model.layers.5.block_sparse_moe.experts.112.w3", "model.layers.5.block_sparse_moe.experts.113.w3", "model.layers.5.block_sparse_moe.experts.114.w3", "model.layers.5.block_sparse_moe.experts.115.w3", "model.layers.5.block_sparse_moe.experts.116.w3", "model.layers.5.block_sparse_moe.experts.117.w3", "model.layers.5.block_sparse_moe.experts.118.w3", "model.layers.5.block_sparse_moe.experts.119.w3", "model.layers.5.block_sparse_moe.experts.120.w3", "model.layers.5.block_sparse_moe.experts.121.w3", "model.layers.5.block_sparse_moe.experts.122.w3", "model.layers.5.block_sparse_moe.experts.123.w3", "model.layers.5.block_sparse_moe.experts.124.w3", "model.layers.5.block_sparse_moe.experts.125.w3", "model.layers.5.block_sparse_moe.experts.126.w3", "model.layers.5.block_sparse_moe.experts.127.w3", "model.layers.5.block_sparse_moe.experts.128.w3", "model.layers.5.block_sparse_moe.experts.129.w3", "model.layers.5.block_sparse_moe.experts.130.w3", "model.layers.5.block_sparse_moe.experts.131.w3", "model.layers.5.block_sparse_moe.experts.132.w3", "model.layers.5.block_sparse_moe.experts.133.w3", "model.layers.5.block_sparse_moe.experts.134.w3", "model.layers.5.block_sparse_moe.experts.135.w3", "model.layers.5.block_sparse_moe.experts.136.w3", "model.layers.5.block_sparse_moe.experts.137.w3", "model.layers.5.block_sparse_moe.experts.138.w3", "model.layers.5.block_sparse_moe.experts.139.w3", "model.layers.5.block_sparse_moe.experts.140.w3", "model.layers.5.block_sparse_moe.experts.141.w3", "model.layers.5.block_sparse_moe.experts.142.w3", "model.layers.5.block_sparse_moe.experts.143.w3", "model.layers.5.block_sparse_moe.experts.144.w3", "model.layers.5.block_sparse_moe.experts.145.w3", "model.layers.5.block_sparse_moe.experts.146.w3", "model.layers.5.block_sparse_moe.experts.147.w3", "model.layers.5.block_sparse_moe.experts.148.w3", "model.layers.5.block_sparse_moe.experts.149.w3", "model.layers.5.block_sparse_moe.experts.150.w3", "model.layers.5.block_sparse_moe.experts.151.w3", "model.layers.5.block_sparse_moe.experts.152.w3", "model.layers.5.block_sparse_moe.experts.153.w3", "model.layers.5.block_sparse_moe.experts.154.w3", "model.layers.5.block_sparse_moe.experts.155.w3", "model.layers.5.block_sparse_moe.experts.156.w3", "model.layers.5.block_sparse_moe.experts.157.w3", "model.layers.5.block_sparse_moe.experts.158.w3", "model.layers.5.block_sparse_moe.experts.159.w3", "model.layers.5.block_sparse_moe.experts.160.w3", "model.layers.5.block_sparse_moe.experts.161.w3", "model.layers.5.block_sparse_moe.experts.162.w3", "model.layers.5.block_sparse_moe.experts.163.w3", "model.layers.5.block_sparse_moe.experts.164.w3", "model.layers.5.block_sparse_moe.experts.165.w3", "model.layers.5.block_sparse_moe.experts.166.w3", "model.layers.5.block_sparse_moe.experts.167.w3", "model.layers.5.block_sparse_moe.experts.168.w3", "model.layers.5.block_sparse_moe.experts.169.w3", "model.layers.5.block_sparse_moe.experts.170.w3", "model.layers.5.block_sparse_moe.experts.171.w3", "model.layers.5.block_sparse_moe.experts.172.w3", "model.layers.5.block_sparse_moe.experts.173.w3", "model.layers.5.block_sparse_moe.experts.174.w3", "model.layers.5.block_sparse_moe.experts.175.w3", "model.layers.5.block_sparse_moe.experts.176.w3", "model.layers.5.block_sparse_moe.experts.177.w3", "model.layers.5.block_sparse_moe.experts.178.w3", "model.layers.5.block_sparse_moe.experts.179.w3", "model.layers.5.block_sparse_moe.experts.180.w3", "model.layers.5.block_sparse_moe.experts.181.w3", "model.layers.5.block_sparse_moe.experts.182.w3", "model.layers.5.block_sparse_moe.experts.183.w3", "model.layers.5.block_sparse_moe.experts.184.w3", "model.layers.5.block_sparse_moe.experts.185.w3", "model.layers.5.block_sparse_moe.experts.186.w3", "model.layers.5.block_sparse_moe.experts.187.w3", "model.layers.5.block_sparse_moe.experts.188.w3", "model.layers.5.block_sparse_moe.experts.189.w3", "model.layers.5.block_sparse_moe.experts.190.w3", "model.layers.5.block_sparse_moe.experts.191.w3", "model.layers.5.block_sparse_moe.experts.192.w3", "model.layers.5.block_sparse_moe.experts.193.w3", "model.layers.5.block_sparse_moe.experts.194.w3", "model.layers.5.block_sparse_moe.experts.195.w3", "model.layers.5.block_sparse_moe.experts.196.w3", "model.layers.5.block_sparse_moe.experts.197.w3", "model.layers.5.block_sparse_moe.experts.198.w3", "model.layers.5.block_sparse_moe.experts.199.w3", "model.layers.5.block_sparse_moe.experts.200.w3", "model.layers.5.block_sparse_moe.experts.201.w3", "model.layers.5.block_sparse_moe.experts.202.w3", "model.layers.5.block_sparse_moe.experts.203.w3", "model.layers.5.block_sparse_moe.experts.204.w3", "model.layers.5.block_sparse_moe.experts.205.w3", "model.layers.5.block_sparse_moe.experts.206.w3", "model.layers.5.block_sparse_moe.experts.207.w3", "model.layers.5.block_sparse_moe.experts.208.w3", "model.layers.5.block_sparse_moe.experts.209.w3", "model.layers.5.block_sparse_moe.experts.210.w3", "model.layers.5.block_sparse_moe.experts.211.w3", "model.layers.5.block_sparse_moe.experts.212.w3", "model.layers.5.block_sparse_moe.experts.213.w3", "model.layers.5.block_sparse_moe.experts.214.w3", "model.layers.5.block_sparse_moe.experts.215.w3", "model.layers.5.block_sparse_moe.experts.216.w3", "model.layers.5.block_sparse_moe.experts.217.w3", "model.layers.5.block_sparse_moe.experts.218.w3", "model.layers.5.block_sparse_moe.experts.219.w3", "model.layers.5.block_sparse_moe.experts.220.w3", "model.layers.5.block_sparse_moe.experts.221.w3", "model.layers.5.block_sparse_moe.experts.222.w3", "model.layers.5.block_sparse_moe.experts.223.w3", "model.layers.5.block_sparse_moe.experts.224.w3", "model.layers.5.block_sparse_moe.experts.225.w3", "model.layers.5.block_sparse_moe.experts.226.w3", "model.layers.5.block_sparse_moe.experts.227.w3", "model.layers.5.block_sparse_moe.experts.228.w3", "model.layers.5.block_sparse_moe.experts.229.w3", "model.layers.5.block_sparse_moe.experts.230.w3", "model.layers.5.block_sparse_moe.experts.231.w3", "model.layers.5.block_sparse_moe.experts.232.w3", "model.layers.5.block_sparse_moe.experts.233.w3", "model.layers.5.block_sparse_moe.experts.234.w3", "model.layers.5.block_sparse_moe.experts.235.w3", "model.layers.5.block_sparse_moe.experts.236.w3", "model.layers.5.block_sparse_moe.experts.237.w3", "model.layers.5.block_sparse_moe.experts.238.w3", "model.layers.5.block_sparse_moe.experts.239.w3", "model.layers.5.block_sparse_moe.experts.240.w3", "model.layers.5.block_sparse_moe.experts.241.w3", "model.layers.5.block_sparse_moe.experts.242.w3", "model.layers.5.block_sparse_moe.experts.243.w3", "model.layers.5.block_sparse_moe.experts.244.w3", "model.layers.5.block_sparse_moe.experts.245.w3", "model.layers.5.block_sparse_moe.experts.246.w3", "model.layers.5.block_sparse_moe.experts.247.w3", "model.layers.5.block_sparse_moe.experts.248.w3", "model.layers.5.block_sparse_moe.experts.249.w3", "model.layers.5.block_sparse_moe.experts.250.w3", "model.layers.5.block_sparse_moe.experts.251.w3", "model.layers.5.block_sparse_moe.experts.252.w3", "model.layers.5.block_sparse_moe.experts.253.w3", "model.layers.5.block_sparse_moe.experts.254.w3", "model.layers.5.block_sparse_moe.experts.255.w3", "model.layers.5.block_sparse_moe.experts.0.w2", "model.layers.5.block_sparse_moe.experts.1.w2", "model.layers.5.block_sparse_moe.experts.2.w2", "model.layers.5.block_sparse_moe.experts.3.w2", "model.layers.5.block_sparse_moe.experts.4.w2", "model.layers.5.block_sparse_moe.experts.5.w2", "model.layers.5.block_sparse_moe.experts.6.w2", "model.layers.5.block_sparse_moe.experts.7.w2", "model.layers.5.block_sparse_moe.experts.8.w2", "model.layers.5.block_sparse_moe.experts.9.w2", "model.layers.5.block_sparse_moe.experts.10.w2", "model.layers.5.block_sparse_moe.experts.11.w2", "model.layers.5.block_sparse_moe.experts.12.w2", "model.layers.5.block_sparse_moe.experts.13.w2", "model.layers.5.block_sparse_moe.experts.14.w2", "model.layers.5.block_sparse_moe.experts.15.w2", "model.layers.5.block_sparse_moe.experts.16.w2", "model.layers.5.block_sparse_moe.experts.17.w2", "model.layers.5.block_sparse_moe.experts.18.w2", "model.layers.5.block_sparse_moe.experts.19.w2", "model.layers.5.block_sparse_moe.experts.20.w2", "model.layers.5.block_sparse_moe.experts.21.w2", "model.layers.5.block_sparse_moe.experts.22.w2", "model.layers.5.block_sparse_moe.experts.23.w2", "model.layers.5.block_sparse_moe.experts.24.w2", "model.layers.5.block_sparse_moe.experts.25.w2", "model.layers.5.block_sparse_moe.experts.26.w2", "model.layers.5.block_sparse_moe.experts.27.w2", "model.layers.5.block_sparse_moe.experts.28.w2", "model.layers.5.block_sparse_moe.experts.29.w2", "model.layers.5.block_sparse_moe.experts.30.w2", "model.layers.5.block_sparse_moe.experts.31.w2", "model.layers.5.block_sparse_moe.experts.32.w2", "model.layers.5.block_sparse_moe.experts.33.w2", "model.layers.5.block_sparse_moe.experts.34.w2", "model.layers.5.block_sparse_moe.experts.35.w2", "model.layers.5.block_sparse_moe.experts.36.w2", "model.layers.5.block_sparse_moe.experts.37.w2", "model.layers.5.block_sparse_moe.experts.38.w2", "model.layers.5.block_sparse_moe.experts.39.w2", "model.layers.5.block_sparse_moe.experts.40.w2", "model.layers.5.block_sparse_moe.experts.41.w2", "model.layers.5.block_sparse_moe.experts.42.w2", "model.layers.5.block_sparse_moe.experts.43.w2", "model.layers.5.block_sparse_moe.experts.44.w2", "model.layers.5.block_sparse_moe.experts.45.w2", "model.layers.5.block_sparse_moe.experts.46.w2", "model.layers.5.block_sparse_moe.experts.47.w2", "model.layers.5.block_sparse_moe.experts.48.w2", "model.layers.5.block_sparse_moe.experts.49.w2", "model.layers.5.block_sparse_moe.experts.50.w2", "model.layers.5.block_sparse_moe.experts.51.w2", "model.layers.5.block_sparse_moe.experts.52.w2", "model.layers.5.block_sparse_moe.experts.53.w2", "model.layers.5.block_sparse_moe.experts.54.w2", "model.layers.5.block_sparse_moe.experts.55.w2", "model.layers.5.block_sparse_moe.experts.56.w2", "model.layers.5.block_sparse_moe.experts.57.w2", "model.layers.5.block_sparse_moe.experts.58.w2", "model.layers.5.block_sparse_moe.experts.59.w2", "model.layers.5.block_sparse_moe.experts.60.w2", "model.layers.5.block_sparse_moe.experts.61.w2", "model.layers.5.block_sparse_moe.experts.62.w2", "model.layers.5.block_sparse_moe.experts.63.w2", "model.layers.5.block_sparse_moe.experts.64.w2", "model.layers.5.block_sparse_moe.experts.65.w2", "model.layers.5.block_sparse_moe.experts.66.w2", "model.layers.5.block_sparse_moe.experts.67.w2", "model.layers.5.block_sparse_moe.experts.68.w2", "model.layers.5.block_sparse_moe.experts.69.w2", "model.layers.5.block_sparse_moe.experts.70.w2", "model.layers.5.block_sparse_moe.experts.71.w2", "model.layers.5.block_sparse_moe.experts.72.w2", "model.layers.5.block_sparse_moe.experts.73.w2", "model.layers.5.block_sparse_moe.experts.74.w2", "model.layers.5.block_sparse_moe.experts.75.w2", "model.layers.5.block_sparse_moe.experts.76.w2", "model.layers.5.block_sparse_moe.experts.77.w2", "model.layers.5.block_sparse_moe.experts.78.w2", "model.layers.5.block_sparse_moe.experts.79.w2", "model.layers.5.block_sparse_moe.experts.80.w2", "model.layers.5.block_sparse_moe.experts.81.w2", "model.layers.5.block_sparse_moe.experts.82.w2", "model.layers.5.block_sparse_moe.experts.83.w2", "model.layers.5.block_sparse_moe.experts.84.w2", "model.layers.5.block_sparse_moe.experts.85.w2", "model.layers.5.block_sparse_moe.experts.86.w2", "model.layers.5.block_sparse_moe.experts.87.w2", "model.layers.5.block_sparse_moe.experts.88.w2", "model.layers.5.block_sparse_moe.experts.89.w2", "model.layers.5.block_sparse_moe.experts.90.w2", "model.layers.5.block_sparse_moe.experts.91.w2", "model.layers.5.block_sparse_moe.experts.92.w2", "model.layers.5.block_sparse_moe.experts.93.w2", "model.layers.5.block_sparse_moe.experts.94.w2", "model.layers.5.block_sparse_moe.experts.95.w2", "model.layers.5.block_sparse_moe.experts.96.w2", "model.layers.5.block_sparse_moe.experts.97.w2", "model.layers.5.block_sparse_moe.experts.98.w2", "model.layers.5.block_sparse_moe.experts.99.w2", "model.layers.5.block_sparse_moe.experts.100.w2", "model.layers.5.block_sparse_moe.experts.101.w2", "model.layers.5.block_sparse_moe.experts.102.w2", "model.layers.5.block_sparse_moe.experts.103.w2", "model.layers.5.block_sparse_moe.experts.104.w2", "model.layers.5.block_sparse_moe.experts.105.w2", "model.layers.5.block_sparse_moe.experts.106.w2", "model.layers.5.block_sparse_moe.experts.107.w2", "model.layers.5.block_sparse_moe.experts.108.w2", "model.layers.5.block_sparse_moe.experts.109.w2", "model.layers.5.block_sparse_moe.experts.110.w2", "model.layers.5.block_sparse_moe.experts.111.w2", "model.layers.5.block_sparse_moe.experts.112.w2", "model.layers.5.block_sparse_moe.experts.113.w2", "model.layers.5.block_sparse_moe.experts.114.w2", "model.layers.5.block_sparse_moe.experts.115.w2", "model.layers.5.block_sparse_moe.experts.116.w2", "model.layers.5.block_sparse_moe.experts.117.w2", "model.layers.5.block_sparse_moe.experts.118.w2", "model.layers.5.block_sparse_moe.experts.119.w2", "model.layers.5.block_sparse_moe.experts.120.w2", "model.layers.5.block_sparse_moe.experts.121.w2", "model.layers.5.block_sparse_moe.experts.122.w2", "model.layers.5.block_sparse_moe.experts.123.w2", "model.layers.5.block_sparse_moe.experts.124.w2", "model.layers.5.block_sparse_moe.experts.125.w2", "model.layers.5.block_sparse_moe.experts.126.w2", "model.layers.5.block_sparse_moe.experts.127.w2", "model.layers.5.block_sparse_moe.experts.128.w2", "model.layers.5.block_sparse_moe.experts.129.w2", "model.layers.5.block_sparse_moe.experts.130.w2", "model.layers.5.block_sparse_moe.experts.131.w2", "model.layers.5.block_sparse_moe.experts.132.w2", "model.layers.5.block_sparse_moe.experts.133.w2", "model.layers.5.block_sparse_moe.experts.134.w2", "model.layers.5.block_sparse_moe.experts.135.w2", "model.layers.5.block_sparse_moe.experts.136.w2", "model.layers.5.block_sparse_moe.experts.137.w2", "model.layers.5.block_sparse_moe.experts.138.w2", "model.layers.5.block_sparse_moe.experts.139.w2", "model.layers.5.block_sparse_moe.experts.140.w2", "model.layers.5.block_sparse_moe.experts.141.w2", "model.layers.5.block_sparse_moe.experts.142.w2", "model.layers.5.block_sparse_moe.experts.143.w2", "model.layers.5.block_sparse_moe.experts.144.w2", "model.layers.5.block_sparse_moe.experts.145.w2", "model.layers.5.block_sparse_moe.experts.146.w2", "model.layers.5.block_sparse_moe.experts.147.w2", "model.layers.5.block_sparse_moe.experts.148.w2", "model.layers.5.block_sparse_moe.experts.149.w2", "model.layers.5.block_sparse_moe.experts.150.w2", "model.layers.5.block_sparse_moe.experts.151.w2", "model.layers.5.block_sparse_moe.experts.152.w2", "model.layers.5.block_sparse_moe.experts.153.w2", "model.layers.5.block_sparse_moe.experts.154.w2", "model.layers.5.block_sparse_moe.experts.155.w2", "model.layers.5.block_sparse_moe.experts.156.w2", "model.layers.5.block_sparse_moe.experts.157.w2", "model.layers.5.block_sparse_moe.experts.158.w2", "model.layers.5.block_sparse_moe.experts.159.w2", "model.layers.5.block_sparse_moe.experts.160.w2", "model.layers.5.block_sparse_moe.experts.161.w2", "model.layers.5.block_sparse_moe.experts.162.w2", "model.layers.5.block_sparse_moe.experts.163.w2", "model.layers.5.block_sparse_moe.experts.164.w2", "model.layers.5.block_sparse_moe.experts.165.w2", "model.layers.5.block_sparse_moe.experts.166.w2", "model.layers.5.block_sparse_moe.experts.167.w2", "model.layers.5.block_sparse_moe.experts.168.w2", "model.layers.5.block_sparse_moe.experts.169.w2", "model.layers.5.block_sparse_moe.experts.170.w2", "model.layers.5.block_sparse_moe.experts.171.w2", "model.layers.5.block_sparse_moe.experts.172.w2", "model.layers.5.block_sparse_moe.experts.173.w2", "model.layers.5.block_sparse_moe.experts.174.w2", "model.layers.5.block_sparse_moe.experts.175.w2", "model.layers.5.block_sparse_moe.experts.176.w2", "model.layers.5.block_sparse_moe.experts.177.w2", "model.layers.5.block_sparse_moe.experts.178.w2", "model.layers.5.block_sparse_moe.experts.179.w2", "model.layers.5.block_sparse_moe.experts.180.w2", "model.layers.5.block_sparse_moe.experts.181.w2", "model.layers.5.block_sparse_moe.experts.182.w2", "model.layers.5.block_sparse_moe.experts.183.w2", "model.layers.5.block_sparse_moe.experts.184.w2", "model.layers.5.block_sparse_moe.experts.185.w2", "model.layers.5.block_sparse_moe.experts.186.w2", "model.layers.5.block_sparse_moe.experts.187.w2", "model.layers.5.block_sparse_moe.experts.188.w2", "model.layers.5.block_sparse_moe.experts.189.w2", "model.layers.5.block_sparse_moe.experts.190.w2", "model.layers.5.block_sparse_moe.experts.191.w2", "model.layers.5.block_sparse_moe.experts.192.w2", "model.layers.5.block_sparse_moe.experts.193.w2", "model.layers.5.block_sparse_moe.experts.194.w2", "model.layers.5.block_sparse_moe.experts.195.w2", "model.layers.5.block_sparse_moe.experts.196.w2", "model.layers.5.block_sparse_moe.experts.197.w2", "model.layers.5.block_sparse_moe.experts.198.w2", "model.layers.5.block_sparse_moe.experts.199.w2", "model.layers.5.block_sparse_moe.experts.200.w2", "model.layers.5.block_sparse_moe.experts.201.w2", "model.layers.5.block_sparse_moe.experts.202.w2", "model.layers.5.block_sparse_moe.experts.203.w2", "model.layers.5.block_sparse_moe.experts.204.w2", "model.layers.5.block_sparse_moe.experts.205.w2", "model.layers.5.block_sparse_moe.experts.206.w2", "model.layers.5.block_sparse_moe.experts.207.w2", "model.layers.5.block_sparse_moe.experts.208.w2", "model.layers.5.block_sparse_moe.experts.209.w2", "model.layers.5.block_sparse_moe.experts.210.w2", "model.layers.5.block_sparse_moe.experts.211.w2", "model.layers.5.block_sparse_moe.experts.212.w2", "model.layers.5.block_sparse_moe.experts.213.w2", "model.layers.5.block_sparse_moe.experts.214.w2", "model.layers.5.block_sparse_moe.experts.215.w2", "model.layers.5.block_sparse_moe.experts.216.w2", "model.layers.5.block_sparse_moe.experts.217.w2", "model.layers.5.block_sparse_moe.experts.218.w2", "model.layers.5.block_sparse_moe.experts.219.w2", "model.layers.5.block_sparse_moe.experts.220.w2", "model.layers.5.block_sparse_moe.experts.221.w2", "model.layers.5.block_sparse_moe.experts.222.w2", "model.layers.5.block_sparse_moe.experts.223.w2", "model.layers.5.block_sparse_moe.experts.224.w2", "model.layers.5.block_sparse_moe.experts.225.w2", "model.layers.5.block_sparse_moe.experts.226.w2", "model.layers.5.block_sparse_moe.experts.227.w2", "model.layers.5.block_sparse_moe.experts.228.w2", "model.layers.5.block_sparse_moe.experts.229.w2", "model.layers.5.block_sparse_moe.experts.230.w2", "model.layers.5.block_sparse_moe.experts.231.w2", "model.layers.5.block_sparse_moe.experts.232.w2", "model.layers.5.block_sparse_moe.experts.233.w2", "model.layers.5.block_sparse_moe.experts.234.w2", "model.layers.5.block_sparse_moe.experts.235.w2", "model.layers.5.block_sparse_moe.experts.236.w2", "model.layers.5.block_sparse_moe.experts.237.w2", "model.layers.5.block_sparse_moe.experts.238.w2", "model.layers.5.block_sparse_moe.experts.239.w2", "model.layers.5.block_sparse_moe.experts.240.w2", "model.layers.5.block_sparse_moe.experts.241.w2", "model.layers.5.block_sparse_moe.experts.242.w2", "model.layers.5.block_sparse_moe.experts.243.w2", "model.layers.5.block_sparse_moe.experts.244.w2", "model.layers.5.block_sparse_moe.experts.245.w2", "model.layers.5.block_sparse_moe.experts.246.w2", "model.layers.5.block_sparse_moe.experts.247.w2", "model.layers.5.block_sparse_moe.experts.248.w2", "model.layers.5.block_sparse_moe.experts.249.w2", "model.layers.5.block_sparse_moe.experts.250.w2", "model.layers.5.block_sparse_moe.experts.251.w2", "model.layers.5.block_sparse_moe.experts.252.w2", "model.layers.5.block_sparse_moe.experts.253.w2", "model.layers.5.block_sparse_moe.experts.254.w2", "model.layers.5.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.01862958334386347, "dbits": 3623878656 } ] }, { "idx": 12, "layers": [ "model.layers.6.self_attn.q_proj", "model.layers.6.self_attn.k_proj", "model.layers.6.self_attn.v_proj", "model.layers.6.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0012394921854138041, "dbits": 44040192 } ] }, { "idx": 13, "layers": [ "model.layers.6.block_sparse_moe.experts.0.w1", "model.layers.6.block_sparse_moe.experts.1.w1", "model.layers.6.block_sparse_moe.experts.2.w1", "model.layers.6.block_sparse_moe.experts.3.w1", "model.layers.6.block_sparse_moe.experts.4.w1", "model.layers.6.block_sparse_moe.experts.5.w1", "model.layers.6.block_sparse_moe.experts.6.w1", "model.layers.6.block_sparse_moe.experts.7.w1", "model.layers.6.block_sparse_moe.experts.8.w1", "model.layers.6.block_sparse_moe.experts.9.w1", "model.layers.6.block_sparse_moe.experts.10.w1", "model.layers.6.block_sparse_moe.experts.11.w1", "model.layers.6.block_sparse_moe.experts.12.w1", "model.layers.6.block_sparse_moe.experts.13.w1", "model.layers.6.block_sparse_moe.experts.14.w1", "model.layers.6.block_sparse_moe.experts.15.w1", "model.layers.6.block_sparse_moe.experts.16.w1", "model.layers.6.block_sparse_moe.experts.17.w1", "model.layers.6.block_sparse_moe.experts.18.w1", "model.layers.6.block_sparse_moe.experts.19.w1", "model.layers.6.block_sparse_moe.experts.20.w1", "model.layers.6.block_sparse_moe.experts.21.w1", "model.layers.6.block_sparse_moe.experts.22.w1", "model.layers.6.block_sparse_moe.experts.23.w1", "model.layers.6.block_sparse_moe.experts.24.w1", "model.layers.6.block_sparse_moe.experts.25.w1", "model.layers.6.block_sparse_moe.experts.26.w1", "model.layers.6.block_sparse_moe.experts.27.w1", "model.layers.6.block_sparse_moe.experts.28.w1", "model.layers.6.block_sparse_moe.experts.29.w1", "model.layers.6.block_sparse_moe.experts.30.w1", "model.layers.6.block_sparse_moe.experts.31.w1", "model.layers.6.block_sparse_moe.experts.32.w1", "model.layers.6.block_sparse_moe.experts.33.w1", "model.layers.6.block_sparse_moe.experts.34.w1", "model.layers.6.block_sparse_moe.experts.35.w1", "model.layers.6.block_sparse_moe.experts.36.w1", "model.layers.6.block_sparse_moe.experts.37.w1", "model.layers.6.block_sparse_moe.experts.38.w1", "model.layers.6.block_sparse_moe.experts.39.w1", "model.layers.6.block_sparse_moe.experts.40.w1", "model.layers.6.block_sparse_moe.experts.41.w1", "model.layers.6.block_sparse_moe.experts.42.w1", "model.layers.6.block_sparse_moe.experts.43.w1", "model.layers.6.block_sparse_moe.experts.44.w1", "model.layers.6.block_sparse_moe.experts.45.w1", "model.layers.6.block_sparse_moe.experts.46.w1", "model.layers.6.block_sparse_moe.experts.47.w1", "model.layers.6.block_sparse_moe.experts.48.w1", "model.layers.6.block_sparse_moe.experts.49.w1", "model.layers.6.block_sparse_moe.experts.50.w1", "model.layers.6.block_sparse_moe.experts.51.w1", "model.layers.6.block_sparse_moe.experts.52.w1", "model.layers.6.block_sparse_moe.experts.53.w1", "model.layers.6.block_sparse_moe.experts.54.w1", "model.layers.6.block_sparse_moe.experts.55.w1", "model.layers.6.block_sparse_moe.experts.56.w1", "model.layers.6.block_sparse_moe.experts.57.w1", "model.layers.6.block_sparse_moe.experts.58.w1", "model.layers.6.block_sparse_moe.experts.59.w1", "model.layers.6.block_sparse_moe.experts.60.w1", "model.layers.6.block_sparse_moe.experts.61.w1", "model.layers.6.block_sparse_moe.experts.62.w1", "model.layers.6.block_sparse_moe.experts.63.w1", "model.layers.6.block_sparse_moe.experts.64.w1", "model.layers.6.block_sparse_moe.experts.65.w1", "model.layers.6.block_sparse_moe.experts.66.w1", "model.layers.6.block_sparse_moe.experts.67.w1", "model.layers.6.block_sparse_moe.experts.68.w1", "model.layers.6.block_sparse_moe.experts.69.w1", "model.layers.6.block_sparse_moe.experts.70.w1", "model.layers.6.block_sparse_moe.experts.71.w1", "model.layers.6.block_sparse_moe.experts.72.w1", "model.layers.6.block_sparse_moe.experts.73.w1", "model.layers.6.block_sparse_moe.experts.74.w1", "model.layers.6.block_sparse_moe.experts.75.w1", "model.layers.6.block_sparse_moe.experts.76.w1", "model.layers.6.block_sparse_moe.experts.77.w1", "model.layers.6.block_sparse_moe.experts.78.w1", "model.layers.6.block_sparse_moe.experts.79.w1", "model.layers.6.block_sparse_moe.experts.80.w1", "model.layers.6.block_sparse_moe.experts.81.w1", "model.layers.6.block_sparse_moe.experts.82.w1", "model.layers.6.block_sparse_moe.experts.83.w1", "model.layers.6.block_sparse_moe.experts.84.w1", "model.layers.6.block_sparse_moe.experts.85.w1", "model.layers.6.block_sparse_moe.experts.86.w1", "model.layers.6.block_sparse_moe.experts.87.w1", "model.layers.6.block_sparse_moe.experts.88.w1", "model.layers.6.block_sparse_moe.experts.89.w1", "model.layers.6.block_sparse_moe.experts.90.w1", "model.layers.6.block_sparse_moe.experts.91.w1", "model.layers.6.block_sparse_moe.experts.92.w1", "model.layers.6.block_sparse_moe.experts.93.w1", "model.layers.6.block_sparse_moe.experts.94.w1", "model.layers.6.block_sparse_moe.experts.95.w1", "model.layers.6.block_sparse_moe.experts.96.w1", "model.layers.6.block_sparse_moe.experts.97.w1", "model.layers.6.block_sparse_moe.experts.98.w1", "model.layers.6.block_sparse_moe.experts.99.w1", "model.layers.6.block_sparse_moe.experts.100.w1", "model.layers.6.block_sparse_moe.experts.101.w1", "model.layers.6.block_sparse_moe.experts.102.w1", "model.layers.6.block_sparse_moe.experts.103.w1", "model.layers.6.block_sparse_moe.experts.104.w1", "model.layers.6.block_sparse_moe.experts.105.w1", "model.layers.6.block_sparse_moe.experts.106.w1", "model.layers.6.block_sparse_moe.experts.107.w1", "model.layers.6.block_sparse_moe.experts.108.w1", "model.layers.6.block_sparse_moe.experts.109.w1", "model.layers.6.block_sparse_moe.experts.110.w1", "model.layers.6.block_sparse_moe.experts.111.w1", "model.layers.6.block_sparse_moe.experts.112.w1", "model.layers.6.block_sparse_moe.experts.113.w1", "model.layers.6.block_sparse_moe.experts.114.w1", "model.layers.6.block_sparse_moe.experts.115.w1", "model.layers.6.block_sparse_moe.experts.116.w1", "model.layers.6.block_sparse_moe.experts.117.w1", "model.layers.6.block_sparse_moe.experts.118.w1", "model.layers.6.block_sparse_moe.experts.119.w1", "model.layers.6.block_sparse_moe.experts.120.w1", "model.layers.6.block_sparse_moe.experts.121.w1", "model.layers.6.block_sparse_moe.experts.122.w1", "model.layers.6.block_sparse_moe.experts.123.w1", "model.layers.6.block_sparse_moe.experts.124.w1", "model.layers.6.block_sparse_moe.experts.125.w1", "model.layers.6.block_sparse_moe.experts.126.w1", "model.layers.6.block_sparse_moe.experts.127.w1", "model.layers.6.block_sparse_moe.experts.128.w1", "model.layers.6.block_sparse_moe.experts.129.w1", "model.layers.6.block_sparse_moe.experts.130.w1", "model.layers.6.block_sparse_moe.experts.131.w1", "model.layers.6.block_sparse_moe.experts.132.w1", "model.layers.6.block_sparse_moe.experts.133.w1", "model.layers.6.block_sparse_moe.experts.134.w1", "model.layers.6.block_sparse_moe.experts.135.w1", "model.layers.6.block_sparse_moe.experts.136.w1", "model.layers.6.block_sparse_moe.experts.137.w1", "model.layers.6.block_sparse_moe.experts.138.w1", "model.layers.6.block_sparse_moe.experts.139.w1", "model.layers.6.block_sparse_moe.experts.140.w1", "model.layers.6.block_sparse_moe.experts.141.w1", "model.layers.6.block_sparse_moe.experts.142.w1", "model.layers.6.block_sparse_moe.experts.143.w1", "model.layers.6.block_sparse_moe.experts.144.w1", "model.layers.6.block_sparse_moe.experts.145.w1", "model.layers.6.block_sparse_moe.experts.146.w1", "model.layers.6.block_sparse_moe.experts.147.w1", "model.layers.6.block_sparse_moe.experts.148.w1", "model.layers.6.block_sparse_moe.experts.149.w1", "model.layers.6.block_sparse_moe.experts.150.w1", "model.layers.6.block_sparse_moe.experts.151.w1", "model.layers.6.block_sparse_moe.experts.152.w1", "model.layers.6.block_sparse_moe.experts.153.w1", "model.layers.6.block_sparse_moe.experts.154.w1", "model.layers.6.block_sparse_moe.experts.155.w1", "model.layers.6.block_sparse_moe.experts.156.w1", "model.layers.6.block_sparse_moe.experts.157.w1", "model.layers.6.block_sparse_moe.experts.158.w1", "model.layers.6.block_sparse_moe.experts.159.w1", "model.layers.6.block_sparse_moe.experts.160.w1", "model.layers.6.block_sparse_moe.experts.161.w1", "model.layers.6.block_sparse_moe.experts.162.w1", "model.layers.6.block_sparse_moe.experts.163.w1", "model.layers.6.block_sparse_moe.experts.164.w1", "model.layers.6.block_sparse_moe.experts.165.w1", "model.layers.6.block_sparse_moe.experts.166.w1", "model.layers.6.block_sparse_moe.experts.167.w1", "model.layers.6.block_sparse_moe.experts.168.w1", "model.layers.6.block_sparse_moe.experts.169.w1", "model.layers.6.block_sparse_moe.experts.170.w1", "model.layers.6.block_sparse_moe.experts.171.w1", "model.layers.6.block_sparse_moe.experts.172.w1", "model.layers.6.block_sparse_moe.experts.173.w1", "model.layers.6.block_sparse_moe.experts.174.w1", "model.layers.6.block_sparse_moe.experts.175.w1", "model.layers.6.block_sparse_moe.experts.176.w1", "model.layers.6.block_sparse_moe.experts.177.w1", "model.layers.6.block_sparse_moe.experts.178.w1", "model.layers.6.block_sparse_moe.experts.179.w1", "model.layers.6.block_sparse_moe.experts.180.w1", "model.layers.6.block_sparse_moe.experts.181.w1", "model.layers.6.block_sparse_moe.experts.182.w1", "model.layers.6.block_sparse_moe.experts.183.w1", "model.layers.6.block_sparse_moe.experts.184.w1", "model.layers.6.block_sparse_moe.experts.185.w1", "model.layers.6.block_sparse_moe.experts.186.w1", "model.layers.6.block_sparse_moe.experts.187.w1", "model.layers.6.block_sparse_moe.experts.188.w1", "model.layers.6.block_sparse_moe.experts.189.w1", "model.layers.6.block_sparse_moe.experts.190.w1", "model.layers.6.block_sparse_moe.experts.191.w1", "model.layers.6.block_sparse_moe.experts.192.w1", "model.layers.6.block_sparse_moe.experts.193.w1", "model.layers.6.block_sparse_moe.experts.194.w1", "model.layers.6.block_sparse_moe.experts.195.w1", "model.layers.6.block_sparse_moe.experts.196.w1", "model.layers.6.block_sparse_moe.experts.197.w1", "model.layers.6.block_sparse_moe.experts.198.w1", "model.layers.6.block_sparse_moe.experts.199.w1", "model.layers.6.block_sparse_moe.experts.200.w1", "model.layers.6.block_sparse_moe.experts.201.w1", "model.layers.6.block_sparse_moe.experts.202.w1", "model.layers.6.block_sparse_moe.experts.203.w1", "model.layers.6.block_sparse_moe.experts.204.w1", "model.layers.6.block_sparse_moe.experts.205.w1", "model.layers.6.block_sparse_moe.experts.206.w1", "model.layers.6.block_sparse_moe.experts.207.w1", "model.layers.6.block_sparse_moe.experts.208.w1", "model.layers.6.block_sparse_moe.experts.209.w1", "model.layers.6.block_sparse_moe.experts.210.w1", "model.layers.6.block_sparse_moe.experts.211.w1", "model.layers.6.block_sparse_moe.experts.212.w1", "model.layers.6.block_sparse_moe.experts.213.w1", "model.layers.6.block_sparse_moe.experts.214.w1", "model.layers.6.block_sparse_moe.experts.215.w1", "model.layers.6.block_sparse_moe.experts.216.w1", "model.layers.6.block_sparse_moe.experts.217.w1", "model.layers.6.block_sparse_moe.experts.218.w1", "model.layers.6.block_sparse_moe.experts.219.w1", "model.layers.6.block_sparse_moe.experts.220.w1", "model.layers.6.block_sparse_moe.experts.221.w1", "model.layers.6.block_sparse_moe.experts.222.w1", "model.layers.6.block_sparse_moe.experts.223.w1", "model.layers.6.block_sparse_moe.experts.224.w1", "model.layers.6.block_sparse_moe.experts.225.w1", "model.layers.6.block_sparse_moe.experts.226.w1", "model.layers.6.block_sparse_moe.experts.227.w1", "model.layers.6.block_sparse_moe.experts.228.w1", "model.layers.6.block_sparse_moe.experts.229.w1", "model.layers.6.block_sparse_moe.experts.230.w1", "model.layers.6.block_sparse_moe.experts.231.w1", "model.layers.6.block_sparse_moe.experts.232.w1", "model.layers.6.block_sparse_moe.experts.233.w1", "model.layers.6.block_sparse_moe.experts.234.w1", "model.layers.6.block_sparse_moe.experts.235.w1", "model.layers.6.block_sparse_moe.experts.236.w1", "model.layers.6.block_sparse_moe.experts.237.w1", "model.layers.6.block_sparse_moe.experts.238.w1", "model.layers.6.block_sparse_moe.experts.239.w1", "model.layers.6.block_sparse_moe.experts.240.w1", "model.layers.6.block_sparse_moe.experts.241.w1", "model.layers.6.block_sparse_moe.experts.242.w1", "model.layers.6.block_sparse_moe.experts.243.w1", "model.layers.6.block_sparse_moe.experts.244.w1", "model.layers.6.block_sparse_moe.experts.245.w1", "model.layers.6.block_sparse_moe.experts.246.w1", "model.layers.6.block_sparse_moe.experts.247.w1", "model.layers.6.block_sparse_moe.experts.248.w1", "model.layers.6.block_sparse_moe.experts.249.w1", "model.layers.6.block_sparse_moe.experts.250.w1", "model.layers.6.block_sparse_moe.experts.251.w1", "model.layers.6.block_sparse_moe.experts.252.w1", "model.layers.6.block_sparse_moe.experts.253.w1", "model.layers.6.block_sparse_moe.experts.254.w1", "model.layers.6.block_sparse_moe.experts.255.w1", "model.layers.6.block_sparse_moe.experts.0.w3", "model.layers.6.block_sparse_moe.experts.1.w3", "model.layers.6.block_sparse_moe.experts.2.w3", "model.layers.6.block_sparse_moe.experts.3.w3", "model.layers.6.block_sparse_moe.experts.4.w3", "model.layers.6.block_sparse_moe.experts.5.w3", "model.layers.6.block_sparse_moe.experts.6.w3", "model.layers.6.block_sparse_moe.experts.7.w3", "model.layers.6.block_sparse_moe.experts.8.w3", "model.layers.6.block_sparse_moe.experts.9.w3", "model.layers.6.block_sparse_moe.experts.10.w3", "model.layers.6.block_sparse_moe.experts.11.w3", "model.layers.6.block_sparse_moe.experts.12.w3", "model.layers.6.block_sparse_moe.experts.13.w3", "model.layers.6.block_sparse_moe.experts.14.w3", "model.layers.6.block_sparse_moe.experts.15.w3", "model.layers.6.block_sparse_moe.experts.16.w3", "model.layers.6.block_sparse_moe.experts.17.w3", "model.layers.6.block_sparse_moe.experts.18.w3", "model.layers.6.block_sparse_moe.experts.19.w3", "model.layers.6.block_sparse_moe.experts.20.w3", "model.layers.6.block_sparse_moe.experts.21.w3", "model.layers.6.block_sparse_moe.experts.22.w3", "model.layers.6.block_sparse_moe.experts.23.w3", "model.layers.6.block_sparse_moe.experts.24.w3", "model.layers.6.block_sparse_moe.experts.25.w3", "model.layers.6.block_sparse_moe.experts.26.w3", "model.layers.6.block_sparse_moe.experts.27.w3", "model.layers.6.block_sparse_moe.experts.28.w3", "model.layers.6.block_sparse_moe.experts.29.w3", "model.layers.6.block_sparse_moe.experts.30.w3", "model.layers.6.block_sparse_moe.experts.31.w3", "model.layers.6.block_sparse_moe.experts.32.w3", "model.layers.6.block_sparse_moe.experts.33.w3", "model.layers.6.block_sparse_moe.experts.34.w3", "model.layers.6.block_sparse_moe.experts.35.w3", "model.layers.6.block_sparse_moe.experts.36.w3", "model.layers.6.block_sparse_moe.experts.37.w3", "model.layers.6.block_sparse_moe.experts.38.w3", "model.layers.6.block_sparse_moe.experts.39.w3", "model.layers.6.block_sparse_moe.experts.40.w3", "model.layers.6.block_sparse_moe.experts.41.w3", "model.layers.6.block_sparse_moe.experts.42.w3", "model.layers.6.block_sparse_moe.experts.43.w3", "model.layers.6.block_sparse_moe.experts.44.w3", "model.layers.6.block_sparse_moe.experts.45.w3", "model.layers.6.block_sparse_moe.experts.46.w3", "model.layers.6.block_sparse_moe.experts.47.w3", "model.layers.6.block_sparse_moe.experts.48.w3", "model.layers.6.block_sparse_moe.experts.49.w3", "model.layers.6.block_sparse_moe.experts.50.w3", "model.layers.6.block_sparse_moe.experts.51.w3", "model.layers.6.block_sparse_moe.experts.52.w3", "model.layers.6.block_sparse_moe.experts.53.w3", "model.layers.6.block_sparse_moe.experts.54.w3", "model.layers.6.block_sparse_moe.experts.55.w3", "model.layers.6.block_sparse_moe.experts.56.w3", "model.layers.6.block_sparse_moe.experts.57.w3", "model.layers.6.block_sparse_moe.experts.58.w3", "model.layers.6.block_sparse_moe.experts.59.w3", "model.layers.6.block_sparse_moe.experts.60.w3", "model.layers.6.block_sparse_moe.experts.61.w3", "model.layers.6.block_sparse_moe.experts.62.w3", "model.layers.6.block_sparse_moe.experts.63.w3", "model.layers.6.block_sparse_moe.experts.64.w3", "model.layers.6.block_sparse_moe.experts.65.w3", "model.layers.6.block_sparse_moe.experts.66.w3", "model.layers.6.block_sparse_moe.experts.67.w3", "model.layers.6.block_sparse_moe.experts.68.w3", "model.layers.6.block_sparse_moe.experts.69.w3", "model.layers.6.block_sparse_moe.experts.70.w3", "model.layers.6.block_sparse_moe.experts.71.w3", "model.layers.6.block_sparse_moe.experts.72.w3", "model.layers.6.block_sparse_moe.experts.73.w3", "model.layers.6.block_sparse_moe.experts.74.w3", "model.layers.6.block_sparse_moe.experts.75.w3", "model.layers.6.block_sparse_moe.experts.76.w3", "model.layers.6.block_sparse_moe.experts.77.w3", "model.layers.6.block_sparse_moe.experts.78.w3", "model.layers.6.block_sparse_moe.experts.79.w3", "model.layers.6.block_sparse_moe.experts.80.w3", "model.layers.6.block_sparse_moe.experts.81.w3", "model.layers.6.block_sparse_moe.experts.82.w3", "model.layers.6.block_sparse_moe.experts.83.w3", "model.layers.6.block_sparse_moe.experts.84.w3", "model.layers.6.block_sparse_moe.experts.85.w3", "model.layers.6.block_sparse_moe.experts.86.w3", "model.layers.6.block_sparse_moe.experts.87.w3", "model.layers.6.block_sparse_moe.experts.88.w3", "model.layers.6.block_sparse_moe.experts.89.w3", "model.layers.6.block_sparse_moe.experts.90.w3", "model.layers.6.block_sparse_moe.experts.91.w3", "model.layers.6.block_sparse_moe.experts.92.w3", "model.layers.6.block_sparse_moe.experts.93.w3", "model.layers.6.block_sparse_moe.experts.94.w3", "model.layers.6.block_sparse_moe.experts.95.w3", "model.layers.6.block_sparse_moe.experts.96.w3", "model.layers.6.block_sparse_moe.experts.97.w3", "model.layers.6.block_sparse_moe.experts.98.w3", "model.layers.6.block_sparse_moe.experts.99.w3", "model.layers.6.block_sparse_moe.experts.100.w3", "model.layers.6.block_sparse_moe.experts.101.w3", "model.layers.6.block_sparse_moe.experts.102.w3", "model.layers.6.block_sparse_moe.experts.103.w3", "model.layers.6.block_sparse_moe.experts.104.w3", "model.layers.6.block_sparse_moe.experts.105.w3", "model.layers.6.block_sparse_moe.experts.106.w3", "model.layers.6.block_sparse_moe.experts.107.w3", "model.layers.6.block_sparse_moe.experts.108.w3", "model.layers.6.block_sparse_moe.experts.109.w3", "model.layers.6.block_sparse_moe.experts.110.w3", "model.layers.6.block_sparse_moe.experts.111.w3", "model.layers.6.block_sparse_moe.experts.112.w3", "model.layers.6.block_sparse_moe.experts.113.w3", "model.layers.6.block_sparse_moe.experts.114.w3", "model.layers.6.block_sparse_moe.experts.115.w3", "model.layers.6.block_sparse_moe.experts.116.w3", "model.layers.6.block_sparse_moe.experts.117.w3", "model.layers.6.block_sparse_moe.experts.118.w3", "model.layers.6.block_sparse_moe.experts.119.w3", "model.layers.6.block_sparse_moe.experts.120.w3", "model.layers.6.block_sparse_moe.experts.121.w3", "model.layers.6.block_sparse_moe.experts.122.w3", "model.layers.6.block_sparse_moe.experts.123.w3", "model.layers.6.block_sparse_moe.experts.124.w3", "model.layers.6.block_sparse_moe.experts.125.w3", "model.layers.6.block_sparse_moe.experts.126.w3", "model.layers.6.block_sparse_moe.experts.127.w3", "model.layers.6.block_sparse_moe.experts.128.w3", "model.layers.6.block_sparse_moe.experts.129.w3", "model.layers.6.block_sparse_moe.experts.130.w3", "model.layers.6.block_sparse_moe.experts.131.w3", "model.layers.6.block_sparse_moe.experts.132.w3", "model.layers.6.block_sparse_moe.experts.133.w3", "model.layers.6.block_sparse_moe.experts.134.w3", "model.layers.6.block_sparse_moe.experts.135.w3", "model.layers.6.block_sparse_moe.experts.136.w3", "model.layers.6.block_sparse_moe.experts.137.w3", "model.layers.6.block_sparse_moe.experts.138.w3", "model.layers.6.block_sparse_moe.experts.139.w3", "model.layers.6.block_sparse_moe.experts.140.w3", "model.layers.6.block_sparse_moe.experts.141.w3", "model.layers.6.block_sparse_moe.experts.142.w3", "model.layers.6.block_sparse_moe.experts.143.w3", "model.layers.6.block_sparse_moe.experts.144.w3", "model.layers.6.block_sparse_moe.experts.145.w3", "model.layers.6.block_sparse_moe.experts.146.w3", "model.layers.6.block_sparse_moe.experts.147.w3", "model.layers.6.block_sparse_moe.experts.148.w3", "model.layers.6.block_sparse_moe.experts.149.w3", "model.layers.6.block_sparse_moe.experts.150.w3", "model.layers.6.block_sparse_moe.experts.151.w3", "model.layers.6.block_sparse_moe.experts.152.w3", "model.layers.6.block_sparse_moe.experts.153.w3", "model.layers.6.block_sparse_moe.experts.154.w3", "model.layers.6.block_sparse_moe.experts.155.w3", "model.layers.6.block_sparse_moe.experts.156.w3", "model.layers.6.block_sparse_moe.experts.157.w3", "model.layers.6.block_sparse_moe.experts.158.w3", "model.layers.6.block_sparse_moe.experts.159.w3", "model.layers.6.block_sparse_moe.experts.160.w3", "model.layers.6.block_sparse_moe.experts.161.w3", "model.layers.6.block_sparse_moe.experts.162.w3", "model.layers.6.block_sparse_moe.experts.163.w3", "model.layers.6.block_sparse_moe.experts.164.w3", "model.layers.6.block_sparse_moe.experts.165.w3", "model.layers.6.block_sparse_moe.experts.166.w3", "model.layers.6.block_sparse_moe.experts.167.w3", "model.layers.6.block_sparse_moe.experts.168.w3", "model.layers.6.block_sparse_moe.experts.169.w3", "model.layers.6.block_sparse_moe.experts.170.w3", "model.layers.6.block_sparse_moe.experts.171.w3", "model.layers.6.block_sparse_moe.experts.172.w3", "model.layers.6.block_sparse_moe.experts.173.w3", "model.layers.6.block_sparse_moe.experts.174.w3", "model.layers.6.block_sparse_moe.experts.175.w3", "model.layers.6.block_sparse_moe.experts.176.w3", "model.layers.6.block_sparse_moe.experts.177.w3", "model.layers.6.block_sparse_moe.experts.178.w3", "model.layers.6.block_sparse_moe.experts.179.w3", "model.layers.6.block_sparse_moe.experts.180.w3", "model.layers.6.block_sparse_moe.experts.181.w3", "model.layers.6.block_sparse_moe.experts.182.w3", "model.layers.6.block_sparse_moe.experts.183.w3", "model.layers.6.block_sparse_moe.experts.184.w3", "model.layers.6.block_sparse_moe.experts.185.w3", "model.layers.6.block_sparse_moe.experts.186.w3", "model.layers.6.block_sparse_moe.experts.187.w3", "model.layers.6.block_sparse_moe.experts.188.w3", "model.layers.6.block_sparse_moe.experts.189.w3", "model.layers.6.block_sparse_moe.experts.190.w3", "model.layers.6.block_sparse_moe.experts.191.w3", "model.layers.6.block_sparse_moe.experts.192.w3", "model.layers.6.block_sparse_moe.experts.193.w3", "model.layers.6.block_sparse_moe.experts.194.w3", "model.layers.6.block_sparse_moe.experts.195.w3", "model.layers.6.block_sparse_moe.experts.196.w3", "model.layers.6.block_sparse_moe.experts.197.w3", "model.layers.6.block_sparse_moe.experts.198.w3", "model.layers.6.block_sparse_moe.experts.199.w3", "model.layers.6.block_sparse_moe.experts.200.w3", "model.layers.6.block_sparse_moe.experts.201.w3", "model.layers.6.block_sparse_moe.experts.202.w3", "model.layers.6.block_sparse_moe.experts.203.w3", "model.layers.6.block_sparse_moe.experts.204.w3", "model.layers.6.block_sparse_moe.experts.205.w3", "model.layers.6.block_sparse_moe.experts.206.w3", "model.layers.6.block_sparse_moe.experts.207.w3", "model.layers.6.block_sparse_moe.experts.208.w3", "model.layers.6.block_sparse_moe.experts.209.w3", "model.layers.6.block_sparse_moe.experts.210.w3", "model.layers.6.block_sparse_moe.experts.211.w3", "model.layers.6.block_sparse_moe.experts.212.w3", "model.layers.6.block_sparse_moe.experts.213.w3", "model.layers.6.block_sparse_moe.experts.214.w3", "model.layers.6.block_sparse_moe.experts.215.w3", "model.layers.6.block_sparse_moe.experts.216.w3", "model.layers.6.block_sparse_moe.experts.217.w3", "model.layers.6.block_sparse_moe.experts.218.w3", "model.layers.6.block_sparse_moe.experts.219.w3", "model.layers.6.block_sparse_moe.experts.220.w3", "model.layers.6.block_sparse_moe.experts.221.w3", "model.layers.6.block_sparse_moe.experts.222.w3", "model.layers.6.block_sparse_moe.experts.223.w3", "model.layers.6.block_sparse_moe.experts.224.w3", "model.layers.6.block_sparse_moe.experts.225.w3", "model.layers.6.block_sparse_moe.experts.226.w3", "model.layers.6.block_sparse_moe.experts.227.w3", "model.layers.6.block_sparse_moe.experts.228.w3", "model.layers.6.block_sparse_moe.experts.229.w3", "model.layers.6.block_sparse_moe.experts.230.w3", "model.layers.6.block_sparse_moe.experts.231.w3", "model.layers.6.block_sparse_moe.experts.232.w3", "model.layers.6.block_sparse_moe.experts.233.w3", "model.layers.6.block_sparse_moe.experts.234.w3", "model.layers.6.block_sparse_moe.experts.235.w3", "model.layers.6.block_sparse_moe.experts.236.w3", "model.layers.6.block_sparse_moe.experts.237.w3", "model.layers.6.block_sparse_moe.experts.238.w3", "model.layers.6.block_sparse_moe.experts.239.w3", "model.layers.6.block_sparse_moe.experts.240.w3", "model.layers.6.block_sparse_moe.experts.241.w3", "model.layers.6.block_sparse_moe.experts.242.w3", "model.layers.6.block_sparse_moe.experts.243.w3", "model.layers.6.block_sparse_moe.experts.244.w3", "model.layers.6.block_sparse_moe.experts.245.w3", "model.layers.6.block_sparse_moe.experts.246.w3", "model.layers.6.block_sparse_moe.experts.247.w3", "model.layers.6.block_sparse_moe.experts.248.w3", "model.layers.6.block_sparse_moe.experts.249.w3", "model.layers.6.block_sparse_moe.experts.250.w3", "model.layers.6.block_sparse_moe.experts.251.w3", "model.layers.6.block_sparse_moe.experts.252.w3", "model.layers.6.block_sparse_moe.experts.253.w3", "model.layers.6.block_sparse_moe.experts.254.w3", "model.layers.6.block_sparse_moe.experts.255.w3", "model.layers.6.block_sparse_moe.experts.0.w2", "model.layers.6.block_sparse_moe.experts.1.w2", "model.layers.6.block_sparse_moe.experts.2.w2", "model.layers.6.block_sparse_moe.experts.3.w2", "model.layers.6.block_sparse_moe.experts.4.w2", "model.layers.6.block_sparse_moe.experts.5.w2", "model.layers.6.block_sparse_moe.experts.6.w2", "model.layers.6.block_sparse_moe.experts.7.w2", "model.layers.6.block_sparse_moe.experts.8.w2", "model.layers.6.block_sparse_moe.experts.9.w2", "model.layers.6.block_sparse_moe.experts.10.w2", "model.layers.6.block_sparse_moe.experts.11.w2", "model.layers.6.block_sparse_moe.experts.12.w2", "model.layers.6.block_sparse_moe.experts.13.w2", "model.layers.6.block_sparse_moe.experts.14.w2", "model.layers.6.block_sparse_moe.experts.15.w2", "model.layers.6.block_sparse_moe.experts.16.w2", "model.layers.6.block_sparse_moe.experts.17.w2", "model.layers.6.block_sparse_moe.experts.18.w2", "model.layers.6.block_sparse_moe.experts.19.w2", "model.layers.6.block_sparse_moe.experts.20.w2", "model.layers.6.block_sparse_moe.experts.21.w2", "model.layers.6.block_sparse_moe.experts.22.w2", "model.layers.6.block_sparse_moe.experts.23.w2", "model.layers.6.block_sparse_moe.experts.24.w2", "model.layers.6.block_sparse_moe.experts.25.w2", "model.layers.6.block_sparse_moe.experts.26.w2", "model.layers.6.block_sparse_moe.experts.27.w2", "model.layers.6.block_sparse_moe.experts.28.w2", "model.layers.6.block_sparse_moe.experts.29.w2", "model.layers.6.block_sparse_moe.experts.30.w2", "model.layers.6.block_sparse_moe.experts.31.w2", "model.layers.6.block_sparse_moe.experts.32.w2", "model.layers.6.block_sparse_moe.experts.33.w2", "model.layers.6.block_sparse_moe.experts.34.w2", "model.layers.6.block_sparse_moe.experts.35.w2", "model.layers.6.block_sparse_moe.experts.36.w2", "model.layers.6.block_sparse_moe.experts.37.w2", "model.layers.6.block_sparse_moe.experts.38.w2", "model.layers.6.block_sparse_moe.experts.39.w2", "model.layers.6.block_sparse_moe.experts.40.w2", "model.layers.6.block_sparse_moe.experts.41.w2", "model.layers.6.block_sparse_moe.experts.42.w2", "model.layers.6.block_sparse_moe.experts.43.w2", "model.layers.6.block_sparse_moe.experts.44.w2", "model.layers.6.block_sparse_moe.experts.45.w2", "model.layers.6.block_sparse_moe.experts.46.w2", "model.layers.6.block_sparse_moe.experts.47.w2", "model.layers.6.block_sparse_moe.experts.48.w2", "model.layers.6.block_sparse_moe.experts.49.w2", "model.layers.6.block_sparse_moe.experts.50.w2", "model.layers.6.block_sparse_moe.experts.51.w2", "model.layers.6.block_sparse_moe.experts.52.w2", "model.layers.6.block_sparse_moe.experts.53.w2", "model.layers.6.block_sparse_moe.experts.54.w2", "model.layers.6.block_sparse_moe.experts.55.w2", "model.layers.6.block_sparse_moe.experts.56.w2", "model.layers.6.block_sparse_moe.experts.57.w2", "model.layers.6.block_sparse_moe.experts.58.w2", "model.layers.6.block_sparse_moe.experts.59.w2", "model.layers.6.block_sparse_moe.experts.60.w2", "model.layers.6.block_sparse_moe.experts.61.w2", "model.layers.6.block_sparse_moe.experts.62.w2", "model.layers.6.block_sparse_moe.experts.63.w2", "model.layers.6.block_sparse_moe.experts.64.w2", "model.layers.6.block_sparse_moe.experts.65.w2", "model.layers.6.block_sparse_moe.experts.66.w2", "model.layers.6.block_sparse_moe.experts.67.w2", "model.layers.6.block_sparse_moe.experts.68.w2", "model.layers.6.block_sparse_moe.experts.69.w2", "model.layers.6.block_sparse_moe.experts.70.w2", "model.layers.6.block_sparse_moe.experts.71.w2", "model.layers.6.block_sparse_moe.experts.72.w2", "model.layers.6.block_sparse_moe.experts.73.w2", "model.layers.6.block_sparse_moe.experts.74.w2", "model.layers.6.block_sparse_moe.experts.75.w2", "model.layers.6.block_sparse_moe.experts.76.w2", "model.layers.6.block_sparse_moe.experts.77.w2", "model.layers.6.block_sparse_moe.experts.78.w2", "model.layers.6.block_sparse_moe.experts.79.w2", "model.layers.6.block_sparse_moe.experts.80.w2", "model.layers.6.block_sparse_moe.experts.81.w2", "model.layers.6.block_sparse_moe.experts.82.w2", "model.layers.6.block_sparse_moe.experts.83.w2", "model.layers.6.block_sparse_moe.experts.84.w2", "model.layers.6.block_sparse_moe.experts.85.w2", "model.layers.6.block_sparse_moe.experts.86.w2", "model.layers.6.block_sparse_moe.experts.87.w2", "model.layers.6.block_sparse_moe.experts.88.w2", "model.layers.6.block_sparse_moe.experts.89.w2", "model.layers.6.block_sparse_moe.experts.90.w2", "model.layers.6.block_sparse_moe.experts.91.w2", "model.layers.6.block_sparse_moe.experts.92.w2", "model.layers.6.block_sparse_moe.experts.93.w2", "model.layers.6.block_sparse_moe.experts.94.w2", "model.layers.6.block_sparse_moe.experts.95.w2", "model.layers.6.block_sparse_moe.experts.96.w2", "model.layers.6.block_sparse_moe.experts.97.w2", "model.layers.6.block_sparse_moe.experts.98.w2", "model.layers.6.block_sparse_moe.experts.99.w2", "model.layers.6.block_sparse_moe.experts.100.w2", "model.layers.6.block_sparse_moe.experts.101.w2", "model.layers.6.block_sparse_moe.experts.102.w2", "model.layers.6.block_sparse_moe.experts.103.w2", "model.layers.6.block_sparse_moe.experts.104.w2", "model.layers.6.block_sparse_moe.experts.105.w2", "model.layers.6.block_sparse_moe.experts.106.w2", "model.layers.6.block_sparse_moe.experts.107.w2", "model.layers.6.block_sparse_moe.experts.108.w2", "model.layers.6.block_sparse_moe.experts.109.w2", "model.layers.6.block_sparse_moe.experts.110.w2", "model.layers.6.block_sparse_moe.experts.111.w2", "model.layers.6.block_sparse_moe.experts.112.w2", "model.layers.6.block_sparse_moe.experts.113.w2", "model.layers.6.block_sparse_moe.experts.114.w2", "model.layers.6.block_sparse_moe.experts.115.w2", "model.layers.6.block_sparse_moe.experts.116.w2", "model.layers.6.block_sparse_moe.experts.117.w2", "model.layers.6.block_sparse_moe.experts.118.w2", "model.layers.6.block_sparse_moe.experts.119.w2", "model.layers.6.block_sparse_moe.experts.120.w2", "model.layers.6.block_sparse_moe.experts.121.w2", "model.layers.6.block_sparse_moe.experts.122.w2", "model.layers.6.block_sparse_moe.experts.123.w2", "model.layers.6.block_sparse_moe.experts.124.w2", "model.layers.6.block_sparse_moe.experts.125.w2", "model.layers.6.block_sparse_moe.experts.126.w2", "model.layers.6.block_sparse_moe.experts.127.w2", "model.layers.6.block_sparse_moe.experts.128.w2", "model.layers.6.block_sparse_moe.experts.129.w2", "model.layers.6.block_sparse_moe.experts.130.w2", "model.layers.6.block_sparse_moe.experts.131.w2", "model.layers.6.block_sparse_moe.experts.132.w2", "model.layers.6.block_sparse_moe.experts.133.w2", "model.layers.6.block_sparse_moe.experts.134.w2", "model.layers.6.block_sparse_moe.experts.135.w2", "model.layers.6.block_sparse_moe.experts.136.w2", "model.layers.6.block_sparse_moe.experts.137.w2", "model.layers.6.block_sparse_moe.experts.138.w2", "model.layers.6.block_sparse_moe.experts.139.w2", "model.layers.6.block_sparse_moe.experts.140.w2", "model.layers.6.block_sparse_moe.experts.141.w2", "model.layers.6.block_sparse_moe.experts.142.w2", "model.layers.6.block_sparse_moe.experts.143.w2", "model.layers.6.block_sparse_moe.experts.144.w2", "model.layers.6.block_sparse_moe.experts.145.w2", "model.layers.6.block_sparse_moe.experts.146.w2", "model.layers.6.block_sparse_moe.experts.147.w2", "model.layers.6.block_sparse_moe.experts.148.w2", "model.layers.6.block_sparse_moe.experts.149.w2", "model.layers.6.block_sparse_moe.experts.150.w2", "model.layers.6.block_sparse_moe.experts.151.w2", "model.layers.6.block_sparse_moe.experts.152.w2", "model.layers.6.block_sparse_moe.experts.153.w2", "model.layers.6.block_sparse_moe.experts.154.w2", "model.layers.6.block_sparse_moe.experts.155.w2", "model.layers.6.block_sparse_moe.experts.156.w2", "model.layers.6.block_sparse_moe.experts.157.w2", "model.layers.6.block_sparse_moe.experts.158.w2", "model.layers.6.block_sparse_moe.experts.159.w2", "model.layers.6.block_sparse_moe.experts.160.w2", "model.layers.6.block_sparse_moe.experts.161.w2", "model.layers.6.block_sparse_moe.experts.162.w2", "model.layers.6.block_sparse_moe.experts.163.w2", "model.layers.6.block_sparse_moe.experts.164.w2", "model.layers.6.block_sparse_moe.experts.165.w2", "model.layers.6.block_sparse_moe.experts.166.w2", "model.layers.6.block_sparse_moe.experts.167.w2", "model.layers.6.block_sparse_moe.experts.168.w2", "model.layers.6.block_sparse_moe.experts.169.w2", "model.layers.6.block_sparse_moe.experts.170.w2", "model.layers.6.block_sparse_moe.experts.171.w2", "model.layers.6.block_sparse_moe.experts.172.w2", "model.layers.6.block_sparse_moe.experts.173.w2", "model.layers.6.block_sparse_moe.experts.174.w2", "model.layers.6.block_sparse_moe.experts.175.w2", "model.layers.6.block_sparse_moe.experts.176.w2", "model.layers.6.block_sparse_moe.experts.177.w2", "model.layers.6.block_sparse_moe.experts.178.w2", "model.layers.6.block_sparse_moe.experts.179.w2", "model.layers.6.block_sparse_moe.experts.180.w2", "model.layers.6.block_sparse_moe.experts.181.w2", "model.layers.6.block_sparse_moe.experts.182.w2", "model.layers.6.block_sparse_moe.experts.183.w2", "model.layers.6.block_sparse_moe.experts.184.w2", "model.layers.6.block_sparse_moe.experts.185.w2", "model.layers.6.block_sparse_moe.experts.186.w2", "model.layers.6.block_sparse_moe.experts.187.w2", "model.layers.6.block_sparse_moe.experts.188.w2", "model.layers.6.block_sparse_moe.experts.189.w2", "model.layers.6.block_sparse_moe.experts.190.w2", "model.layers.6.block_sparse_moe.experts.191.w2", "model.layers.6.block_sparse_moe.experts.192.w2", "model.layers.6.block_sparse_moe.experts.193.w2", "model.layers.6.block_sparse_moe.experts.194.w2", "model.layers.6.block_sparse_moe.experts.195.w2", "model.layers.6.block_sparse_moe.experts.196.w2", "model.layers.6.block_sparse_moe.experts.197.w2", "model.layers.6.block_sparse_moe.experts.198.w2", "model.layers.6.block_sparse_moe.experts.199.w2", "model.layers.6.block_sparse_moe.experts.200.w2", "model.layers.6.block_sparse_moe.experts.201.w2", "model.layers.6.block_sparse_moe.experts.202.w2", "model.layers.6.block_sparse_moe.experts.203.w2", "model.layers.6.block_sparse_moe.experts.204.w2", "model.layers.6.block_sparse_moe.experts.205.w2", "model.layers.6.block_sparse_moe.experts.206.w2", "model.layers.6.block_sparse_moe.experts.207.w2", "model.layers.6.block_sparse_moe.experts.208.w2", "model.layers.6.block_sparse_moe.experts.209.w2", "model.layers.6.block_sparse_moe.experts.210.w2", "model.layers.6.block_sparse_moe.experts.211.w2", "model.layers.6.block_sparse_moe.experts.212.w2", "model.layers.6.block_sparse_moe.experts.213.w2", "model.layers.6.block_sparse_moe.experts.214.w2", "model.layers.6.block_sparse_moe.experts.215.w2", "model.layers.6.block_sparse_moe.experts.216.w2", "model.layers.6.block_sparse_moe.experts.217.w2", "model.layers.6.block_sparse_moe.experts.218.w2", "model.layers.6.block_sparse_moe.experts.219.w2", "model.layers.6.block_sparse_moe.experts.220.w2", "model.layers.6.block_sparse_moe.experts.221.w2", "model.layers.6.block_sparse_moe.experts.222.w2", "model.layers.6.block_sparse_moe.experts.223.w2", "model.layers.6.block_sparse_moe.experts.224.w2", "model.layers.6.block_sparse_moe.experts.225.w2", "model.layers.6.block_sparse_moe.experts.226.w2", "model.layers.6.block_sparse_moe.experts.227.w2", "model.layers.6.block_sparse_moe.experts.228.w2", "model.layers.6.block_sparse_moe.experts.229.w2", "model.layers.6.block_sparse_moe.experts.230.w2", "model.layers.6.block_sparse_moe.experts.231.w2", "model.layers.6.block_sparse_moe.experts.232.w2", "model.layers.6.block_sparse_moe.experts.233.w2", "model.layers.6.block_sparse_moe.experts.234.w2", "model.layers.6.block_sparse_moe.experts.235.w2", "model.layers.6.block_sparse_moe.experts.236.w2", "model.layers.6.block_sparse_moe.experts.237.w2", "model.layers.6.block_sparse_moe.experts.238.w2", "model.layers.6.block_sparse_moe.experts.239.w2", "model.layers.6.block_sparse_moe.experts.240.w2", "model.layers.6.block_sparse_moe.experts.241.w2", "model.layers.6.block_sparse_moe.experts.242.w2", "model.layers.6.block_sparse_moe.experts.243.w2", "model.layers.6.block_sparse_moe.experts.244.w2", "model.layers.6.block_sparse_moe.experts.245.w2", "model.layers.6.block_sparse_moe.experts.246.w2", "model.layers.6.block_sparse_moe.experts.247.w2", "model.layers.6.block_sparse_moe.experts.248.w2", "model.layers.6.block_sparse_moe.experts.249.w2", "model.layers.6.block_sparse_moe.experts.250.w2", "model.layers.6.block_sparse_moe.experts.251.w2", "model.layers.6.block_sparse_moe.experts.252.w2", "model.layers.6.block_sparse_moe.experts.253.w2", "model.layers.6.block_sparse_moe.experts.254.w2", "model.layers.6.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0005089685320854298, "dbits": 3623878656 } ] }, { "idx": 14, "layers": [ "model.layers.7.self_attn.q_proj", "model.layers.7.self_attn.k_proj", "model.layers.7.self_attn.v_proj", "model.layers.7.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0007911615073680767, "dbits": 44040192 } ] }, { "idx": 15, "layers": [ "model.layers.7.block_sparse_moe.experts.0.w1", "model.layers.7.block_sparse_moe.experts.1.w1", "model.layers.7.block_sparse_moe.experts.2.w1", "model.layers.7.block_sparse_moe.experts.3.w1", "model.layers.7.block_sparse_moe.experts.4.w1", "model.layers.7.block_sparse_moe.experts.5.w1", "model.layers.7.block_sparse_moe.experts.6.w1", "model.layers.7.block_sparse_moe.experts.7.w1", "model.layers.7.block_sparse_moe.experts.8.w1", "model.layers.7.block_sparse_moe.experts.9.w1", "model.layers.7.block_sparse_moe.experts.10.w1", "model.layers.7.block_sparse_moe.experts.11.w1", "model.layers.7.block_sparse_moe.experts.12.w1", "model.layers.7.block_sparse_moe.experts.13.w1", "model.layers.7.block_sparse_moe.experts.14.w1", "model.layers.7.block_sparse_moe.experts.15.w1", "model.layers.7.block_sparse_moe.experts.16.w1", "model.layers.7.block_sparse_moe.experts.17.w1", "model.layers.7.block_sparse_moe.experts.18.w1", "model.layers.7.block_sparse_moe.experts.19.w1", "model.layers.7.block_sparse_moe.experts.20.w1", "model.layers.7.block_sparse_moe.experts.21.w1", "model.layers.7.block_sparse_moe.experts.22.w1", "model.layers.7.block_sparse_moe.experts.23.w1", "model.layers.7.block_sparse_moe.experts.24.w1", "model.layers.7.block_sparse_moe.experts.25.w1", "model.layers.7.block_sparse_moe.experts.26.w1", "model.layers.7.block_sparse_moe.experts.27.w1", "model.layers.7.block_sparse_moe.experts.28.w1", "model.layers.7.block_sparse_moe.experts.29.w1", "model.layers.7.block_sparse_moe.experts.30.w1", "model.layers.7.block_sparse_moe.experts.31.w1", "model.layers.7.block_sparse_moe.experts.32.w1", "model.layers.7.block_sparse_moe.experts.33.w1", "model.layers.7.block_sparse_moe.experts.34.w1", "model.layers.7.block_sparse_moe.experts.35.w1", "model.layers.7.block_sparse_moe.experts.36.w1", "model.layers.7.block_sparse_moe.experts.37.w1", "model.layers.7.block_sparse_moe.experts.38.w1", "model.layers.7.block_sparse_moe.experts.39.w1", "model.layers.7.block_sparse_moe.experts.40.w1", "model.layers.7.block_sparse_moe.experts.41.w1", "model.layers.7.block_sparse_moe.experts.42.w1", "model.layers.7.block_sparse_moe.experts.43.w1", "model.layers.7.block_sparse_moe.experts.44.w1", "model.layers.7.block_sparse_moe.experts.45.w1", "model.layers.7.block_sparse_moe.experts.46.w1", "model.layers.7.block_sparse_moe.experts.47.w1", "model.layers.7.block_sparse_moe.experts.48.w1", "model.layers.7.block_sparse_moe.experts.49.w1", "model.layers.7.block_sparse_moe.experts.50.w1", "model.layers.7.block_sparse_moe.experts.51.w1", "model.layers.7.block_sparse_moe.experts.52.w1", "model.layers.7.block_sparse_moe.experts.53.w1", "model.layers.7.block_sparse_moe.experts.54.w1", "model.layers.7.block_sparse_moe.experts.55.w1", "model.layers.7.block_sparse_moe.experts.56.w1", "model.layers.7.block_sparse_moe.experts.57.w1", "model.layers.7.block_sparse_moe.experts.58.w1", "model.layers.7.block_sparse_moe.experts.59.w1", "model.layers.7.block_sparse_moe.experts.60.w1", "model.layers.7.block_sparse_moe.experts.61.w1", "model.layers.7.block_sparse_moe.experts.62.w1", "model.layers.7.block_sparse_moe.experts.63.w1", "model.layers.7.block_sparse_moe.experts.64.w1", "model.layers.7.block_sparse_moe.experts.65.w1", "model.layers.7.block_sparse_moe.experts.66.w1", "model.layers.7.block_sparse_moe.experts.67.w1", "model.layers.7.block_sparse_moe.experts.68.w1", "model.layers.7.block_sparse_moe.experts.69.w1", "model.layers.7.block_sparse_moe.experts.70.w1", "model.layers.7.block_sparse_moe.experts.71.w1", "model.layers.7.block_sparse_moe.experts.72.w1", "model.layers.7.block_sparse_moe.experts.73.w1", "model.layers.7.block_sparse_moe.experts.74.w1", "model.layers.7.block_sparse_moe.experts.75.w1", "model.layers.7.block_sparse_moe.experts.76.w1", "model.layers.7.block_sparse_moe.experts.77.w1", "model.layers.7.block_sparse_moe.experts.78.w1", "model.layers.7.block_sparse_moe.experts.79.w1", "model.layers.7.block_sparse_moe.experts.80.w1", "model.layers.7.block_sparse_moe.experts.81.w1", "model.layers.7.block_sparse_moe.experts.82.w1", "model.layers.7.block_sparse_moe.experts.83.w1", "model.layers.7.block_sparse_moe.experts.84.w1", "model.layers.7.block_sparse_moe.experts.85.w1", "model.layers.7.block_sparse_moe.experts.86.w1", "model.layers.7.block_sparse_moe.experts.87.w1", "model.layers.7.block_sparse_moe.experts.88.w1", "model.layers.7.block_sparse_moe.experts.89.w1", "model.layers.7.block_sparse_moe.experts.90.w1", "model.layers.7.block_sparse_moe.experts.91.w1", "model.layers.7.block_sparse_moe.experts.92.w1", "model.layers.7.block_sparse_moe.experts.93.w1", "model.layers.7.block_sparse_moe.experts.94.w1", "model.layers.7.block_sparse_moe.experts.95.w1", "model.layers.7.block_sparse_moe.experts.96.w1", "model.layers.7.block_sparse_moe.experts.97.w1", "model.layers.7.block_sparse_moe.experts.98.w1", "model.layers.7.block_sparse_moe.experts.99.w1", "model.layers.7.block_sparse_moe.experts.100.w1", "model.layers.7.block_sparse_moe.experts.101.w1", "model.layers.7.block_sparse_moe.experts.102.w1", "model.layers.7.block_sparse_moe.experts.103.w1", "model.layers.7.block_sparse_moe.experts.104.w1", "model.layers.7.block_sparse_moe.experts.105.w1", "model.layers.7.block_sparse_moe.experts.106.w1", "model.layers.7.block_sparse_moe.experts.107.w1", "model.layers.7.block_sparse_moe.experts.108.w1", "model.layers.7.block_sparse_moe.experts.109.w1", "model.layers.7.block_sparse_moe.experts.110.w1", "model.layers.7.block_sparse_moe.experts.111.w1", "model.layers.7.block_sparse_moe.experts.112.w1", "model.layers.7.block_sparse_moe.experts.113.w1", "model.layers.7.block_sparse_moe.experts.114.w1", "model.layers.7.block_sparse_moe.experts.115.w1", "model.layers.7.block_sparse_moe.experts.116.w1", "model.layers.7.block_sparse_moe.experts.117.w1", "model.layers.7.block_sparse_moe.experts.118.w1", "model.layers.7.block_sparse_moe.experts.119.w1", "model.layers.7.block_sparse_moe.experts.120.w1", "model.layers.7.block_sparse_moe.experts.121.w1", "model.layers.7.block_sparse_moe.experts.122.w1", "model.layers.7.block_sparse_moe.experts.123.w1", "model.layers.7.block_sparse_moe.experts.124.w1", "model.layers.7.block_sparse_moe.experts.125.w1", "model.layers.7.block_sparse_moe.experts.126.w1", "model.layers.7.block_sparse_moe.experts.127.w1", "model.layers.7.block_sparse_moe.experts.128.w1", "model.layers.7.block_sparse_moe.experts.129.w1", "model.layers.7.block_sparse_moe.experts.130.w1", "model.layers.7.block_sparse_moe.experts.131.w1", "model.layers.7.block_sparse_moe.experts.132.w1", "model.layers.7.block_sparse_moe.experts.133.w1", "model.layers.7.block_sparse_moe.experts.134.w1", "model.layers.7.block_sparse_moe.experts.135.w1", "model.layers.7.block_sparse_moe.experts.136.w1", "model.layers.7.block_sparse_moe.experts.137.w1", "model.layers.7.block_sparse_moe.experts.138.w1", "model.layers.7.block_sparse_moe.experts.139.w1", "model.layers.7.block_sparse_moe.experts.140.w1", "model.layers.7.block_sparse_moe.experts.141.w1", "model.layers.7.block_sparse_moe.experts.142.w1", "model.layers.7.block_sparse_moe.experts.143.w1", "model.layers.7.block_sparse_moe.experts.144.w1", "model.layers.7.block_sparse_moe.experts.145.w1", "model.layers.7.block_sparse_moe.experts.146.w1", "model.layers.7.block_sparse_moe.experts.147.w1", "model.layers.7.block_sparse_moe.experts.148.w1", "model.layers.7.block_sparse_moe.experts.149.w1", "model.layers.7.block_sparse_moe.experts.150.w1", "model.layers.7.block_sparse_moe.experts.151.w1", "model.layers.7.block_sparse_moe.experts.152.w1", "model.layers.7.block_sparse_moe.experts.153.w1", "model.layers.7.block_sparse_moe.experts.154.w1", "model.layers.7.block_sparse_moe.experts.155.w1", "model.layers.7.block_sparse_moe.experts.156.w1", "model.layers.7.block_sparse_moe.experts.157.w1", "model.layers.7.block_sparse_moe.experts.158.w1", "model.layers.7.block_sparse_moe.experts.159.w1", "model.layers.7.block_sparse_moe.experts.160.w1", "model.layers.7.block_sparse_moe.experts.161.w1", "model.layers.7.block_sparse_moe.experts.162.w1", "model.layers.7.block_sparse_moe.experts.163.w1", "model.layers.7.block_sparse_moe.experts.164.w1", "model.layers.7.block_sparse_moe.experts.165.w1", "model.layers.7.block_sparse_moe.experts.166.w1", "model.layers.7.block_sparse_moe.experts.167.w1", "model.layers.7.block_sparse_moe.experts.168.w1", "model.layers.7.block_sparse_moe.experts.169.w1", "model.layers.7.block_sparse_moe.experts.170.w1", "model.layers.7.block_sparse_moe.experts.171.w1", "model.layers.7.block_sparse_moe.experts.172.w1", "model.layers.7.block_sparse_moe.experts.173.w1", "model.layers.7.block_sparse_moe.experts.174.w1", "model.layers.7.block_sparse_moe.experts.175.w1", "model.layers.7.block_sparse_moe.experts.176.w1", "model.layers.7.block_sparse_moe.experts.177.w1", "model.layers.7.block_sparse_moe.experts.178.w1", "model.layers.7.block_sparse_moe.experts.179.w1", "model.layers.7.block_sparse_moe.experts.180.w1", "model.layers.7.block_sparse_moe.experts.181.w1", "model.layers.7.block_sparse_moe.experts.182.w1", "model.layers.7.block_sparse_moe.experts.183.w1", "model.layers.7.block_sparse_moe.experts.184.w1", "model.layers.7.block_sparse_moe.experts.185.w1", "model.layers.7.block_sparse_moe.experts.186.w1", "model.layers.7.block_sparse_moe.experts.187.w1", "model.layers.7.block_sparse_moe.experts.188.w1", "model.layers.7.block_sparse_moe.experts.189.w1", "model.layers.7.block_sparse_moe.experts.190.w1", "model.layers.7.block_sparse_moe.experts.191.w1", "model.layers.7.block_sparse_moe.experts.192.w1", "model.layers.7.block_sparse_moe.experts.193.w1", "model.layers.7.block_sparse_moe.experts.194.w1", "model.layers.7.block_sparse_moe.experts.195.w1", "model.layers.7.block_sparse_moe.experts.196.w1", "model.layers.7.block_sparse_moe.experts.197.w1", "model.layers.7.block_sparse_moe.experts.198.w1", "model.layers.7.block_sparse_moe.experts.199.w1", "model.layers.7.block_sparse_moe.experts.200.w1", "model.layers.7.block_sparse_moe.experts.201.w1", "model.layers.7.block_sparse_moe.experts.202.w1", "model.layers.7.block_sparse_moe.experts.203.w1", "model.layers.7.block_sparse_moe.experts.204.w1", "model.layers.7.block_sparse_moe.experts.205.w1", "model.layers.7.block_sparse_moe.experts.206.w1", "model.layers.7.block_sparse_moe.experts.207.w1", "model.layers.7.block_sparse_moe.experts.208.w1", "model.layers.7.block_sparse_moe.experts.209.w1", "model.layers.7.block_sparse_moe.experts.210.w1", "model.layers.7.block_sparse_moe.experts.211.w1", "model.layers.7.block_sparse_moe.experts.212.w1", "model.layers.7.block_sparse_moe.experts.213.w1", "model.layers.7.block_sparse_moe.experts.214.w1", "model.layers.7.block_sparse_moe.experts.215.w1", "model.layers.7.block_sparse_moe.experts.216.w1", "model.layers.7.block_sparse_moe.experts.217.w1", "model.layers.7.block_sparse_moe.experts.218.w1", "model.layers.7.block_sparse_moe.experts.219.w1", "model.layers.7.block_sparse_moe.experts.220.w1", "model.layers.7.block_sparse_moe.experts.221.w1", "model.layers.7.block_sparse_moe.experts.222.w1", "model.layers.7.block_sparse_moe.experts.223.w1", "model.layers.7.block_sparse_moe.experts.224.w1", "model.layers.7.block_sparse_moe.experts.225.w1", "model.layers.7.block_sparse_moe.experts.226.w1", "model.layers.7.block_sparse_moe.experts.227.w1", "model.layers.7.block_sparse_moe.experts.228.w1", "model.layers.7.block_sparse_moe.experts.229.w1", "model.layers.7.block_sparse_moe.experts.230.w1", "model.layers.7.block_sparse_moe.experts.231.w1", "model.layers.7.block_sparse_moe.experts.232.w1", "model.layers.7.block_sparse_moe.experts.233.w1", "model.layers.7.block_sparse_moe.experts.234.w1", "model.layers.7.block_sparse_moe.experts.235.w1", "model.layers.7.block_sparse_moe.experts.236.w1", "model.layers.7.block_sparse_moe.experts.237.w1", "model.layers.7.block_sparse_moe.experts.238.w1", "model.layers.7.block_sparse_moe.experts.239.w1", "model.layers.7.block_sparse_moe.experts.240.w1", "model.layers.7.block_sparse_moe.experts.241.w1", "model.layers.7.block_sparse_moe.experts.242.w1", "model.layers.7.block_sparse_moe.experts.243.w1", "model.layers.7.block_sparse_moe.experts.244.w1", "model.layers.7.block_sparse_moe.experts.245.w1", "model.layers.7.block_sparse_moe.experts.246.w1", "model.layers.7.block_sparse_moe.experts.247.w1", "model.layers.7.block_sparse_moe.experts.248.w1", "model.layers.7.block_sparse_moe.experts.249.w1", "model.layers.7.block_sparse_moe.experts.250.w1", "model.layers.7.block_sparse_moe.experts.251.w1", "model.layers.7.block_sparse_moe.experts.252.w1", "model.layers.7.block_sparse_moe.experts.253.w1", "model.layers.7.block_sparse_moe.experts.254.w1", "model.layers.7.block_sparse_moe.experts.255.w1", "model.layers.7.block_sparse_moe.experts.0.w3", "model.layers.7.block_sparse_moe.experts.1.w3", "model.layers.7.block_sparse_moe.experts.2.w3", "model.layers.7.block_sparse_moe.experts.3.w3", "model.layers.7.block_sparse_moe.experts.4.w3", "model.layers.7.block_sparse_moe.experts.5.w3", "model.layers.7.block_sparse_moe.experts.6.w3", "model.layers.7.block_sparse_moe.experts.7.w3", "model.layers.7.block_sparse_moe.experts.8.w3", "model.layers.7.block_sparse_moe.experts.9.w3", "model.layers.7.block_sparse_moe.experts.10.w3", "model.layers.7.block_sparse_moe.experts.11.w3", "model.layers.7.block_sparse_moe.experts.12.w3", "model.layers.7.block_sparse_moe.experts.13.w3", "model.layers.7.block_sparse_moe.experts.14.w3", "model.layers.7.block_sparse_moe.experts.15.w3", "model.layers.7.block_sparse_moe.experts.16.w3", "model.layers.7.block_sparse_moe.experts.17.w3", "model.layers.7.block_sparse_moe.experts.18.w3", "model.layers.7.block_sparse_moe.experts.19.w3", "model.layers.7.block_sparse_moe.experts.20.w3", "model.layers.7.block_sparse_moe.experts.21.w3", "model.layers.7.block_sparse_moe.experts.22.w3", "model.layers.7.block_sparse_moe.experts.23.w3", "model.layers.7.block_sparse_moe.experts.24.w3", "model.layers.7.block_sparse_moe.experts.25.w3", "model.layers.7.block_sparse_moe.experts.26.w3", "model.layers.7.block_sparse_moe.experts.27.w3", "model.layers.7.block_sparse_moe.experts.28.w3", "model.layers.7.block_sparse_moe.experts.29.w3", "model.layers.7.block_sparse_moe.experts.30.w3", "model.layers.7.block_sparse_moe.experts.31.w3", "model.layers.7.block_sparse_moe.experts.32.w3", "model.layers.7.block_sparse_moe.experts.33.w3", "model.layers.7.block_sparse_moe.experts.34.w3", "model.layers.7.block_sparse_moe.experts.35.w3", "model.layers.7.block_sparse_moe.experts.36.w3", "model.layers.7.block_sparse_moe.experts.37.w3", "model.layers.7.block_sparse_moe.experts.38.w3", "model.layers.7.block_sparse_moe.experts.39.w3", "model.layers.7.block_sparse_moe.experts.40.w3", "model.layers.7.block_sparse_moe.experts.41.w3", "model.layers.7.block_sparse_moe.experts.42.w3", "model.layers.7.block_sparse_moe.experts.43.w3", "model.layers.7.block_sparse_moe.experts.44.w3", "model.layers.7.block_sparse_moe.experts.45.w3", "model.layers.7.block_sparse_moe.experts.46.w3", "model.layers.7.block_sparse_moe.experts.47.w3", "model.layers.7.block_sparse_moe.experts.48.w3", "model.layers.7.block_sparse_moe.experts.49.w3", "model.layers.7.block_sparse_moe.experts.50.w3", "model.layers.7.block_sparse_moe.experts.51.w3", "model.layers.7.block_sparse_moe.experts.52.w3", "model.layers.7.block_sparse_moe.experts.53.w3", "model.layers.7.block_sparse_moe.experts.54.w3", "model.layers.7.block_sparse_moe.experts.55.w3", "model.layers.7.block_sparse_moe.experts.56.w3", "model.layers.7.block_sparse_moe.experts.57.w3", "model.layers.7.block_sparse_moe.experts.58.w3", "model.layers.7.block_sparse_moe.experts.59.w3", "model.layers.7.block_sparse_moe.experts.60.w3", "model.layers.7.block_sparse_moe.experts.61.w3", "model.layers.7.block_sparse_moe.experts.62.w3", "model.layers.7.block_sparse_moe.experts.63.w3", "model.layers.7.block_sparse_moe.experts.64.w3", "model.layers.7.block_sparse_moe.experts.65.w3", "model.layers.7.block_sparse_moe.experts.66.w3", "model.layers.7.block_sparse_moe.experts.67.w3", "model.layers.7.block_sparse_moe.experts.68.w3", "model.layers.7.block_sparse_moe.experts.69.w3", "model.layers.7.block_sparse_moe.experts.70.w3", "model.layers.7.block_sparse_moe.experts.71.w3", "model.layers.7.block_sparse_moe.experts.72.w3", "model.layers.7.block_sparse_moe.experts.73.w3", "model.layers.7.block_sparse_moe.experts.74.w3", "model.layers.7.block_sparse_moe.experts.75.w3", "model.layers.7.block_sparse_moe.experts.76.w3", "model.layers.7.block_sparse_moe.experts.77.w3", "model.layers.7.block_sparse_moe.experts.78.w3", "model.layers.7.block_sparse_moe.experts.79.w3", "model.layers.7.block_sparse_moe.experts.80.w3", "model.layers.7.block_sparse_moe.experts.81.w3", "model.layers.7.block_sparse_moe.experts.82.w3", "model.layers.7.block_sparse_moe.experts.83.w3", "model.layers.7.block_sparse_moe.experts.84.w3", "model.layers.7.block_sparse_moe.experts.85.w3", "model.layers.7.block_sparse_moe.experts.86.w3", "model.layers.7.block_sparse_moe.experts.87.w3", "model.layers.7.block_sparse_moe.experts.88.w3", "model.layers.7.block_sparse_moe.experts.89.w3", "model.layers.7.block_sparse_moe.experts.90.w3", "model.layers.7.block_sparse_moe.experts.91.w3", "model.layers.7.block_sparse_moe.experts.92.w3", "model.layers.7.block_sparse_moe.experts.93.w3", "model.layers.7.block_sparse_moe.experts.94.w3", "model.layers.7.block_sparse_moe.experts.95.w3", "model.layers.7.block_sparse_moe.experts.96.w3", "model.layers.7.block_sparse_moe.experts.97.w3", "model.layers.7.block_sparse_moe.experts.98.w3", "model.layers.7.block_sparse_moe.experts.99.w3", "model.layers.7.block_sparse_moe.experts.100.w3", "model.layers.7.block_sparse_moe.experts.101.w3", "model.layers.7.block_sparse_moe.experts.102.w3", "model.layers.7.block_sparse_moe.experts.103.w3", "model.layers.7.block_sparse_moe.experts.104.w3", "model.layers.7.block_sparse_moe.experts.105.w3", "model.layers.7.block_sparse_moe.experts.106.w3", "model.layers.7.block_sparse_moe.experts.107.w3", "model.layers.7.block_sparse_moe.experts.108.w3", "model.layers.7.block_sparse_moe.experts.109.w3", "model.layers.7.block_sparse_moe.experts.110.w3", "model.layers.7.block_sparse_moe.experts.111.w3", "model.layers.7.block_sparse_moe.experts.112.w3", "model.layers.7.block_sparse_moe.experts.113.w3", "model.layers.7.block_sparse_moe.experts.114.w3", "model.layers.7.block_sparse_moe.experts.115.w3", "model.layers.7.block_sparse_moe.experts.116.w3", "model.layers.7.block_sparse_moe.experts.117.w3", "model.layers.7.block_sparse_moe.experts.118.w3", "model.layers.7.block_sparse_moe.experts.119.w3", "model.layers.7.block_sparse_moe.experts.120.w3", "model.layers.7.block_sparse_moe.experts.121.w3", "model.layers.7.block_sparse_moe.experts.122.w3", "model.layers.7.block_sparse_moe.experts.123.w3", "model.layers.7.block_sparse_moe.experts.124.w3", "model.layers.7.block_sparse_moe.experts.125.w3", "model.layers.7.block_sparse_moe.experts.126.w3", "model.layers.7.block_sparse_moe.experts.127.w3", "model.layers.7.block_sparse_moe.experts.128.w3", "model.layers.7.block_sparse_moe.experts.129.w3", "model.layers.7.block_sparse_moe.experts.130.w3", "model.layers.7.block_sparse_moe.experts.131.w3", "model.layers.7.block_sparse_moe.experts.132.w3", "model.layers.7.block_sparse_moe.experts.133.w3", "model.layers.7.block_sparse_moe.experts.134.w3", "model.layers.7.block_sparse_moe.experts.135.w3", "model.layers.7.block_sparse_moe.experts.136.w3", "model.layers.7.block_sparse_moe.experts.137.w3", "model.layers.7.block_sparse_moe.experts.138.w3", "model.layers.7.block_sparse_moe.experts.139.w3", "model.layers.7.block_sparse_moe.experts.140.w3", "model.layers.7.block_sparse_moe.experts.141.w3", "model.layers.7.block_sparse_moe.experts.142.w3", "model.layers.7.block_sparse_moe.experts.143.w3", "model.layers.7.block_sparse_moe.experts.144.w3", "model.layers.7.block_sparse_moe.experts.145.w3", "model.layers.7.block_sparse_moe.experts.146.w3", "model.layers.7.block_sparse_moe.experts.147.w3", "model.layers.7.block_sparse_moe.experts.148.w3", "model.layers.7.block_sparse_moe.experts.149.w3", "model.layers.7.block_sparse_moe.experts.150.w3", "model.layers.7.block_sparse_moe.experts.151.w3", "model.layers.7.block_sparse_moe.experts.152.w3", "model.layers.7.block_sparse_moe.experts.153.w3", "model.layers.7.block_sparse_moe.experts.154.w3", "model.layers.7.block_sparse_moe.experts.155.w3", "model.layers.7.block_sparse_moe.experts.156.w3", "model.layers.7.block_sparse_moe.experts.157.w3", "model.layers.7.block_sparse_moe.experts.158.w3", "model.layers.7.block_sparse_moe.experts.159.w3", "model.layers.7.block_sparse_moe.experts.160.w3", "model.layers.7.block_sparse_moe.experts.161.w3", "model.layers.7.block_sparse_moe.experts.162.w3", "model.layers.7.block_sparse_moe.experts.163.w3", "model.layers.7.block_sparse_moe.experts.164.w3", "model.layers.7.block_sparse_moe.experts.165.w3", "model.layers.7.block_sparse_moe.experts.166.w3", "model.layers.7.block_sparse_moe.experts.167.w3", "model.layers.7.block_sparse_moe.experts.168.w3", "model.layers.7.block_sparse_moe.experts.169.w3", "model.layers.7.block_sparse_moe.experts.170.w3", "model.layers.7.block_sparse_moe.experts.171.w3", "model.layers.7.block_sparse_moe.experts.172.w3", "model.layers.7.block_sparse_moe.experts.173.w3", "model.layers.7.block_sparse_moe.experts.174.w3", "model.layers.7.block_sparse_moe.experts.175.w3", "model.layers.7.block_sparse_moe.experts.176.w3", "model.layers.7.block_sparse_moe.experts.177.w3", "model.layers.7.block_sparse_moe.experts.178.w3", "model.layers.7.block_sparse_moe.experts.179.w3", "model.layers.7.block_sparse_moe.experts.180.w3", "model.layers.7.block_sparse_moe.experts.181.w3", "model.layers.7.block_sparse_moe.experts.182.w3", "model.layers.7.block_sparse_moe.experts.183.w3", "model.layers.7.block_sparse_moe.experts.184.w3", "model.layers.7.block_sparse_moe.experts.185.w3", "model.layers.7.block_sparse_moe.experts.186.w3", "model.layers.7.block_sparse_moe.experts.187.w3", "model.layers.7.block_sparse_moe.experts.188.w3", "model.layers.7.block_sparse_moe.experts.189.w3", "model.layers.7.block_sparse_moe.experts.190.w3", "model.layers.7.block_sparse_moe.experts.191.w3", "model.layers.7.block_sparse_moe.experts.192.w3", "model.layers.7.block_sparse_moe.experts.193.w3", "model.layers.7.block_sparse_moe.experts.194.w3", "model.layers.7.block_sparse_moe.experts.195.w3", "model.layers.7.block_sparse_moe.experts.196.w3", "model.layers.7.block_sparse_moe.experts.197.w3", "model.layers.7.block_sparse_moe.experts.198.w3", "model.layers.7.block_sparse_moe.experts.199.w3", "model.layers.7.block_sparse_moe.experts.200.w3", "model.layers.7.block_sparse_moe.experts.201.w3", "model.layers.7.block_sparse_moe.experts.202.w3", "model.layers.7.block_sparse_moe.experts.203.w3", "model.layers.7.block_sparse_moe.experts.204.w3", "model.layers.7.block_sparse_moe.experts.205.w3", "model.layers.7.block_sparse_moe.experts.206.w3", "model.layers.7.block_sparse_moe.experts.207.w3", "model.layers.7.block_sparse_moe.experts.208.w3", "model.layers.7.block_sparse_moe.experts.209.w3", "model.layers.7.block_sparse_moe.experts.210.w3", "model.layers.7.block_sparse_moe.experts.211.w3", "model.layers.7.block_sparse_moe.experts.212.w3", "model.layers.7.block_sparse_moe.experts.213.w3", "model.layers.7.block_sparse_moe.experts.214.w3", "model.layers.7.block_sparse_moe.experts.215.w3", "model.layers.7.block_sparse_moe.experts.216.w3", "model.layers.7.block_sparse_moe.experts.217.w3", "model.layers.7.block_sparse_moe.experts.218.w3", "model.layers.7.block_sparse_moe.experts.219.w3", "model.layers.7.block_sparse_moe.experts.220.w3", "model.layers.7.block_sparse_moe.experts.221.w3", "model.layers.7.block_sparse_moe.experts.222.w3", "model.layers.7.block_sparse_moe.experts.223.w3", "model.layers.7.block_sparse_moe.experts.224.w3", "model.layers.7.block_sparse_moe.experts.225.w3", "model.layers.7.block_sparse_moe.experts.226.w3", "model.layers.7.block_sparse_moe.experts.227.w3", "model.layers.7.block_sparse_moe.experts.228.w3", "model.layers.7.block_sparse_moe.experts.229.w3", "model.layers.7.block_sparse_moe.experts.230.w3", "model.layers.7.block_sparse_moe.experts.231.w3", "model.layers.7.block_sparse_moe.experts.232.w3", "model.layers.7.block_sparse_moe.experts.233.w3", "model.layers.7.block_sparse_moe.experts.234.w3", "model.layers.7.block_sparse_moe.experts.235.w3", "model.layers.7.block_sparse_moe.experts.236.w3", "model.layers.7.block_sparse_moe.experts.237.w3", "model.layers.7.block_sparse_moe.experts.238.w3", "model.layers.7.block_sparse_moe.experts.239.w3", "model.layers.7.block_sparse_moe.experts.240.w3", "model.layers.7.block_sparse_moe.experts.241.w3", "model.layers.7.block_sparse_moe.experts.242.w3", "model.layers.7.block_sparse_moe.experts.243.w3", "model.layers.7.block_sparse_moe.experts.244.w3", "model.layers.7.block_sparse_moe.experts.245.w3", "model.layers.7.block_sparse_moe.experts.246.w3", "model.layers.7.block_sparse_moe.experts.247.w3", "model.layers.7.block_sparse_moe.experts.248.w3", "model.layers.7.block_sparse_moe.experts.249.w3", "model.layers.7.block_sparse_moe.experts.250.w3", "model.layers.7.block_sparse_moe.experts.251.w3", "model.layers.7.block_sparse_moe.experts.252.w3", "model.layers.7.block_sparse_moe.experts.253.w3", "model.layers.7.block_sparse_moe.experts.254.w3", "model.layers.7.block_sparse_moe.experts.255.w3", "model.layers.7.block_sparse_moe.experts.0.w2", "model.layers.7.block_sparse_moe.experts.1.w2", "model.layers.7.block_sparse_moe.experts.2.w2", "model.layers.7.block_sparse_moe.experts.3.w2", "model.layers.7.block_sparse_moe.experts.4.w2", "model.layers.7.block_sparse_moe.experts.5.w2", "model.layers.7.block_sparse_moe.experts.6.w2", "model.layers.7.block_sparse_moe.experts.7.w2", "model.layers.7.block_sparse_moe.experts.8.w2", "model.layers.7.block_sparse_moe.experts.9.w2", "model.layers.7.block_sparse_moe.experts.10.w2", "model.layers.7.block_sparse_moe.experts.11.w2", "model.layers.7.block_sparse_moe.experts.12.w2", "model.layers.7.block_sparse_moe.experts.13.w2", "model.layers.7.block_sparse_moe.experts.14.w2", "model.layers.7.block_sparse_moe.experts.15.w2", "model.layers.7.block_sparse_moe.experts.16.w2", "model.layers.7.block_sparse_moe.experts.17.w2", "model.layers.7.block_sparse_moe.experts.18.w2", "model.layers.7.block_sparse_moe.experts.19.w2", "model.layers.7.block_sparse_moe.experts.20.w2", "model.layers.7.block_sparse_moe.experts.21.w2", "model.layers.7.block_sparse_moe.experts.22.w2", "model.layers.7.block_sparse_moe.experts.23.w2", "model.layers.7.block_sparse_moe.experts.24.w2", "model.layers.7.block_sparse_moe.experts.25.w2", "model.layers.7.block_sparse_moe.experts.26.w2", "model.layers.7.block_sparse_moe.experts.27.w2", "model.layers.7.block_sparse_moe.experts.28.w2", "model.layers.7.block_sparse_moe.experts.29.w2", "model.layers.7.block_sparse_moe.experts.30.w2", "model.layers.7.block_sparse_moe.experts.31.w2", "model.layers.7.block_sparse_moe.experts.32.w2", "model.layers.7.block_sparse_moe.experts.33.w2", "model.layers.7.block_sparse_moe.experts.34.w2", "model.layers.7.block_sparse_moe.experts.35.w2", "model.layers.7.block_sparse_moe.experts.36.w2", "model.layers.7.block_sparse_moe.experts.37.w2", "model.layers.7.block_sparse_moe.experts.38.w2", "model.layers.7.block_sparse_moe.experts.39.w2", "model.layers.7.block_sparse_moe.experts.40.w2", "model.layers.7.block_sparse_moe.experts.41.w2", "model.layers.7.block_sparse_moe.experts.42.w2", "model.layers.7.block_sparse_moe.experts.43.w2", "model.layers.7.block_sparse_moe.experts.44.w2", "model.layers.7.block_sparse_moe.experts.45.w2", "model.layers.7.block_sparse_moe.experts.46.w2", "model.layers.7.block_sparse_moe.experts.47.w2", "model.layers.7.block_sparse_moe.experts.48.w2", "model.layers.7.block_sparse_moe.experts.49.w2", "model.layers.7.block_sparse_moe.experts.50.w2", "model.layers.7.block_sparse_moe.experts.51.w2", "model.layers.7.block_sparse_moe.experts.52.w2", "model.layers.7.block_sparse_moe.experts.53.w2", "model.layers.7.block_sparse_moe.experts.54.w2", "model.layers.7.block_sparse_moe.experts.55.w2", "model.layers.7.block_sparse_moe.experts.56.w2", "model.layers.7.block_sparse_moe.experts.57.w2", "model.layers.7.block_sparse_moe.experts.58.w2", "model.layers.7.block_sparse_moe.experts.59.w2", "model.layers.7.block_sparse_moe.experts.60.w2", "model.layers.7.block_sparse_moe.experts.61.w2", "model.layers.7.block_sparse_moe.experts.62.w2", "model.layers.7.block_sparse_moe.experts.63.w2", "model.layers.7.block_sparse_moe.experts.64.w2", "model.layers.7.block_sparse_moe.experts.65.w2", "model.layers.7.block_sparse_moe.experts.66.w2", "model.layers.7.block_sparse_moe.experts.67.w2", "model.layers.7.block_sparse_moe.experts.68.w2", "model.layers.7.block_sparse_moe.experts.69.w2", "model.layers.7.block_sparse_moe.experts.70.w2", "model.layers.7.block_sparse_moe.experts.71.w2", "model.layers.7.block_sparse_moe.experts.72.w2", "model.layers.7.block_sparse_moe.experts.73.w2", "model.layers.7.block_sparse_moe.experts.74.w2", "model.layers.7.block_sparse_moe.experts.75.w2", "model.layers.7.block_sparse_moe.experts.76.w2", "model.layers.7.block_sparse_moe.experts.77.w2", "model.layers.7.block_sparse_moe.experts.78.w2", "model.layers.7.block_sparse_moe.experts.79.w2", "model.layers.7.block_sparse_moe.experts.80.w2", "model.layers.7.block_sparse_moe.experts.81.w2", "model.layers.7.block_sparse_moe.experts.82.w2", "model.layers.7.block_sparse_moe.experts.83.w2", "model.layers.7.block_sparse_moe.experts.84.w2", "model.layers.7.block_sparse_moe.experts.85.w2", "model.layers.7.block_sparse_moe.experts.86.w2", "model.layers.7.block_sparse_moe.experts.87.w2", "model.layers.7.block_sparse_moe.experts.88.w2", "model.layers.7.block_sparse_moe.experts.89.w2", "model.layers.7.block_sparse_moe.experts.90.w2", "model.layers.7.block_sparse_moe.experts.91.w2", "model.layers.7.block_sparse_moe.experts.92.w2", "model.layers.7.block_sparse_moe.experts.93.w2", "model.layers.7.block_sparse_moe.experts.94.w2", "model.layers.7.block_sparse_moe.experts.95.w2", "model.layers.7.block_sparse_moe.experts.96.w2", "model.layers.7.block_sparse_moe.experts.97.w2", "model.layers.7.block_sparse_moe.experts.98.w2", "model.layers.7.block_sparse_moe.experts.99.w2", "model.layers.7.block_sparse_moe.experts.100.w2", "model.layers.7.block_sparse_moe.experts.101.w2", "model.layers.7.block_sparse_moe.experts.102.w2", "model.layers.7.block_sparse_moe.experts.103.w2", "model.layers.7.block_sparse_moe.experts.104.w2", "model.layers.7.block_sparse_moe.experts.105.w2", "model.layers.7.block_sparse_moe.experts.106.w2", "model.layers.7.block_sparse_moe.experts.107.w2", "model.layers.7.block_sparse_moe.experts.108.w2", "model.layers.7.block_sparse_moe.experts.109.w2", "model.layers.7.block_sparse_moe.experts.110.w2", "model.layers.7.block_sparse_moe.experts.111.w2", "model.layers.7.block_sparse_moe.experts.112.w2", "model.layers.7.block_sparse_moe.experts.113.w2", "model.layers.7.block_sparse_moe.experts.114.w2", "model.layers.7.block_sparse_moe.experts.115.w2", "model.layers.7.block_sparse_moe.experts.116.w2", "model.layers.7.block_sparse_moe.experts.117.w2", "model.layers.7.block_sparse_moe.experts.118.w2", "model.layers.7.block_sparse_moe.experts.119.w2", "model.layers.7.block_sparse_moe.experts.120.w2", "model.layers.7.block_sparse_moe.experts.121.w2", "model.layers.7.block_sparse_moe.experts.122.w2", "model.layers.7.block_sparse_moe.experts.123.w2", "model.layers.7.block_sparse_moe.experts.124.w2", "model.layers.7.block_sparse_moe.experts.125.w2", "model.layers.7.block_sparse_moe.experts.126.w2", "model.layers.7.block_sparse_moe.experts.127.w2", "model.layers.7.block_sparse_moe.experts.128.w2", "model.layers.7.block_sparse_moe.experts.129.w2", "model.layers.7.block_sparse_moe.experts.130.w2", "model.layers.7.block_sparse_moe.experts.131.w2", "model.layers.7.block_sparse_moe.experts.132.w2", "model.layers.7.block_sparse_moe.experts.133.w2", "model.layers.7.block_sparse_moe.experts.134.w2", "model.layers.7.block_sparse_moe.experts.135.w2", "model.layers.7.block_sparse_moe.experts.136.w2", "model.layers.7.block_sparse_moe.experts.137.w2", "model.layers.7.block_sparse_moe.experts.138.w2", "model.layers.7.block_sparse_moe.experts.139.w2", "model.layers.7.block_sparse_moe.experts.140.w2", "model.layers.7.block_sparse_moe.experts.141.w2", "model.layers.7.block_sparse_moe.experts.142.w2", "model.layers.7.block_sparse_moe.experts.143.w2", "model.layers.7.block_sparse_moe.experts.144.w2", "model.layers.7.block_sparse_moe.experts.145.w2", "model.layers.7.block_sparse_moe.experts.146.w2", "model.layers.7.block_sparse_moe.experts.147.w2", "model.layers.7.block_sparse_moe.experts.148.w2", "model.layers.7.block_sparse_moe.experts.149.w2", "model.layers.7.block_sparse_moe.experts.150.w2", "model.layers.7.block_sparse_moe.experts.151.w2", "model.layers.7.block_sparse_moe.experts.152.w2", "model.layers.7.block_sparse_moe.experts.153.w2", "model.layers.7.block_sparse_moe.experts.154.w2", "model.layers.7.block_sparse_moe.experts.155.w2", "model.layers.7.block_sparse_moe.experts.156.w2", "model.layers.7.block_sparse_moe.experts.157.w2", "model.layers.7.block_sparse_moe.experts.158.w2", "model.layers.7.block_sparse_moe.experts.159.w2", "model.layers.7.block_sparse_moe.experts.160.w2", "model.layers.7.block_sparse_moe.experts.161.w2", "model.layers.7.block_sparse_moe.experts.162.w2", "model.layers.7.block_sparse_moe.experts.163.w2", "model.layers.7.block_sparse_moe.experts.164.w2", "model.layers.7.block_sparse_moe.experts.165.w2", "model.layers.7.block_sparse_moe.experts.166.w2", "model.layers.7.block_sparse_moe.experts.167.w2", "model.layers.7.block_sparse_moe.experts.168.w2", "model.layers.7.block_sparse_moe.experts.169.w2", "model.layers.7.block_sparse_moe.experts.170.w2", "model.layers.7.block_sparse_moe.experts.171.w2", "model.layers.7.block_sparse_moe.experts.172.w2", "model.layers.7.block_sparse_moe.experts.173.w2", "model.layers.7.block_sparse_moe.experts.174.w2", "model.layers.7.block_sparse_moe.experts.175.w2", "model.layers.7.block_sparse_moe.experts.176.w2", "model.layers.7.block_sparse_moe.experts.177.w2", "model.layers.7.block_sparse_moe.experts.178.w2", "model.layers.7.block_sparse_moe.experts.179.w2", "model.layers.7.block_sparse_moe.experts.180.w2", "model.layers.7.block_sparse_moe.experts.181.w2", "model.layers.7.block_sparse_moe.experts.182.w2", "model.layers.7.block_sparse_moe.experts.183.w2", "model.layers.7.block_sparse_moe.experts.184.w2", "model.layers.7.block_sparse_moe.experts.185.w2", "model.layers.7.block_sparse_moe.experts.186.w2", "model.layers.7.block_sparse_moe.experts.187.w2", "model.layers.7.block_sparse_moe.experts.188.w2", "model.layers.7.block_sparse_moe.experts.189.w2", "model.layers.7.block_sparse_moe.experts.190.w2", "model.layers.7.block_sparse_moe.experts.191.w2", "model.layers.7.block_sparse_moe.experts.192.w2", "model.layers.7.block_sparse_moe.experts.193.w2", "model.layers.7.block_sparse_moe.experts.194.w2", "model.layers.7.block_sparse_moe.experts.195.w2", "model.layers.7.block_sparse_moe.experts.196.w2", "model.layers.7.block_sparse_moe.experts.197.w2", "model.layers.7.block_sparse_moe.experts.198.w2", "model.layers.7.block_sparse_moe.experts.199.w2", "model.layers.7.block_sparse_moe.experts.200.w2", "model.layers.7.block_sparse_moe.experts.201.w2", "model.layers.7.block_sparse_moe.experts.202.w2", "model.layers.7.block_sparse_moe.experts.203.w2", "model.layers.7.block_sparse_moe.experts.204.w2", "model.layers.7.block_sparse_moe.experts.205.w2", "model.layers.7.block_sparse_moe.experts.206.w2", "model.layers.7.block_sparse_moe.experts.207.w2", "model.layers.7.block_sparse_moe.experts.208.w2", "model.layers.7.block_sparse_moe.experts.209.w2", "model.layers.7.block_sparse_moe.experts.210.w2", "model.layers.7.block_sparse_moe.experts.211.w2", "model.layers.7.block_sparse_moe.experts.212.w2", "model.layers.7.block_sparse_moe.experts.213.w2", "model.layers.7.block_sparse_moe.experts.214.w2", "model.layers.7.block_sparse_moe.experts.215.w2", "model.layers.7.block_sparse_moe.experts.216.w2", "model.layers.7.block_sparse_moe.experts.217.w2", "model.layers.7.block_sparse_moe.experts.218.w2", "model.layers.7.block_sparse_moe.experts.219.w2", "model.layers.7.block_sparse_moe.experts.220.w2", "model.layers.7.block_sparse_moe.experts.221.w2", "model.layers.7.block_sparse_moe.experts.222.w2", "model.layers.7.block_sparse_moe.experts.223.w2", "model.layers.7.block_sparse_moe.experts.224.w2", "model.layers.7.block_sparse_moe.experts.225.w2", "model.layers.7.block_sparse_moe.experts.226.w2", "model.layers.7.block_sparse_moe.experts.227.w2", "model.layers.7.block_sparse_moe.experts.228.w2", "model.layers.7.block_sparse_moe.experts.229.w2", "model.layers.7.block_sparse_moe.experts.230.w2", "model.layers.7.block_sparse_moe.experts.231.w2", "model.layers.7.block_sparse_moe.experts.232.w2", "model.layers.7.block_sparse_moe.experts.233.w2", "model.layers.7.block_sparse_moe.experts.234.w2", "model.layers.7.block_sparse_moe.experts.235.w2", "model.layers.7.block_sparse_moe.experts.236.w2", "model.layers.7.block_sparse_moe.experts.237.w2", "model.layers.7.block_sparse_moe.experts.238.w2", "model.layers.7.block_sparse_moe.experts.239.w2", "model.layers.7.block_sparse_moe.experts.240.w2", "model.layers.7.block_sparse_moe.experts.241.w2", "model.layers.7.block_sparse_moe.experts.242.w2", "model.layers.7.block_sparse_moe.experts.243.w2", "model.layers.7.block_sparse_moe.experts.244.w2", "model.layers.7.block_sparse_moe.experts.245.w2", "model.layers.7.block_sparse_moe.experts.246.w2", "model.layers.7.block_sparse_moe.experts.247.w2", "model.layers.7.block_sparse_moe.experts.248.w2", "model.layers.7.block_sparse_moe.experts.249.w2", "model.layers.7.block_sparse_moe.experts.250.w2", "model.layers.7.block_sparse_moe.experts.251.w2", "model.layers.7.block_sparse_moe.experts.252.w2", "model.layers.7.block_sparse_moe.experts.253.w2", "model.layers.7.block_sparse_moe.experts.254.w2", "model.layers.7.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.005825541913509369, "dbits": 3623878656 } ] }, { "idx": 16, "layers": [ "model.layers.8.self_attn.q_proj", "model.layers.8.self_attn.k_proj", "model.layers.8.self_attn.v_proj", "model.layers.8.self_attn.o_proj" ], "candidates": [ { "dkld": -0.002190388366579976, "dbits": 44040192 } ] }, { "idx": 17, "layers": [ "model.layers.8.block_sparse_moe.experts.0.w1", "model.layers.8.block_sparse_moe.experts.1.w1", "model.layers.8.block_sparse_moe.experts.2.w1", "model.layers.8.block_sparse_moe.experts.3.w1", "model.layers.8.block_sparse_moe.experts.4.w1", "model.layers.8.block_sparse_moe.experts.5.w1", "model.layers.8.block_sparse_moe.experts.6.w1", "model.layers.8.block_sparse_moe.experts.7.w1", "model.layers.8.block_sparse_moe.experts.8.w1", "model.layers.8.block_sparse_moe.experts.9.w1", "model.layers.8.block_sparse_moe.experts.10.w1", "model.layers.8.block_sparse_moe.experts.11.w1", "model.layers.8.block_sparse_moe.experts.12.w1", "model.layers.8.block_sparse_moe.experts.13.w1", "model.layers.8.block_sparse_moe.experts.14.w1", "model.layers.8.block_sparse_moe.experts.15.w1", "model.layers.8.block_sparse_moe.experts.16.w1", "model.layers.8.block_sparse_moe.experts.17.w1", "model.layers.8.block_sparse_moe.experts.18.w1", "model.layers.8.block_sparse_moe.experts.19.w1", "model.layers.8.block_sparse_moe.experts.20.w1", "model.layers.8.block_sparse_moe.experts.21.w1", "model.layers.8.block_sparse_moe.experts.22.w1", "model.layers.8.block_sparse_moe.experts.23.w1", "model.layers.8.block_sparse_moe.experts.24.w1", "model.layers.8.block_sparse_moe.experts.25.w1", "model.layers.8.block_sparse_moe.experts.26.w1", "model.layers.8.block_sparse_moe.experts.27.w1", "model.layers.8.block_sparse_moe.experts.28.w1", "model.layers.8.block_sparse_moe.experts.29.w1", "model.layers.8.block_sparse_moe.experts.30.w1", "model.layers.8.block_sparse_moe.experts.31.w1", "model.layers.8.block_sparse_moe.experts.32.w1", "model.layers.8.block_sparse_moe.experts.33.w1", "model.layers.8.block_sparse_moe.experts.34.w1", "model.layers.8.block_sparse_moe.experts.35.w1", "model.layers.8.block_sparse_moe.experts.36.w1", "model.layers.8.block_sparse_moe.experts.37.w1", "model.layers.8.block_sparse_moe.experts.38.w1", "model.layers.8.block_sparse_moe.experts.39.w1", "model.layers.8.block_sparse_moe.experts.40.w1", "model.layers.8.block_sparse_moe.experts.41.w1", "model.layers.8.block_sparse_moe.experts.42.w1", "model.layers.8.block_sparse_moe.experts.43.w1", "model.layers.8.block_sparse_moe.experts.44.w1", "model.layers.8.block_sparse_moe.experts.45.w1", "model.layers.8.block_sparse_moe.experts.46.w1", "model.layers.8.block_sparse_moe.experts.47.w1", "model.layers.8.block_sparse_moe.experts.48.w1", "model.layers.8.block_sparse_moe.experts.49.w1", "model.layers.8.block_sparse_moe.experts.50.w1", "model.layers.8.block_sparse_moe.experts.51.w1", "model.layers.8.block_sparse_moe.experts.52.w1", "model.layers.8.block_sparse_moe.experts.53.w1", "model.layers.8.block_sparse_moe.experts.54.w1", "model.layers.8.block_sparse_moe.experts.55.w1", "model.layers.8.block_sparse_moe.experts.56.w1", "model.layers.8.block_sparse_moe.experts.57.w1", "model.layers.8.block_sparse_moe.experts.58.w1", "model.layers.8.block_sparse_moe.experts.59.w1", "model.layers.8.block_sparse_moe.experts.60.w1", "model.layers.8.block_sparse_moe.experts.61.w1", "model.layers.8.block_sparse_moe.experts.62.w1", "model.layers.8.block_sparse_moe.experts.63.w1", "model.layers.8.block_sparse_moe.experts.64.w1", "model.layers.8.block_sparse_moe.experts.65.w1", "model.layers.8.block_sparse_moe.experts.66.w1", "model.layers.8.block_sparse_moe.experts.67.w1", "model.layers.8.block_sparse_moe.experts.68.w1", "model.layers.8.block_sparse_moe.experts.69.w1", "model.layers.8.block_sparse_moe.experts.70.w1", "model.layers.8.block_sparse_moe.experts.71.w1", "model.layers.8.block_sparse_moe.experts.72.w1", "model.layers.8.block_sparse_moe.experts.73.w1", "model.layers.8.block_sparse_moe.experts.74.w1", "model.layers.8.block_sparse_moe.experts.75.w1", "model.layers.8.block_sparse_moe.experts.76.w1", "model.layers.8.block_sparse_moe.experts.77.w1", "model.layers.8.block_sparse_moe.experts.78.w1", "model.layers.8.block_sparse_moe.experts.79.w1", "model.layers.8.block_sparse_moe.experts.80.w1", "model.layers.8.block_sparse_moe.experts.81.w1", "model.layers.8.block_sparse_moe.experts.82.w1", "model.layers.8.block_sparse_moe.experts.83.w1", "model.layers.8.block_sparse_moe.experts.84.w1", "model.layers.8.block_sparse_moe.experts.85.w1", "model.layers.8.block_sparse_moe.experts.86.w1", "model.layers.8.block_sparse_moe.experts.87.w1", "model.layers.8.block_sparse_moe.experts.88.w1", "model.layers.8.block_sparse_moe.experts.89.w1", "model.layers.8.block_sparse_moe.experts.90.w1", "model.layers.8.block_sparse_moe.experts.91.w1", "model.layers.8.block_sparse_moe.experts.92.w1", "model.layers.8.block_sparse_moe.experts.93.w1", "model.layers.8.block_sparse_moe.experts.94.w1", "model.layers.8.block_sparse_moe.experts.95.w1", "model.layers.8.block_sparse_moe.experts.96.w1", "model.layers.8.block_sparse_moe.experts.97.w1", "model.layers.8.block_sparse_moe.experts.98.w1", "model.layers.8.block_sparse_moe.experts.99.w1", "model.layers.8.block_sparse_moe.experts.100.w1", "model.layers.8.block_sparse_moe.experts.101.w1", "model.layers.8.block_sparse_moe.experts.102.w1", "model.layers.8.block_sparse_moe.experts.103.w1", "model.layers.8.block_sparse_moe.experts.104.w1", "model.layers.8.block_sparse_moe.experts.105.w1", "model.layers.8.block_sparse_moe.experts.106.w1", "model.layers.8.block_sparse_moe.experts.107.w1", "model.layers.8.block_sparse_moe.experts.108.w1", "model.layers.8.block_sparse_moe.experts.109.w1", "model.layers.8.block_sparse_moe.experts.110.w1", "model.layers.8.block_sparse_moe.experts.111.w1", "model.layers.8.block_sparse_moe.experts.112.w1", "model.layers.8.block_sparse_moe.experts.113.w1", "model.layers.8.block_sparse_moe.experts.114.w1", "model.layers.8.block_sparse_moe.experts.115.w1", "model.layers.8.block_sparse_moe.experts.116.w1", "model.layers.8.block_sparse_moe.experts.117.w1", "model.layers.8.block_sparse_moe.experts.118.w1", "model.layers.8.block_sparse_moe.experts.119.w1", "model.layers.8.block_sparse_moe.experts.120.w1", "model.layers.8.block_sparse_moe.experts.121.w1", "model.layers.8.block_sparse_moe.experts.122.w1", "model.layers.8.block_sparse_moe.experts.123.w1", "model.layers.8.block_sparse_moe.experts.124.w1", "model.layers.8.block_sparse_moe.experts.125.w1", "model.layers.8.block_sparse_moe.experts.126.w1", "model.layers.8.block_sparse_moe.experts.127.w1", "model.layers.8.block_sparse_moe.experts.128.w1", "model.layers.8.block_sparse_moe.experts.129.w1", "model.layers.8.block_sparse_moe.experts.130.w1", "model.layers.8.block_sparse_moe.experts.131.w1", "model.layers.8.block_sparse_moe.experts.132.w1", "model.layers.8.block_sparse_moe.experts.133.w1", "model.layers.8.block_sparse_moe.experts.134.w1", "model.layers.8.block_sparse_moe.experts.135.w1", "model.layers.8.block_sparse_moe.experts.136.w1", "model.layers.8.block_sparse_moe.experts.137.w1", "model.layers.8.block_sparse_moe.experts.138.w1", "model.layers.8.block_sparse_moe.experts.139.w1", "model.layers.8.block_sparse_moe.experts.140.w1", "model.layers.8.block_sparse_moe.experts.141.w1", "model.layers.8.block_sparse_moe.experts.142.w1", "model.layers.8.block_sparse_moe.experts.143.w1", "model.layers.8.block_sparse_moe.experts.144.w1", "model.layers.8.block_sparse_moe.experts.145.w1", "model.layers.8.block_sparse_moe.experts.146.w1", "model.layers.8.block_sparse_moe.experts.147.w1", "model.layers.8.block_sparse_moe.experts.148.w1", "model.layers.8.block_sparse_moe.experts.149.w1", "model.layers.8.block_sparse_moe.experts.150.w1", "model.layers.8.block_sparse_moe.experts.151.w1", "model.layers.8.block_sparse_moe.experts.152.w1", "model.layers.8.block_sparse_moe.experts.153.w1", "model.layers.8.block_sparse_moe.experts.154.w1", "model.layers.8.block_sparse_moe.experts.155.w1", "model.layers.8.block_sparse_moe.experts.156.w1", "model.layers.8.block_sparse_moe.experts.157.w1", "model.layers.8.block_sparse_moe.experts.158.w1", "model.layers.8.block_sparse_moe.experts.159.w1", "model.layers.8.block_sparse_moe.experts.160.w1", "model.layers.8.block_sparse_moe.experts.161.w1", "model.layers.8.block_sparse_moe.experts.162.w1", "model.layers.8.block_sparse_moe.experts.163.w1", "model.layers.8.block_sparse_moe.experts.164.w1", "model.layers.8.block_sparse_moe.experts.165.w1", "model.layers.8.block_sparse_moe.experts.166.w1", "model.layers.8.block_sparse_moe.experts.167.w1", "model.layers.8.block_sparse_moe.experts.168.w1", "model.layers.8.block_sparse_moe.experts.169.w1", "model.layers.8.block_sparse_moe.experts.170.w1", "model.layers.8.block_sparse_moe.experts.171.w1", "model.layers.8.block_sparse_moe.experts.172.w1", "model.layers.8.block_sparse_moe.experts.173.w1", "model.layers.8.block_sparse_moe.experts.174.w1", "model.layers.8.block_sparse_moe.experts.175.w1", "model.layers.8.block_sparse_moe.experts.176.w1", "model.layers.8.block_sparse_moe.experts.177.w1", "model.layers.8.block_sparse_moe.experts.178.w1", "model.layers.8.block_sparse_moe.experts.179.w1", "model.layers.8.block_sparse_moe.experts.180.w1", "model.layers.8.block_sparse_moe.experts.181.w1", "model.layers.8.block_sparse_moe.experts.182.w1", "model.layers.8.block_sparse_moe.experts.183.w1", "model.layers.8.block_sparse_moe.experts.184.w1", "model.layers.8.block_sparse_moe.experts.185.w1", "model.layers.8.block_sparse_moe.experts.186.w1", "model.layers.8.block_sparse_moe.experts.187.w1", "model.layers.8.block_sparse_moe.experts.188.w1", "model.layers.8.block_sparse_moe.experts.189.w1", "model.layers.8.block_sparse_moe.experts.190.w1", "model.layers.8.block_sparse_moe.experts.191.w1", "model.layers.8.block_sparse_moe.experts.192.w1", "model.layers.8.block_sparse_moe.experts.193.w1", "model.layers.8.block_sparse_moe.experts.194.w1", "model.layers.8.block_sparse_moe.experts.195.w1", "model.layers.8.block_sparse_moe.experts.196.w1", "model.layers.8.block_sparse_moe.experts.197.w1", "model.layers.8.block_sparse_moe.experts.198.w1", "model.layers.8.block_sparse_moe.experts.199.w1", "model.layers.8.block_sparse_moe.experts.200.w1", "model.layers.8.block_sparse_moe.experts.201.w1", "model.layers.8.block_sparse_moe.experts.202.w1", "model.layers.8.block_sparse_moe.experts.203.w1", "model.layers.8.block_sparse_moe.experts.204.w1", "model.layers.8.block_sparse_moe.experts.205.w1", "model.layers.8.block_sparse_moe.experts.206.w1", "model.layers.8.block_sparse_moe.experts.207.w1", "model.layers.8.block_sparse_moe.experts.208.w1", "model.layers.8.block_sparse_moe.experts.209.w1", "model.layers.8.block_sparse_moe.experts.210.w1", "model.layers.8.block_sparse_moe.experts.211.w1", "model.layers.8.block_sparse_moe.experts.212.w1", "model.layers.8.block_sparse_moe.experts.213.w1", "model.layers.8.block_sparse_moe.experts.214.w1", "model.layers.8.block_sparse_moe.experts.215.w1", "model.layers.8.block_sparse_moe.experts.216.w1", "model.layers.8.block_sparse_moe.experts.217.w1", "model.layers.8.block_sparse_moe.experts.218.w1", "model.layers.8.block_sparse_moe.experts.219.w1", "model.layers.8.block_sparse_moe.experts.220.w1", "model.layers.8.block_sparse_moe.experts.221.w1", "model.layers.8.block_sparse_moe.experts.222.w1", "model.layers.8.block_sparse_moe.experts.223.w1", "model.layers.8.block_sparse_moe.experts.224.w1", "model.layers.8.block_sparse_moe.experts.225.w1", "model.layers.8.block_sparse_moe.experts.226.w1", "model.layers.8.block_sparse_moe.experts.227.w1", "model.layers.8.block_sparse_moe.experts.228.w1", "model.layers.8.block_sparse_moe.experts.229.w1", "model.layers.8.block_sparse_moe.experts.230.w1", "model.layers.8.block_sparse_moe.experts.231.w1", "model.layers.8.block_sparse_moe.experts.232.w1", "model.layers.8.block_sparse_moe.experts.233.w1", "model.layers.8.block_sparse_moe.experts.234.w1", "model.layers.8.block_sparse_moe.experts.235.w1", "model.layers.8.block_sparse_moe.experts.236.w1", "model.layers.8.block_sparse_moe.experts.237.w1", "model.layers.8.block_sparse_moe.experts.238.w1", "model.layers.8.block_sparse_moe.experts.239.w1", "model.layers.8.block_sparse_moe.experts.240.w1", "model.layers.8.block_sparse_moe.experts.241.w1", "model.layers.8.block_sparse_moe.experts.242.w1", "model.layers.8.block_sparse_moe.experts.243.w1", "model.layers.8.block_sparse_moe.experts.244.w1", "model.layers.8.block_sparse_moe.experts.245.w1", "model.layers.8.block_sparse_moe.experts.246.w1", "model.layers.8.block_sparse_moe.experts.247.w1", "model.layers.8.block_sparse_moe.experts.248.w1", "model.layers.8.block_sparse_moe.experts.249.w1", "model.layers.8.block_sparse_moe.experts.250.w1", "model.layers.8.block_sparse_moe.experts.251.w1", "model.layers.8.block_sparse_moe.experts.252.w1", "model.layers.8.block_sparse_moe.experts.253.w1", "model.layers.8.block_sparse_moe.experts.254.w1", "model.layers.8.block_sparse_moe.experts.255.w1", "model.layers.8.block_sparse_moe.experts.0.w3", "model.layers.8.block_sparse_moe.experts.1.w3", "model.layers.8.block_sparse_moe.experts.2.w3", "model.layers.8.block_sparse_moe.experts.3.w3", "model.layers.8.block_sparse_moe.experts.4.w3", "model.layers.8.block_sparse_moe.experts.5.w3", "model.layers.8.block_sparse_moe.experts.6.w3", "model.layers.8.block_sparse_moe.experts.7.w3", "model.layers.8.block_sparse_moe.experts.8.w3", "model.layers.8.block_sparse_moe.experts.9.w3", "model.layers.8.block_sparse_moe.experts.10.w3", "model.layers.8.block_sparse_moe.experts.11.w3", "model.layers.8.block_sparse_moe.experts.12.w3", "model.layers.8.block_sparse_moe.experts.13.w3", "model.layers.8.block_sparse_moe.experts.14.w3", "model.layers.8.block_sparse_moe.experts.15.w3", "model.layers.8.block_sparse_moe.experts.16.w3", "model.layers.8.block_sparse_moe.experts.17.w3", "model.layers.8.block_sparse_moe.experts.18.w3", "model.layers.8.block_sparse_moe.experts.19.w3", "model.layers.8.block_sparse_moe.experts.20.w3", "model.layers.8.block_sparse_moe.experts.21.w3", "model.layers.8.block_sparse_moe.experts.22.w3", "model.layers.8.block_sparse_moe.experts.23.w3", "model.layers.8.block_sparse_moe.experts.24.w3", "model.layers.8.block_sparse_moe.experts.25.w3", "model.layers.8.block_sparse_moe.experts.26.w3", "model.layers.8.block_sparse_moe.experts.27.w3", "model.layers.8.block_sparse_moe.experts.28.w3", "model.layers.8.block_sparse_moe.experts.29.w3", "model.layers.8.block_sparse_moe.experts.30.w3", "model.layers.8.block_sparse_moe.experts.31.w3", "model.layers.8.block_sparse_moe.experts.32.w3", "model.layers.8.block_sparse_moe.experts.33.w3", "model.layers.8.block_sparse_moe.experts.34.w3", "model.layers.8.block_sparse_moe.experts.35.w3", "model.layers.8.block_sparse_moe.experts.36.w3", "model.layers.8.block_sparse_moe.experts.37.w3", "model.layers.8.block_sparse_moe.experts.38.w3", "model.layers.8.block_sparse_moe.experts.39.w3", "model.layers.8.block_sparse_moe.experts.40.w3", "model.layers.8.block_sparse_moe.experts.41.w3", "model.layers.8.block_sparse_moe.experts.42.w3", "model.layers.8.block_sparse_moe.experts.43.w3", "model.layers.8.block_sparse_moe.experts.44.w3", "model.layers.8.block_sparse_moe.experts.45.w3", "model.layers.8.block_sparse_moe.experts.46.w3", "model.layers.8.block_sparse_moe.experts.47.w3", "model.layers.8.block_sparse_moe.experts.48.w3", "model.layers.8.block_sparse_moe.experts.49.w3", "model.layers.8.block_sparse_moe.experts.50.w3", "model.layers.8.block_sparse_moe.experts.51.w3", "model.layers.8.block_sparse_moe.experts.52.w3", "model.layers.8.block_sparse_moe.experts.53.w3", "model.layers.8.block_sparse_moe.experts.54.w3", "model.layers.8.block_sparse_moe.experts.55.w3", "model.layers.8.block_sparse_moe.experts.56.w3", "model.layers.8.block_sparse_moe.experts.57.w3", "model.layers.8.block_sparse_moe.experts.58.w3", "model.layers.8.block_sparse_moe.experts.59.w3", "model.layers.8.block_sparse_moe.experts.60.w3", "model.layers.8.block_sparse_moe.experts.61.w3", "model.layers.8.block_sparse_moe.experts.62.w3", "model.layers.8.block_sparse_moe.experts.63.w3", "model.layers.8.block_sparse_moe.experts.64.w3", "model.layers.8.block_sparse_moe.experts.65.w3", "model.layers.8.block_sparse_moe.experts.66.w3", "model.layers.8.block_sparse_moe.experts.67.w3", "model.layers.8.block_sparse_moe.experts.68.w3", "model.layers.8.block_sparse_moe.experts.69.w3", "model.layers.8.block_sparse_moe.experts.70.w3", "model.layers.8.block_sparse_moe.experts.71.w3", "model.layers.8.block_sparse_moe.experts.72.w3", "model.layers.8.block_sparse_moe.experts.73.w3", "model.layers.8.block_sparse_moe.experts.74.w3", "model.layers.8.block_sparse_moe.experts.75.w3", "model.layers.8.block_sparse_moe.experts.76.w3", "model.layers.8.block_sparse_moe.experts.77.w3", "model.layers.8.block_sparse_moe.experts.78.w3", "model.layers.8.block_sparse_moe.experts.79.w3", "model.layers.8.block_sparse_moe.experts.80.w3", "model.layers.8.block_sparse_moe.experts.81.w3", "model.layers.8.block_sparse_moe.experts.82.w3", "model.layers.8.block_sparse_moe.experts.83.w3", "model.layers.8.block_sparse_moe.experts.84.w3", "model.layers.8.block_sparse_moe.experts.85.w3", "model.layers.8.block_sparse_moe.experts.86.w3", "model.layers.8.block_sparse_moe.experts.87.w3", "model.layers.8.block_sparse_moe.experts.88.w3", "model.layers.8.block_sparse_moe.experts.89.w3", "model.layers.8.block_sparse_moe.experts.90.w3", "model.layers.8.block_sparse_moe.experts.91.w3", "model.layers.8.block_sparse_moe.experts.92.w3", "model.layers.8.block_sparse_moe.experts.93.w3", "model.layers.8.block_sparse_moe.experts.94.w3", "model.layers.8.block_sparse_moe.experts.95.w3", "model.layers.8.block_sparse_moe.experts.96.w3", "model.layers.8.block_sparse_moe.experts.97.w3", "model.layers.8.block_sparse_moe.experts.98.w3", "model.layers.8.block_sparse_moe.experts.99.w3", "model.layers.8.block_sparse_moe.experts.100.w3", "model.layers.8.block_sparse_moe.experts.101.w3", "model.layers.8.block_sparse_moe.experts.102.w3", "model.layers.8.block_sparse_moe.experts.103.w3", "model.layers.8.block_sparse_moe.experts.104.w3", "model.layers.8.block_sparse_moe.experts.105.w3", "model.layers.8.block_sparse_moe.experts.106.w3", "model.layers.8.block_sparse_moe.experts.107.w3", "model.layers.8.block_sparse_moe.experts.108.w3", "model.layers.8.block_sparse_moe.experts.109.w3", "model.layers.8.block_sparse_moe.experts.110.w3", "model.layers.8.block_sparse_moe.experts.111.w3", "model.layers.8.block_sparse_moe.experts.112.w3", "model.layers.8.block_sparse_moe.experts.113.w3", "model.layers.8.block_sparse_moe.experts.114.w3", "model.layers.8.block_sparse_moe.experts.115.w3", "model.layers.8.block_sparse_moe.experts.116.w3", "model.layers.8.block_sparse_moe.experts.117.w3", "model.layers.8.block_sparse_moe.experts.118.w3", "model.layers.8.block_sparse_moe.experts.119.w3", "model.layers.8.block_sparse_moe.experts.120.w3", "model.layers.8.block_sparse_moe.experts.121.w3", "model.layers.8.block_sparse_moe.experts.122.w3", "model.layers.8.block_sparse_moe.experts.123.w3", "model.layers.8.block_sparse_moe.experts.124.w3", "model.layers.8.block_sparse_moe.experts.125.w3", "model.layers.8.block_sparse_moe.experts.126.w3", "model.layers.8.block_sparse_moe.experts.127.w3", "model.layers.8.block_sparse_moe.experts.128.w3", "model.layers.8.block_sparse_moe.experts.129.w3", "model.layers.8.block_sparse_moe.experts.130.w3", "model.layers.8.block_sparse_moe.experts.131.w3", "model.layers.8.block_sparse_moe.experts.132.w3", "model.layers.8.block_sparse_moe.experts.133.w3", "model.layers.8.block_sparse_moe.experts.134.w3", "model.layers.8.block_sparse_moe.experts.135.w3", "model.layers.8.block_sparse_moe.experts.136.w3", "model.layers.8.block_sparse_moe.experts.137.w3", "model.layers.8.block_sparse_moe.experts.138.w3", "model.layers.8.block_sparse_moe.experts.139.w3", "model.layers.8.block_sparse_moe.experts.140.w3", "model.layers.8.block_sparse_moe.experts.141.w3", "model.layers.8.block_sparse_moe.experts.142.w3", "model.layers.8.block_sparse_moe.experts.143.w3", "model.layers.8.block_sparse_moe.experts.144.w3", "model.layers.8.block_sparse_moe.experts.145.w3", "model.layers.8.block_sparse_moe.experts.146.w3", "model.layers.8.block_sparse_moe.experts.147.w3", "model.layers.8.block_sparse_moe.experts.148.w3", "model.layers.8.block_sparse_moe.experts.149.w3", "model.layers.8.block_sparse_moe.experts.150.w3", "model.layers.8.block_sparse_moe.experts.151.w3", "model.layers.8.block_sparse_moe.experts.152.w3", "model.layers.8.block_sparse_moe.experts.153.w3", "model.layers.8.block_sparse_moe.experts.154.w3", "model.layers.8.block_sparse_moe.experts.155.w3", "model.layers.8.block_sparse_moe.experts.156.w3", "model.layers.8.block_sparse_moe.experts.157.w3", "model.layers.8.block_sparse_moe.experts.158.w3", "model.layers.8.block_sparse_moe.experts.159.w3", "model.layers.8.block_sparse_moe.experts.160.w3", "model.layers.8.block_sparse_moe.experts.161.w3", "model.layers.8.block_sparse_moe.experts.162.w3", "model.layers.8.block_sparse_moe.experts.163.w3", "model.layers.8.block_sparse_moe.experts.164.w3", "model.layers.8.block_sparse_moe.experts.165.w3", "model.layers.8.block_sparse_moe.experts.166.w3", "model.layers.8.block_sparse_moe.experts.167.w3", "model.layers.8.block_sparse_moe.experts.168.w3", "model.layers.8.block_sparse_moe.experts.169.w3", "model.layers.8.block_sparse_moe.experts.170.w3", "model.layers.8.block_sparse_moe.experts.171.w3", "model.layers.8.block_sparse_moe.experts.172.w3", "model.layers.8.block_sparse_moe.experts.173.w3", "model.layers.8.block_sparse_moe.experts.174.w3", "model.layers.8.block_sparse_moe.experts.175.w3", "model.layers.8.block_sparse_moe.experts.176.w3", "model.layers.8.block_sparse_moe.experts.177.w3", "model.layers.8.block_sparse_moe.experts.178.w3", "model.layers.8.block_sparse_moe.experts.179.w3", "model.layers.8.block_sparse_moe.experts.180.w3", "model.layers.8.block_sparse_moe.experts.181.w3", "model.layers.8.block_sparse_moe.experts.182.w3", "model.layers.8.block_sparse_moe.experts.183.w3", "model.layers.8.block_sparse_moe.experts.184.w3", "model.layers.8.block_sparse_moe.experts.185.w3", "model.layers.8.block_sparse_moe.experts.186.w3", "model.layers.8.block_sparse_moe.experts.187.w3", "model.layers.8.block_sparse_moe.experts.188.w3", "model.layers.8.block_sparse_moe.experts.189.w3", "model.layers.8.block_sparse_moe.experts.190.w3", "model.layers.8.block_sparse_moe.experts.191.w3", "model.layers.8.block_sparse_moe.experts.192.w3", "model.layers.8.block_sparse_moe.experts.193.w3", "model.layers.8.block_sparse_moe.experts.194.w3", "model.layers.8.block_sparse_moe.experts.195.w3", "model.layers.8.block_sparse_moe.experts.196.w3", "model.layers.8.block_sparse_moe.experts.197.w3", "model.layers.8.block_sparse_moe.experts.198.w3", "model.layers.8.block_sparse_moe.experts.199.w3", "model.layers.8.block_sparse_moe.experts.200.w3", "model.layers.8.block_sparse_moe.experts.201.w3", "model.layers.8.block_sparse_moe.experts.202.w3", "model.layers.8.block_sparse_moe.experts.203.w3", "model.layers.8.block_sparse_moe.experts.204.w3", "model.layers.8.block_sparse_moe.experts.205.w3", "model.layers.8.block_sparse_moe.experts.206.w3", "model.layers.8.block_sparse_moe.experts.207.w3", "model.layers.8.block_sparse_moe.experts.208.w3", "model.layers.8.block_sparse_moe.experts.209.w3", "model.layers.8.block_sparse_moe.experts.210.w3", "model.layers.8.block_sparse_moe.experts.211.w3", "model.layers.8.block_sparse_moe.experts.212.w3", "model.layers.8.block_sparse_moe.experts.213.w3", "model.layers.8.block_sparse_moe.experts.214.w3", "model.layers.8.block_sparse_moe.experts.215.w3", "model.layers.8.block_sparse_moe.experts.216.w3", "model.layers.8.block_sparse_moe.experts.217.w3", "model.layers.8.block_sparse_moe.experts.218.w3", "model.layers.8.block_sparse_moe.experts.219.w3", "model.layers.8.block_sparse_moe.experts.220.w3", "model.layers.8.block_sparse_moe.experts.221.w3", "model.layers.8.block_sparse_moe.experts.222.w3", "model.layers.8.block_sparse_moe.experts.223.w3", "model.layers.8.block_sparse_moe.experts.224.w3", "model.layers.8.block_sparse_moe.experts.225.w3", "model.layers.8.block_sparse_moe.experts.226.w3", "model.layers.8.block_sparse_moe.experts.227.w3", "model.layers.8.block_sparse_moe.experts.228.w3", "model.layers.8.block_sparse_moe.experts.229.w3", "model.layers.8.block_sparse_moe.experts.230.w3", "model.layers.8.block_sparse_moe.experts.231.w3", "model.layers.8.block_sparse_moe.experts.232.w3", "model.layers.8.block_sparse_moe.experts.233.w3", "model.layers.8.block_sparse_moe.experts.234.w3", "model.layers.8.block_sparse_moe.experts.235.w3", "model.layers.8.block_sparse_moe.experts.236.w3", "model.layers.8.block_sparse_moe.experts.237.w3", "model.layers.8.block_sparse_moe.experts.238.w3", "model.layers.8.block_sparse_moe.experts.239.w3", "model.layers.8.block_sparse_moe.experts.240.w3", "model.layers.8.block_sparse_moe.experts.241.w3", "model.layers.8.block_sparse_moe.experts.242.w3", "model.layers.8.block_sparse_moe.experts.243.w3", "model.layers.8.block_sparse_moe.experts.244.w3", "model.layers.8.block_sparse_moe.experts.245.w3", "model.layers.8.block_sparse_moe.experts.246.w3", "model.layers.8.block_sparse_moe.experts.247.w3", "model.layers.8.block_sparse_moe.experts.248.w3", "model.layers.8.block_sparse_moe.experts.249.w3", "model.layers.8.block_sparse_moe.experts.250.w3", "model.layers.8.block_sparse_moe.experts.251.w3", "model.layers.8.block_sparse_moe.experts.252.w3", "model.layers.8.block_sparse_moe.experts.253.w3", "model.layers.8.block_sparse_moe.experts.254.w3", "model.layers.8.block_sparse_moe.experts.255.w3", "model.layers.8.block_sparse_moe.experts.0.w2", "model.layers.8.block_sparse_moe.experts.1.w2", "model.layers.8.block_sparse_moe.experts.2.w2", "model.layers.8.block_sparse_moe.experts.3.w2", "model.layers.8.block_sparse_moe.experts.4.w2", "model.layers.8.block_sparse_moe.experts.5.w2", "model.layers.8.block_sparse_moe.experts.6.w2", "model.layers.8.block_sparse_moe.experts.7.w2", "model.layers.8.block_sparse_moe.experts.8.w2", "model.layers.8.block_sparse_moe.experts.9.w2", "model.layers.8.block_sparse_moe.experts.10.w2", "model.layers.8.block_sparse_moe.experts.11.w2", "model.layers.8.block_sparse_moe.experts.12.w2", "model.layers.8.block_sparse_moe.experts.13.w2", "model.layers.8.block_sparse_moe.experts.14.w2", "model.layers.8.block_sparse_moe.experts.15.w2", "model.layers.8.block_sparse_moe.experts.16.w2", "model.layers.8.block_sparse_moe.experts.17.w2", "model.layers.8.block_sparse_moe.experts.18.w2", "model.layers.8.block_sparse_moe.experts.19.w2", "model.layers.8.block_sparse_moe.experts.20.w2", "model.layers.8.block_sparse_moe.experts.21.w2", "model.layers.8.block_sparse_moe.experts.22.w2", "model.layers.8.block_sparse_moe.experts.23.w2", "model.layers.8.block_sparse_moe.experts.24.w2", "model.layers.8.block_sparse_moe.experts.25.w2", "model.layers.8.block_sparse_moe.experts.26.w2", "model.layers.8.block_sparse_moe.experts.27.w2", "model.layers.8.block_sparse_moe.experts.28.w2", "model.layers.8.block_sparse_moe.experts.29.w2", "model.layers.8.block_sparse_moe.experts.30.w2", "model.layers.8.block_sparse_moe.experts.31.w2", "model.layers.8.block_sparse_moe.experts.32.w2", "model.layers.8.block_sparse_moe.experts.33.w2", "model.layers.8.block_sparse_moe.experts.34.w2", "model.layers.8.block_sparse_moe.experts.35.w2", "model.layers.8.block_sparse_moe.experts.36.w2", "model.layers.8.block_sparse_moe.experts.37.w2", "model.layers.8.block_sparse_moe.experts.38.w2", "model.layers.8.block_sparse_moe.experts.39.w2", "model.layers.8.block_sparse_moe.experts.40.w2", "model.layers.8.block_sparse_moe.experts.41.w2", "model.layers.8.block_sparse_moe.experts.42.w2", "model.layers.8.block_sparse_moe.experts.43.w2", "model.layers.8.block_sparse_moe.experts.44.w2", "model.layers.8.block_sparse_moe.experts.45.w2", "model.layers.8.block_sparse_moe.experts.46.w2", "model.layers.8.block_sparse_moe.experts.47.w2", "model.layers.8.block_sparse_moe.experts.48.w2", "model.layers.8.block_sparse_moe.experts.49.w2", "model.layers.8.block_sparse_moe.experts.50.w2", "model.layers.8.block_sparse_moe.experts.51.w2", "model.layers.8.block_sparse_moe.experts.52.w2", "model.layers.8.block_sparse_moe.experts.53.w2", "model.layers.8.block_sparse_moe.experts.54.w2", "model.layers.8.block_sparse_moe.experts.55.w2", "model.layers.8.block_sparse_moe.experts.56.w2", "model.layers.8.block_sparse_moe.experts.57.w2", "model.layers.8.block_sparse_moe.experts.58.w2", "model.layers.8.block_sparse_moe.experts.59.w2", "model.layers.8.block_sparse_moe.experts.60.w2", "model.layers.8.block_sparse_moe.experts.61.w2", "model.layers.8.block_sparse_moe.experts.62.w2", "model.layers.8.block_sparse_moe.experts.63.w2", "model.layers.8.block_sparse_moe.experts.64.w2", "model.layers.8.block_sparse_moe.experts.65.w2", "model.layers.8.block_sparse_moe.experts.66.w2", "model.layers.8.block_sparse_moe.experts.67.w2", "model.layers.8.block_sparse_moe.experts.68.w2", "model.layers.8.block_sparse_moe.experts.69.w2", "model.layers.8.block_sparse_moe.experts.70.w2", "model.layers.8.block_sparse_moe.experts.71.w2", "model.layers.8.block_sparse_moe.experts.72.w2", "model.layers.8.block_sparse_moe.experts.73.w2", "model.layers.8.block_sparse_moe.experts.74.w2", "model.layers.8.block_sparse_moe.experts.75.w2", "model.layers.8.block_sparse_moe.experts.76.w2", "model.layers.8.block_sparse_moe.experts.77.w2", "model.layers.8.block_sparse_moe.experts.78.w2", "model.layers.8.block_sparse_moe.experts.79.w2", "model.layers.8.block_sparse_moe.experts.80.w2", "model.layers.8.block_sparse_moe.experts.81.w2", "model.layers.8.block_sparse_moe.experts.82.w2", "model.layers.8.block_sparse_moe.experts.83.w2", "model.layers.8.block_sparse_moe.experts.84.w2", "model.layers.8.block_sparse_moe.experts.85.w2", "model.layers.8.block_sparse_moe.experts.86.w2", "model.layers.8.block_sparse_moe.experts.87.w2", "model.layers.8.block_sparse_moe.experts.88.w2", "model.layers.8.block_sparse_moe.experts.89.w2", "model.layers.8.block_sparse_moe.experts.90.w2", "model.layers.8.block_sparse_moe.experts.91.w2", "model.layers.8.block_sparse_moe.experts.92.w2", "model.layers.8.block_sparse_moe.experts.93.w2", "model.layers.8.block_sparse_moe.experts.94.w2", "model.layers.8.block_sparse_moe.experts.95.w2", "model.layers.8.block_sparse_moe.experts.96.w2", "model.layers.8.block_sparse_moe.experts.97.w2", "model.layers.8.block_sparse_moe.experts.98.w2", "model.layers.8.block_sparse_moe.experts.99.w2", "model.layers.8.block_sparse_moe.experts.100.w2", "model.layers.8.block_sparse_moe.experts.101.w2", "model.layers.8.block_sparse_moe.experts.102.w2", "model.layers.8.block_sparse_moe.experts.103.w2", "model.layers.8.block_sparse_moe.experts.104.w2", "model.layers.8.block_sparse_moe.experts.105.w2", "model.layers.8.block_sparse_moe.experts.106.w2", "model.layers.8.block_sparse_moe.experts.107.w2", "model.layers.8.block_sparse_moe.experts.108.w2", "model.layers.8.block_sparse_moe.experts.109.w2", "model.layers.8.block_sparse_moe.experts.110.w2", "model.layers.8.block_sparse_moe.experts.111.w2", "model.layers.8.block_sparse_moe.experts.112.w2", "model.layers.8.block_sparse_moe.experts.113.w2", "model.layers.8.block_sparse_moe.experts.114.w2", "model.layers.8.block_sparse_moe.experts.115.w2", "model.layers.8.block_sparse_moe.experts.116.w2", "model.layers.8.block_sparse_moe.experts.117.w2", "model.layers.8.block_sparse_moe.experts.118.w2", "model.layers.8.block_sparse_moe.experts.119.w2", "model.layers.8.block_sparse_moe.experts.120.w2", "model.layers.8.block_sparse_moe.experts.121.w2", "model.layers.8.block_sparse_moe.experts.122.w2", "model.layers.8.block_sparse_moe.experts.123.w2", "model.layers.8.block_sparse_moe.experts.124.w2", "model.layers.8.block_sparse_moe.experts.125.w2", "model.layers.8.block_sparse_moe.experts.126.w2", "model.layers.8.block_sparse_moe.experts.127.w2", "model.layers.8.block_sparse_moe.experts.128.w2", "model.layers.8.block_sparse_moe.experts.129.w2", "model.layers.8.block_sparse_moe.experts.130.w2", "model.layers.8.block_sparse_moe.experts.131.w2", "model.layers.8.block_sparse_moe.experts.132.w2", "model.layers.8.block_sparse_moe.experts.133.w2", "model.layers.8.block_sparse_moe.experts.134.w2", "model.layers.8.block_sparse_moe.experts.135.w2", "model.layers.8.block_sparse_moe.experts.136.w2", "model.layers.8.block_sparse_moe.experts.137.w2", "model.layers.8.block_sparse_moe.experts.138.w2", "model.layers.8.block_sparse_moe.experts.139.w2", "model.layers.8.block_sparse_moe.experts.140.w2", "model.layers.8.block_sparse_moe.experts.141.w2", "model.layers.8.block_sparse_moe.experts.142.w2", "model.layers.8.block_sparse_moe.experts.143.w2", "model.layers.8.block_sparse_moe.experts.144.w2", "model.layers.8.block_sparse_moe.experts.145.w2", "model.layers.8.block_sparse_moe.experts.146.w2", "model.layers.8.block_sparse_moe.experts.147.w2", "model.layers.8.block_sparse_moe.experts.148.w2", "model.layers.8.block_sparse_moe.experts.149.w2", "model.layers.8.block_sparse_moe.experts.150.w2", "model.layers.8.block_sparse_moe.experts.151.w2", "model.layers.8.block_sparse_moe.experts.152.w2", "model.layers.8.block_sparse_moe.experts.153.w2", "model.layers.8.block_sparse_moe.experts.154.w2", "model.layers.8.block_sparse_moe.experts.155.w2", "model.layers.8.block_sparse_moe.experts.156.w2", "model.layers.8.block_sparse_moe.experts.157.w2", "model.layers.8.block_sparse_moe.experts.158.w2", "model.layers.8.block_sparse_moe.experts.159.w2", "model.layers.8.block_sparse_moe.experts.160.w2", "model.layers.8.block_sparse_moe.experts.161.w2", "model.layers.8.block_sparse_moe.experts.162.w2", "model.layers.8.block_sparse_moe.experts.163.w2", "model.layers.8.block_sparse_moe.experts.164.w2", "model.layers.8.block_sparse_moe.experts.165.w2", "model.layers.8.block_sparse_moe.experts.166.w2", "model.layers.8.block_sparse_moe.experts.167.w2", "model.layers.8.block_sparse_moe.experts.168.w2", "model.layers.8.block_sparse_moe.experts.169.w2", "model.layers.8.block_sparse_moe.experts.170.w2", "model.layers.8.block_sparse_moe.experts.171.w2", "model.layers.8.block_sparse_moe.experts.172.w2", "model.layers.8.block_sparse_moe.experts.173.w2", "model.layers.8.block_sparse_moe.experts.174.w2", "model.layers.8.block_sparse_moe.experts.175.w2", "model.layers.8.block_sparse_moe.experts.176.w2", "model.layers.8.block_sparse_moe.experts.177.w2", "model.layers.8.block_sparse_moe.experts.178.w2", "model.layers.8.block_sparse_moe.experts.179.w2", "model.layers.8.block_sparse_moe.experts.180.w2", "model.layers.8.block_sparse_moe.experts.181.w2", "model.layers.8.block_sparse_moe.experts.182.w2", "model.layers.8.block_sparse_moe.experts.183.w2", "model.layers.8.block_sparse_moe.experts.184.w2", "model.layers.8.block_sparse_moe.experts.185.w2", "model.layers.8.block_sparse_moe.experts.186.w2", "model.layers.8.block_sparse_moe.experts.187.w2", "model.layers.8.block_sparse_moe.experts.188.w2", "model.layers.8.block_sparse_moe.experts.189.w2", "model.layers.8.block_sparse_moe.experts.190.w2", "model.layers.8.block_sparse_moe.experts.191.w2", "model.layers.8.block_sparse_moe.experts.192.w2", "model.layers.8.block_sparse_moe.experts.193.w2", "model.layers.8.block_sparse_moe.experts.194.w2", "model.layers.8.block_sparse_moe.experts.195.w2", "model.layers.8.block_sparse_moe.experts.196.w2", "model.layers.8.block_sparse_moe.experts.197.w2", "model.layers.8.block_sparse_moe.experts.198.w2", "model.layers.8.block_sparse_moe.experts.199.w2", "model.layers.8.block_sparse_moe.experts.200.w2", "model.layers.8.block_sparse_moe.experts.201.w2", "model.layers.8.block_sparse_moe.experts.202.w2", "model.layers.8.block_sparse_moe.experts.203.w2", "model.layers.8.block_sparse_moe.experts.204.w2", "model.layers.8.block_sparse_moe.experts.205.w2", "model.layers.8.block_sparse_moe.experts.206.w2", "model.layers.8.block_sparse_moe.experts.207.w2", "model.layers.8.block_sparse_moe.experts.208.w2", "model.layers.8.block_sparse_moe.experts.209.w2", "model.layers.8.block_sparse_moe.experts.210.w2", "model.layers.8.block_sparse_moe.experts.211.w2", "model.layers.8.block_sparse_moe.experts.212.w2", "model.layers.8.block_sparse_moe.experts.213.w2", "model.layers.8.block_sparse_moe.experts.214.w2", "model.layers.8.block_sparse_moe.experts.215.w2", "model.layers.8.block_sparse_moe.experts.216.w2", "model.layers.8.block_sparse_moe.experts.217.w2", "model.layers.8.block_sparse_moe.experts.218.w2", "model.layers.8.block_sparse_moe.experts.219.w2", "model.layers.8.block_sparse_moe.experts.220.w2", "model.layers.8.block_sparse_moe.experts.221.w2", "model.layers.8.block_sparse_moe.experts.222.w2", "model.layers.8.block_sparse_moe.experts.223.w2", "model.layers.8.block_sparse_moe.experts.224.w2", "model.layers.8.block_sparse_moe.experts.225.w2", "model.layers.8.block_sparse_moe.experts.226.w2", "model.layers.8.block_sparse_moe.experts.227.w2", "model.layers.8.block_sparse_moe.experts.228.w2", "model.layers.8.block_sparse_moe.experts.229.w2", "model.layers.8.block_sparse_moe.experts.230.w2", "model.layers.8.block_sparse_moe.experts.231.w2", "model.layers.8.block_sparse_moe.experts.232.w2", "model.layers.8.block_sparse_moe.experts.233.w2", "model.layers.8.block_sparse_moe.experts.234.w2", "model.layers.8.block_sparse_moe.experts.235.w2", "model.layers.8.block_sparse_moe.experts.236.w2", "model.layers.8.block_sparse_moe.experts.237.w2", "model.layers.8.block_sparse_moe.experts.238.w2", "model.layers.8.block_sparse_moe.experts.239.w2", "model.layers.8.block_sparse_moe.experts.240.w2", "model.layers.8.block_sparse_moe.experts.241.w2", "model.layers.8.block_sparse_moe.experts.242.w2", "model.layers.8.block_sparse_moe.experts.243.w2", "model.layers.8.block_sparse_moe.experts.244.w2", "model.layers.8.block_sparse_moe.experts.245.w2", "model.layers.8.block_sparse_moe.experts.246.w2", "model.layers.8.block_sparse_moe.experts.247.w2", "model.layers.8.block_sparse_moe.experts.248.w2", "model.layers.8.block_sparse_moe.experts.249.w2", "model.layers.8.block_sparse_moe.experts.250.w2", "model.layers.8.block_sparse_moe.experts.251.w2", "model.layers.8.block_sparse_moe.experts.252.w2", "model.layers.8.block_sparse_moe.experts.253.w2", "model.layers.8.block_sparse_moe.experts.254.w2", "model.layers.8.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00046442169696092606, "dbits": 3623878656 } ] }, { "idx": 18, "layers": [ "model.layers.9.self_attn.q_proj", "model.layers.9.self_attn.k_proj", "model.layers.9.self_attn.v_proj", "model.layers.9.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0022327551618218644, "dbits": 44040192 } ] }, { "idx": 19, "layers": [ "model.layers.9.block_sparse_moe.experts.0.w1", "model.layers.9.block_sparse_moe.experts.1.w1", "model.layers.9.block_sparse_moe.experts.2.w1", "model.layers.9.block_sparse_moe.experts.3.w1", "model.layers.9.block_sparse_moe.experts.4.w1", "model.layers.9.block_sparse_moe.experts.5.w1", "model.layers.9.block_sparse_moe.experts.6.w1", "model.layers.9.block_sparse_moe.experts.7.w1", "model.layers.9.block_sparse_moe.experts.8.w1", "model.layers.9.block_sparse_moe.experts.9.w1", "model.layers.9.block_sparse_moe.experts.10.w1", "model.layers.9.block_sparse_moe.experts.11.w1", "model.layers.9.block_sparse_moe.experts.12.w1", "model.layers.9.block_sparse_moe.experts.13.w1", "model.layers.9.block_sparse_moe.experts.14.w1", "model.layers.9.block_sparse_moe.experts.15.w1", "model.layers.9.block_sparse_moe.experts.16.w1", "model.layers.9.block_sparse_moe.experts.17.w1", "model.layers.9.block_sparse_moe.experts.18.w1", "model.layers.9.block_sparse_moe.experts.19.w1", "model.layers.9.block_sparse_moe.experts.20.w1", "model.layers.9.block_sparse_moe.experts.21.w1", "model.layers.9.block_sparse_moe.experts.22.w1", "model.layers.9.block_sparse_moe.experts.23.w1", "model.layers.9.block_sparse_moe.experts.24.w1", "model.layers.9.block_sparse_moe.experts.25.w1", "model.layers.9.block_sparse_moe.experts.26.w1", "model.layers.9.block_sparse_moe.experts.27.w1", "model.layers.9.block_sparse_moe.experts.28.w1", "model.layers.9.block_sparse_moe.experts.29.w1", "model.layers.9.block_sparse_moe.experts.30.w1", "model.layers.9.block_sparse_moe.experts.31.w1", "model.layers.9.block_sparse_moe.experts.32.w1", "model.layers.9.block_sparse_moe.experts.33.w1", "model.layers.9.block_sparse_moe.experts.34.w1", "model.layers.9.block_sparse_moe.experts.35.w1", "model.layers.9.block_sparse_moe.experts.36.w1", "model.layers.9.block_sparse_moe.experts.37.w1", "model.layers.9.block_sparse_moe.experts.38.w1", "model.layers.9.block_sparse_moe.experts.39.w1", "model.layers.9.block_sparse_moe.experts.40.w1", "model.layers.9.block_sparse_moe.experts.41.w1", "model.layers.9.block_sparse_moe.experts.42.w1", "model.layers.9.block_sparse_moe.experts.43.w1", "model.layers.9.block_sparse_moe.experts.44.w1", "model.layers.9.block_sparse_moe.experts.45.w1", "model.layers.9.block_sparse_moe.experts.46.w1", "model.layers.9.block_sparse_moe.experts.47.w1", "model.layers.9.block_sparse_moe.experts.48.w1", "model.layers.9.block_sparse_moe.experts.49.w1", "model.layers.9.block_sparse_moe.experts.50.w1", "model.layers.9.block_sparse_moe.experts.51.w1", "model.layers.9.block_sparse_moe.experts.52.w1", "model.layers.9.block_sparse_moe.experts.53.w1", "model.layers.9.block_sparse_moe.experts.54.w1", "model.layers.9.block_sparse_moe.experts.55.w1", "model.layers.9.block_sparse_moe.experts.56.w1", "model.layers.9.block_sparse_moe.experts.57.w1", "model.layers.9.block_sparse_moe.experts.58.w1", "model.layers.9.block_sparse_moe.experts.59.w1", "model.layers.9.block_sparse_moe.experts.60.w1", "model.layers.9.block_sparse_moe.experts.61.w1", "model.layers.9.block_sparse_moe.experts.62.w1", "model.layers.9.block_sparse_moe.experts.63.w1", "model.layers.9.block_sparse_moe.experts.64.w1", "model.layers.9.block_sparse_moe.experts.65.w1", "model.layers.9.block_sparse_moe.experts.66.w1", "model.layers.9.block_sparse_moe.experts.67.w1", "model.layers.9.block_sparse_moe.experts.68.w1", "model.layers.9.block_sparse_moe.experts.69.w1", "model.layers.9.block_sparse_moe.experts.70.w1", "model.layers.9.block_sparse_moe.experts.71.w1", "model.layers.9.block_sparse_moe.experts.72.w1", "model.layers.9.block_sparse_moe.experts.73.w1", "model.layers.9.block_sparse_moe.experts.74.w1", "model.layers.9.block_sparse_moe.experts.75.w1", "model.layers.9.block_sparse_moe.experts.76.w1", "model.layers.9.block_sparse_moe.experts.77.w1", "model.layers.9.block_sparse_moe.experts.78.w1", "model.layers.9.block_sparse_moe.experts.79.w1", "model.layers.9.block_sparse_moe.experts.80.w1", "model.layers.9.block_sparse_moe.experts.81.w1", "model.layers.9.block_sparse_moe.experts.82.w1", "model.layers.9.block_sparse_moe.experts.83.w1", "model.layers.9.block_sparse_moe.experts.84.w1", "model.layers.9.block_sparse_moe.experts.85.w1", "model.layers.9.block_sparse_moe.experts.86.w1", "model.layers.9.block_sparse_moe.experts.87.w1", "model.layers.9.block_sparse_moe.experts.88.w1", "model.layers.9.block_sparse_moe.experts.89.w1", "model.layers.9.block_sparse_moe.experts.90.w1", "model.layers.9.block_sparse_moe.experts.91.w1", "model.layers.9.block_sparse_moe.experts.92.w1", "model.layers.9.block_sparse_moe.experts.93.w1", "model.layers.9.block_sparse_moe.experts.94.w1", "model.layers.9.block_sparse_moe.experts.95.w1", "model.layers.9.block_sparse_moe.experts.96.w1", "model.layers.9.block_sparse_moe.experts.97.w1", "model.layers.9.block_sparse_moe.experts.98.w1", "model.layers.9.block_sparse_moe.experts.99.w1", "model.layers.9.block_sparse_moe.experts.100.w1", "model.layers.9.block_sparse_moe.experts.101.w1", "model.layers.9.block_sparse_moe.experts.102.w1", "model.layers.9.block_sparse_moe.experts.103.w1", "model.layers.9.block_sparse_moe.experts.104.w1", "model.layers.9.block_sparse_moe.experts.105.w1", "model.layers.9.block_sparse_moe.experts.106.w1", "model.layers.9.block_sparse_moe.experts.107.w1", "model.layers.9.block_sparse_moe.experts.108.w1", "model.layers.9.block_sparse_moe.experts.109.w1", "model.layers.9.block_sparse_moe.experts.110.w1", "model.layers.9.block_sparse_moe.experts.111.w1", "model.layers.9.block_sparse_moe.experts.112.w1", "model.layers.9.block_sparse_moe.experts.113.w1", "model.layers.9.block_sparse_moe.experts.114.w1", "model.layers.9.block_sparse_moe.experts.115.w1", "model.layers.9.block_sparse_moe.experts.116.w1", "model.layers.9.block_sparse_moe.experts.117.w1", "model.layers.9.block_sparse_moe.experts.118.w1", "model.layers.9.block_sparse_moe.experts.119.w1", "model.layers.9.block_sparse_moe.experts.120.w1", "model.layers.9.block_sparse_moe.experts.121.w1", "model.layers.9.block_sparse_moe.experts.122.w1", "model.layers.9.block_sparse_moe.experts.123.w1", "model.layers.9.block_sparse_moe.experts.124.w1", "model.layers.9.block_sparse_moe.experts.125.w1", "model.layers.9.block_sparse_moe.experts.126.w1", "model.layers.9.block_sparse_moe.experts.127.w1", "model.layers.9.block_sparse_moe.experts.128.w1", "model.layers.9.block_sparse_moe.experts.129.w1", "model.layers.9.block_sparse_moe.experts.130.w1", "model.layers.9.block_sparse_moe.experts.131.w1", "model.layers.9.block_sparse_moe.experts.132.w1", "model.layers.9.block_sparse_moe.experts.133.w1", "model.layers.9.block_sparse_moe.experts.134.w1", "model.layers.9.block_sparse_moe.experts.135.w1", "model.layers.9.block_sparse_moe.experts.136.w1", "model.layers.9.block_sparse_moe.experts.137.w1", "model.layers.9.block_sparse_moe.experts.138.w1", "model.layers.9.block_sparse_moe.experts.139.w1", "model.layers.9.block_sparse_moe.experts.140.w1", "model.layers.9.block_sparse_moe.experts.141.w1", "model.layers.9.block_sparse_moe.experts.142.w1", "model.layers.9.block_sparse_moe.experts.143.w1", "model.layers.9.block_sparse_moe.experts.144.w1", "model.layers.9.block_sparse_moe.experts.145.w1", "model.layers.9.block_sparse_moe.experts.146.w1", "model.layers.9.block_sparse_moe.experts.147.w1", "model.layers.9.block_sparse_moe.experts.148.w1", "model.layers.9.block_sparse_moe.experts.149.w1", "model.layers.9.block_sparse_moe.experts.150.w1", "model.layers.9.block_sparse_moe.experts.151.w1", "model.layers.9.block_sparse_moe.experts.152.w1", "model.layers.9.block_sparse_moe.experts.153.w1", "model.layers.9.block_sparse_moe.experts.154.w1", "model.layers.9.block_sparse_moe.experts.155.w1", "model.layers.9.block_sparse_moe.experts.156.w1", "model.layers.9.block_sparse_moe.experts.157.w1", "model.layers.9.block_sparse_moe.experts.158.w1", "model.layers.9.block_sparse_moe.experts.159.w1", "model.layers.9.block_sparse_moe.experts.160.w1", "model.layers.9.block_sparse_moe.experts.161.w1", "model.layers.9.block_sparse_moe.experts.162.w1", "model.layers.9.block_sparse_moe.experts.163.w1", "model.layers.9.block_sparse_moe.experts.164.w1", "model.layers.9.block_sparse_moe.experts.165.w1", "model.layers.9.block_sparse_moe.experts.166.w1", "model.layers.9.block_sparse_moe.experts.167.w1", "model.layers.9.block_sparse_moe.experts.168.w1", "model.layers.9.block_sparse_moe.experts.169.w1", "model.layers.9.block_sparse_moe.experts.170.w1", "model.layers.9.block_sparse_moe.experts.171.w1", "model.layers.9.block_sparse_moe.experts.172.w1", "model.layers.9.block_sparse_moe.experts.173.w1", "model.layers.9.block_sparse_moe.experts.174.w1", "model.layers.9.block_sparse_moe.experts.175.w1", "model.layers.9.block_sparse_moe.experts.176.w1", "model.layers.9.block_sparse_moe.experts.177.w1", "model.layers.9.block_sparse_moe.experts.178.w1", "model.layers.9.block_sparse_moe.experts.179.w1", "model.layers.9.block_sparse_moe.experts.180.w1", "model.layers.9.block_sparse_moe.experts.181.w1", "model.layers.9.block_sparse_moe.experts.182.w1", "model.layers.9.block_sparse_moe.experts.183.w1", "model.layers.9.block_sparse_moe.experts.184.w1", "model.layers.9.block_sparse_moe.experts.185.w1", "model.layers.9.block_sparse_moe.experts.186.w1", "model.layers.9.block_sparse_moe.experts.187.w1", "model.layers.9.block_sparse_moe.experts.188.w1", "model.layers.9.block_sparse_moe.experts.189.w1", "model.layers.9.block_sparse_moe.experts.190.w1", "model.layers.9.block_sparse_moe.experts.191.w1", "model.layers.9.block_sparse_moe.experts.192.w1", "model.layers.9.block_sparse_moe.experts.193.w1", "model.layers.9.block_sparse_moe.experts.194.w1", "model.layers.9.block_sparse_moe.experts.195.w1", "model.layers.9.block_sparse_moe.experts.196.w1", "model.layers.9.block_sparse_moe.experts.197.w1", "model.layers.9.block_sparse_moe.experts.198.w1", "model.layers.9.block_sparse_moe.experts.199.w1", "model.layers.9.block_sparse_moe.experts.200.w1", "model.layers.9.block_sparse_moe.experts.201.w1", "model.layers.9.block_sparse_moe.experts.202.w1", "model.layers.9.block_sparse_moe.experts.203.w1", "model.layers.9.block_sparse_moe.experts.204.w1", "model.layers.9.block_sparse_moe.experts.205.w1", "model.layers.9.block_sparse_moe.experts.206.w1", "model.layers.9.block_sparse_moe.experts.207.w1", "model.layers.9.block_sparse_moe.experts.208.w1", "model.layers.9.block_sparse_moe.experts.209.w1", "model.layers.9.block_sparse_moe.experts.210.w1", "model.layers.9.block_sparse_moe.experts.211.w1", "model.layers.9.block_sparse_moe.experts.212.w1", "model.layers.9.block_sparse_moe.experts.213.w1", "model.layers.9.block_sparse_moe.experts.214.w1", "model.layers.9.block_sparse_moe.experts.215.w1", "model.layers.9.block_sparse_moe.experts.216.w1", "model.layers.9.block_sparse_moe.experts.217.w1", "model.layers.9.block_sparse_moe.experts.218.w1", "model.layers.9.block_sparse_moe.experts.219.w1", "model.layers.9.block_sparse_moe.experts.220.w1", "model.layers.9.block_sparse_moe.experts.221.w1", "model.layers.9.block_sparse_moe.experts.222.w1", "model.layers.9.block_sparse_moe.experts.223.w1", "model.layers.9.block_sparse_moe.experts.224.w1", "model.layers.9.block_sparse_moe.experts.225.w1", "model.layers.9.block_sparse_moe.experts.226.w1", "model.layers.9.block_sparse_moe.experts.227.w1", "model.layers.9.block_sparse_moe.experts.228.w1", "model.layers.9.block_sparse_moe.experts.229.w1", "model.layers.9.block_sparse_moe.experts.230.w1", "model.layers.9.block_sparse_moe.experts.231.w1", "model.layers.9.block_sparse_moe.experts.232.w1", "model.layers.9.block_sparse_moe.experts.233.w1", "model.layers.9.block_sparse_moe.experts.234.w1", "model.layers.9.block_sparse_moe.experts.235.w1", "model.layers.9.block_sparse_moe.experts.236.w1", "model.layers.9.block_sparse_moe.experts.237.w1", "model.layers.9.block_sparse_moe.experts.238.w1", "model.layers.9.block_sparse_moe.experts.239.w1", "model.layers.9.block_sparse_moe.experts.240.w1", "model.layers.9.block_sparse_moe.experts.241.w1", "model.layers.9.block_sparse_moe.experts.242.w1", "model.layers.9.block_sparse_moe.experts.243.w1", "model.layers.9.block_sparse_moe.experts.244.w1", "model.layers.9.block_sparse_moe.experts.245.w1", "model.layers.9.block_sparse_moe.experts.246.w1", "model.layers.9.block_sparse_moe.experts.247.w1", "model.layers.9.block_sparse_moe.experts.248.w1", "model.layers.9.block_sparse_moe.experts.249.w1", "model.layers.9.block_sparse_moe.experts.250.w1", "model.layers.9.block_sparse_moe.experts.251.w1", "model.layers.9.block_sparse_moe.experts.252.w1", "model.layers.9.block_sparse_moe.experts.253.w1", "model.layers.9.block_sparse_moe.experts.254.w1", "model.layers.9.block_sparse_moe.experts.255.w1", "model.layers.9.block_sparse_moe.experts.0.w3", "model.layers.9.block_sparse_moe.experts.1.w3", "model.layers.9.block_sparse_moe.experts.2.w3", "model.layers.9.block_sparse_moe.experts.3.w3", "model.layers.9.block_sparse_moe.experts.4.w3", "model.layers.9.block_sparse_moe.experts.5.w3", "model.layers.9.block_sparse_moe.experts.6.w3", "model.layers.9.block_sparse_moe.experts.7.w3", "model.layers.9.block_sparse_moe.experts.8.w3", "model.layers.9.block_sparse_moe.experts.9.w3", "model.layers.9.block_sparse_moe.experts.10.w3", "model.layers.9.block_sparse_moe.experts.11.w3", "model.layers.9.block_sparse_moe.experts.12.w3", "model.layers.9.block_sparse_moe.experts.13.w3", "model.layers.9.block_sparse_moe.experts.14.w3", "model.layers.9.block_sparse_moe.experts.15.w3", "model.layers.9.block_sparse_moe.experts.16.w3", "model.layers.9.block_sparse_moe.experts.17.w3", "model.layers.9.block_sparse_moe.experts.18.w3", "model.layers.9.block_sparse_moe.experts.19.w3", "model.layers.9.block_sparse_moe.experts.20.w3", "model.layers.9.block_sparse_moe.experts.21.w3", "model.layers.9.block_sparse_moe.experts.22.w3", "model.layers.9.block_sparse_moe.experts.23.w3", "model.layers.9.block_sparse_moe.experts.24.w3", "model.layers.9.block_sparse_moe.experts.25.w3", "model.layers.9.block_sparse_moe.experts.26.w3", "model.layers.9.block_sparse_moe.experts.27.w3", "model.layers.9.block_sparse_moe.experts.28.w3", "model.layers.9.block_sparse_moe.experts.29.w3", "model.layers.9.block_sparse_moe.experts.30.w3", "model.layers.9.block_sparse_moe.experts.31.w3", "model.layers.9.block_sparse_moe.experts.32.w3", "model.layers.9.block_sparse_moe.experts.33.w3", "model.layers.9.block_sparse_moe.experts.34.w3", "model.layers.9.block_sparse_moe.experts.35.w3", "model.layers.9.block_sparse_moe.experts.36.w3", "model.layers.9.block_sparse_moe.experts.37.w3", "model.layers.9.block_sparse_moe.experts.38.w3", "model.layers.9.block_sparse_moe.experts.39.w3", "model.layers.9.block_sparse_moe.experts.40.w3", "model.layers.9.block_sparse_moe.experts.41.w3", "model.layers.9.block_sparse_moe.experts.42.w3", "model.layers.9.block_sparse_moe.experts.43.w3", "model.layers.9.block_sparse_moe.experts.44.w3", "model.layers.9.block_sparse_moe.experts.45.w3", "model.layers.9.block_sparse_moe.experts.46.w3", "model.layers.9.block_sparse_moe.experts.47.w3", "model.layers.9.block_sparse_moe.experts.48.w3", "model.layers.9.block_sparse_moe.experts.49.w3", "model.layers.9.block_sparse_moe.experts.50.w3", "model.layers.9.block_sparse_moe.experts.51.w3", "model.layers.9.block_sparse_moe.experts.52.w3", "model.layers.9.block_sparse_moe.experts.53.w3", "model.layers.9.block_sparse_moe.experts.54.w3", "model.layers.9.block_sparse_moe.experts.55.w3", "model.layers.9.block_sparse_moe.experts.56.w3", "model.layers.9.block_sparse_moe.experts.57.w3", "model.layers.9.block_sparse_moe.experts.58.w3", "model.layers.9.block_sparse_moe.experts.59.w3", "model.layers.9.block_sparse_moe.experts.60.w3", "model.layers.9.block_sparse_moe.experts.61.w3", "model.layers.9.block_sparse_moe.experts.62.w3", "model.layers.9.block_sparse_moe.experts.63.w3", "model.layers.9.block_sparse_moe.experts.64.w3", "model.layers.9.block_sparse_moe.experts.65.w3", "model.layers.9.block_sparse_moe.experts.66.w3", "model.layers.9.block_sparse_moe.experts.67.w3", "model.layers.9.block_sparse_moe.experts.68.w3", "model.layers.9.block_sparse_moe.experts.69.w3", "model.layers.9.block_sparse_moe.experts.70.w3", "model.layers.9.block_sparse_moe.experts.71.w3", "model.layers.9.block_sparse_moe.experts.72.w3", "model.layers.9.block_sparse_moe.experts.73.w3", "model.layers.9.block_sparse_moe.experts.74.w3", "model.layers.9.block_sparse_moe.experts.75.w3", "model.layers.9.block_sparse_moe.experts.76.w3", "model.layers.9.block_sparse_moe.experts.77.w3", "model.layers.9.block_sparse_moe.experts.78.w3", "model.layers.9.block_sparse_moe.experts.79.w3", "model.layers.9.block_sparse_moe.experts.80.w3", "model.layers.9.block_sparse_moe.experts.81.w3", "model.layers.9.block_sparse_moe.experts.82.w3", "model.layers.9.block_sparse_moe.experts.83.w3", "model.layers.9.block_sparse_moe.experts.84.w3", "model.layers.9.block_sparse_moe.experts.85.w3", "model.layers.9.block_sparse_moe.experts.86.w3", "model.layers.9.block_sparse_moe.experts.87.w3", "model.layers.9.block_sparse_moe.experts.88.w3", "model.layers.9.block_sparse_moe.experts.89.w3", "model.layers.9.block_sparse_moe.experts.90.w3", "model.layers.9.block_sparse_moe.experts.91.w3", "model.layers.9.block_sparse_moe.experts.92.w3", "model.layers.9.block_sparse_moe.experts.93.w3", "model.layers.9.block_sparse_moe.experts.94.w3", "model.layers.9.block_sparse_moe.experts.95.w3", "model.layers.9.block_sparse_moe.experts.96.w3", "model.layers.9.block_sparse_moe.experts.97.w3", "model.layers.9.block_sparse_moe.experts.98.w3", "model.layers.9.block_sparse_moe.experts.99.w3", "model.layers.9.block_sparse_moe.experts.100.w3", "model.layers.9.block_sparse_moe.experts.101.w3", "model.layers.9.block_sparse_moe.experts.102.w3", "model.layers.9.block_sparse_moe.experts.103.w3", "model.layers.9.block_sparse_moe.experts.104.w3", "model.layers.9.block_sparse_moe.experts.105.w3", "model.layers.9.block_sparse_moe.experts.106.w3", "model.layers.9.block_sparse_moe.experts.107.w3", "model.layers.9.block_sparse_moe.experts.108.w3", "model.layers.9.block_sparse_moe.experts.109.w3", "model.layers.9.block_sparse_moe.experts.110.w3", "model.layers.9.block_sparse_moe.experts.111.w3", "model.layers.9.block_sparse_moe.experts.112.w3", "model.layers.9.block_sparse_moe.experts.113.w3", "model.layers.9.block_sparse_moe.experts.114.w3", "model.layers.9.block_sparse_moe.experts.115.w3", "model.layers.9.block_sparse_moe.experts.116.w3", "model.layers.9.block_sparse_moe.experts.117.w3", "model.layers.9.block_sparse_moe.experts.118.w3", "model.layers.9.block_sparse_moe.experts.119.w3", "model.layers.9.block_sparse_moe.experts.120.w3", "model.layers.9.block_sparse_moe.experts.121.w3", "model.layers.9.block_sparse_moe.experts.122.w3", "model.layers.9.block_sparse_moe.experts.123.w3", "model.layers.9.block_sparse_moe.experts.124.w3", "model.layers.9.block_sparse_moe.experts.125.w3", "model.layers.9.block_sparse_moe.experts.126.w3", "model.layers.9.block_sparse_moe.experts.127.w3", "model.layers.9.block_sparse_moe.experts.128.w3", "model.layers.9.block_sparse_moe.experts.129.w3", "model.layers.9.block_sparse_moe.experts.130.w3", "model.layers.9.block_sparse_moe.experts.131.w3", "model.layers.9.block_sparse_moe.experts.132.w3", "model.layers.9.block_sparse_moe.experts.133.w3", "model.layers.9.block_sparse_moe.experts.134.w3", "model.layers.9.block_sparse_moe.experts.135.w3", "model.layers.9.block_sparse_moe.experts.136.w3", "model.layers.9.block_sparse_moe.experts.137.w3", "model.layers.9.block_sparse_moe.experts.138.w3", "model.layers.9.block_sparse_moe.experts.139.w3", "model.layers.9.block_sparse_moe.experts.140.w3", "model.layers.9.block_sparse_moe.experts.141.w3", "model.layers.9.block_sparse_moe.experts.142.w3", "model.layers.9.block_sparse_moe.experts.143.w3", "model.layers.9.block_sparse_moe.experts.144.w3", "model.layers.9.block_sparse_moe.experts.145.w3", "model.layers.9.block_sparse_moe.experts.146.w3", "model.layers.9.block_sparse_moe.experts.147.w3", "model.layers.9.block_sparse_moe.experts.148.w3", "model.layers.9.block_sparse_moe.experts.149.w3", "model.layers.9.block_sparse_moe.experts.150.w3", "model.layers.9.block_sparse_moe.experts.151.w3", "model.layers.9.block_sparse_moe.experts.152.w3", "model.layers.9.block_sparse_moe.experts.153.w3", "model.layers.9.block_sparse_moe.experts.154.w3", "model.layers.9.block_sparse_moe.experts.155.w3", "model.layers.9.block_sparse_moe.experts.156.w3", "model.layers.9.block_sparse_moe.experts.157.w3", "model.layers.9.block_sparse_moe.experts.158.w3", "model.layers.9.block_sparse_moe.experts.159.w3", "model.layers.9.block_sparse_moe.experts.160.w3", "model.layers.9.block_sparse_moe.experts.161.w3", "model.layers.9.block_sparse_moe.experts.162.w3", "model.layers.9.block_sparse_moe.experts.163.w3", "model.layers.9.block_sparse_moe.experts.164.w3", "model.layers.9.block_sparse_moe.experts.165.w3", "model.layers.9.block_sparse_moe.experts.166.w3", "model.layers.9.block_sparse_moe.experts.167.w3", "model.layers.9.block_sparse_moe.experts.168.w3", "model.layers.9.block_sparse_moe.experts.169.w3", "model.layers.9.block_sparse_moe.experts.170.w3", "model.layers.9.block_sparse_moe.experts.171.w3", "model.layers.9.block_sparse_moe.experts.172.w3", "model.layers.9.block_sparse_moe.experts.173.w3", "model.layers.9.block_sparse_moe.experts.174.w3", "model.layers.9.block_sparse_moe.experts.175.w3", "model.layers.9.block_sparse_moe.experts.176.w3", "model.layers.9.block_sparse_moe.experts.177.w3", "model.layers.9.block_sparse_moe.experts.178.w3", "model.layers.9.block_sparse_moe.experts.179.w3", "model.layers.9.block_sparse_moe.experts.180.w3", "model.layers.9.block_sparse_moe.experts.181.w3", "model.layers.9.block_sparse_moe.experts.182.w3", "model.layers.9.block_sparse_moe.experts.183.w3", "model.layers.9.block_sparse_moe.experts.184.w3", "model.layers.9.block_sparse_moe.experts.185.w3", "model.layers.9.block_sparse_moe.experts.186.w3", "model.layers.9.block_sparse_moe.experts.187.w3", "model.layers.9.block_sparse_moe.experts.188.w3", "model.layers.9.block_sparse_moe.experts.189.w3", "model.layers.9.block_sparse_moe.experts.190.w3", "model.layers.9.block_sparse_moe.experts.191.w3", "model.layers.9.block_sparse_moe.experts.192.w3", "model.layers.9.block_sparse_moe.experts.193.w3", "model.layers.9.block_sparse_moe.experts.194.w3", "model.layers.9.block_sparse_moe.experts.195.w3", "model.layers.9.block_sparse_moe.experts.196.w3", "model.layers.9.block_sparse_moe.experts.197.w3", "model.layers.9.block_sparse_moe.experts.198.w3", "model.layers.9.block_sparse_moe.experts.199.w3", "model.layers.9.block_sparse_moe.experts.200.w3", "model.layers.9.block_sparse_moe.experts.201.w3", "model.layers.9.block_sparse_moe.experts.202.w3", "model.layers.9.block_sparse_moe.experts.203.w3", "model.layers.9.block_sparse_moe.experts.204.w3", "model.layers.9.block_sparse_moe.experts.205.w3", "model.layers.9.block_sparse_moe.experts.206.w3", "model.layers.9.block_sparse_moe.experts.207.w3", "model.layers.9.block_sparse_moe.experts.208.w3", "model.layers.9.block_sparse_moe.experts.209.w3", "model.layers.9.block_sparse_moe.experts.210.w3", "model.layers.9.block_sparse_moe.experts.211.w3", "model.layers.9.block_sparse_moe.experts.212.w3", "model.layers.9.block_sparse_moe.experts.213.w3", "model.layers.9.block_sparse_moe.experts.214.w3", "model.layers.9.block_sparse_moe.experts.215.w3", "model.layers.9.block_sparse_moe.experts.216.w3", "model.layers.9.block_sparse_moe.experts.217.w3", "model.layers.9.block_sparse_moe.experts.218.w3", "model.layers.9.block_sparse_moe.experts.219.w3", "model.layers.9.block_sparse_moe.experts.220.w3", "model.layers.9.block_sparse_moe.experts.221.w3", "model.layers.9.block_sparse_moe.experts.222.w3", "model.layers.9.block_sparse_moe.experts.223.w3", "model.layers.9.block_sparse_moe.experts.224.w3", "model.layers.9.block_sparse_moe.experts.225.w3", "model.layers.9.block_sparse_moe.experts.226.w3", "model.layers.9.block_sparse_moe.experts.227.w3", "model.layers.9.block_sparse_moe.experts.228.w3", "model.layers.9.block_sparse_moe.experts.229.w3", "model.layers.9.block_sparse_moe.experts.230.w3", "model.layers.9.block_sparse_moe.experts.231.w3", "model.layers.9.block_sparse_moe.experts.232.w3", "model.layers.9.block_sparse_moe.experts.233.w3", "model.layers.9.block_sparse_moe.experts.234.w3", "model.layers.9.block_sparse_moe.experts.235.w3", "model.layers.9.block_sparse_moe.experts.236.w3", "model.layers.9.block_sparse_moe.experts.237.w3", "model.layers.9.block_sparse_moe.experts.238.w3", "model.layers.9.block_sparse_moe.experts.239.w3", "model.layers.9.block_sparse_moe.experts.240.w3", "model.layers.9.block_sparse_moe.experts.241.w3", "model.layers.9.block_sparse_moe.experts.242.w3", "model.layers.9.block_sparse_moe.experts.243.w3", "model.layers.9.block_sparse_moe.experts.244.w3", "model.layers.9.block_sparse_moe.experts.245.w3", "model.layers.9.block_sparse_moe.experts.246.w3", "model.layers.9.block_sparse_moe.experts.247.w3", "model.layers.9.block_sparse_moe.experts.248.w3", "model.layers.9.block_sparse_moe.experts.249.w3", "model.layers.9.block_sparse_moe.experts.250.w3", "model.layers.9.block_sparse_moe.experts.251.w3", "model.layers.9.block_sparse_moe.experts.252.w3", "model.layers.9.block_sparse_moe.experts.253.w3", "model.layers.9.block_sparse_moe.experts.254.w3", "model.layers.9.block_sparse_moe.experts.255.w3", "model.layers.9.block_sparse_moe.experts.0.w2", "model.layers.9.block_sparse_moe.experts.1.w2", "model.layers.9.block_sparse_moe.experts.2.w2", "model.layers.9.block_sparse_moe.experts.3.w2", "model.layers.9.block_sparse_moe.experts.4.w2", "model.layers.9.block_sparse_moe.experts.5.w2", "model.layers.9.block_sparse_moe.experts.6.w2", "model.layers.9.block_sparse_moe.experts.7.w2", "model.layers.9.block_sparse_moe.experts.8.w2", "model.layers.9.block_sparse_moe.experts.9.w2", "model.layers.9.block_sparse_moe.experts.10.w2", "model.layers.9.block_sparse_moe.experts.11.w2", "model.layers.9.block_sparse_moe.experts.12.w2", "model.layers.9.block_sparse_moe.experts.13.w2", "model.layers.9.block_sparse_moe.experts.14.w2", "model.layers.9.block_sparse_moe.experts.15.w2", "model.layers.9.block_sparse_moe.experts.16.w2", "model.layers.9.block_sparse_moe.experts.17.w2", "model.layers.9.block_sparse_moe.experts.18.w2", "model.layers.9.block_sparse_moe.experts.19.w2", "model.layers.9.block_sparse_moe.experts.20.w2", "model.layers.9.block_sparse_moe.experts.21.w2", "model.layers.9.block_sparse_moe.experts.22.w2", "model.layers.9.block_sparse_moe.experts.23.w2", "model.layers.9.block_sparse_moe.experts.24.w2", "model.layers.9.block_sparse_moe.experts.25.w2", "model.layers.9.block_sparse_moe.experts.26.w2", "model.layers.9.block_sparse_moe.experts.27.w2", "model.layers.9.block_sparse_moe.experts.28.w2", "model.layers.9.block_sparse_moe.experts.29.w2", "model.layers.9.block_sparse_moe.experts.30.w2", "model.layers.9.block_sparse_moe.experts.31.w2", "model.layers.9.block_sparse_moe.experts.32.w2", "model.layers.9.block_sparse_moe.experts.33.w2", "model.layers.9.block_sparse_moe.experts.34.w2", "model.layers.9.block_sparse_moe.experts.35.w2", "model.layers.9.block_sparse_moe.experts.36.w2", "model.layers.9.block_sparse_moe.experts.37.w2", "model.layers.9.block_sparse_moe.experts.38.w2", "model.layers.9.block_sparse_moe.experts.39.w2", "model.layers.9.block_sparse_moe.experts.40.w2", "model.layers.9.block_sparse_moe.experts.41.w2", "model.layers.9.block_sparse_moe.experts.42.w2", "model.layers.9.block_sparse_moe.experts.43.w2", "model.layers.9.block_sparse_moe.experts.44.w2", "model.layers.9.block_sparse_moe.experts.45.w2", "model.layers.9.block_sparse_moe.experts.46.w2", "model.layers.9.block_sparse_moe.experts.47.w2", "model.layers.9.block_sparse_moe.experts.48.w2", "model.layers.9.block_sparse_moe.experts.49.w2", "model.layers.9.block_sparse_moe.experts.50.w2", "model.layers.9.block_sparse_moe.experts.51.w2", "model.layers.9.block_sparse_moe.experts.52.w2", "model.layers.9.block_sparse_moe.experts.53.w2", "model.layers.9.block_sparse_moe.experts.54.w2", "model.layers.9.block_sparse_moe.experts.55.w2", "model.layers.9.block_sparse_moe.experts.56.w2", "model.layers.9.block_sparse_moe.experts.57.w2", "model.layers.9.block_sparse_moe.experts.58.w2", "model.layers.9.block_sparse_moe.experts.59.w2", "model.layers.9.block_sparse_moe.experts.60.w2", "model.layers.9.block_sparse_moe.experts.61.w2", "model.layers.9.block_sparse_moe.experts.62.w2", "model.layers.9.block_sparse_moe.experts.63.w2", "model.layers.9.block_sparse_moe.experts.64.w2", "model.layers.9.block_sparse_moe.experts.65.w2", "model.layers.9.block_sparse_moe.experts.66.w2", "model.layers.9.block_sparse_moe.experts.67.w2", "model.layers.9.block_sparse_moe.experts.68.w2", "model.layers.9.block_sparse_moe.experts.69.w2", "model.layers.9.block_sparse_moe.experts.70.w2", "model.layers.9.block_sparse_moe.experts.71.w2", "model.layers.9.block_sparse_moe.experts.72.w2", "model.layers.9.block_sparse_moe.experts.73.w2", "model.layers.9.block_sparse_moe.experts.74.w2", "model.layers.9.block_sparse_moe.experts.75.w2", "model.layers.9.block_sparse_moe.experts.76.w2", "model.layers.9.block_sparse_moe.experts.77.w2", "model.layers.9.block_sparse_moe.experts.78.w2", "model.layers.9.block_sparse_moe.experts.79.w2", "model.layers.9.block_sparse_moe.experts.80.w2", "model.layers.9.block_sparse_moe.experts.81.w2", "model.layers.9.block_sparse_moe.experts.82.w2", "model.layers.9.block_sparse_moe.experts.83.w2", "model.layers.9.block_sparse_moe.experts.84.w2", "model.layers.9.block_sparse_moe.experts.85.w2", "model.layers.9.block_sparse_moe.experts.86.w2", "model.layers.9.block_sparse_moe.experts.87.w2", "model.layers.9.block_sparse_moe.experts.88.w2", "model.layers.9.block_sparse_moe.experts.89.w2", "model.layers.9.block_sparse_moe.experts.90.w2", "model.layers.9.block_sparse_moe.experts.91.w2", "model.layers.9.block_sparse_moe.experts.92.w2", "model.layers.9.block_sparse_moe.experts.93.w2", "model.layers.9.block_sparse_moe.experts.94.w2", "model.layers.9.block_sparse_moe.experts.95.w2", "model.layers.9.block_sparse_moe.experts.96.w2", "model.layers.9.block_sparse_moe.experts.97.w2", "model.layers.9.block_sparse_moe.experts.98.w2", "model.layers.9.block_sparse_moe.experts.99.w2", "model.layers.9.block_sparse_moe.experts.100.w2", "model.layers.9.block_sparse_moe.experts.101.w2", "model.layers.9.block_sparse_moe.experts.102.w2", "model.layers.9.block_sparse_moe.experts.103.w2", "model.layers.9.block_sparse_moe.experts.104.w2", "model.layers.9.block_sparse_moe.experts.105.w2", "model.layers.9.block_sparse_moe.experts.106.w2", "model.layers.9.block_sparse_moe.experts.107.w2", "model.layers.9.block_sparse_moe.experts.108.w2", "model.layers.9.block_sparse_moe.experts.109.w2", "model.layers.9.block_sparse_moe.experts.110.w2", "model.layers.9.block_sparse_moe.experts.111.w2", "model.layers.9.block_sparse_moe.experts.112.w2", "model.layers.9.block_sparse_moe.experts.113.w2", "model.layers.9.block_sparse_moe.experts.114.w2", "model.layers.9.block_sparse_moe.experts.115.w2", "model.layers.9.block_sparse_moe.experts.116.w2", "model.layers.9.block_sparse_moe.experts.117.w2", "model.layers.9.block_sparse_moe.experts.118.w2", "model.layers.9.block_sparse_moe.experts.119.w2", "model.layers.9.block_sparse_moe.experts.120.w2", "model.layers.9.block_sparse_moe.experts.121.w2", "model.layers.9.block_sparse_moe.experts.122.w2", "model.layers.9.block_sparse_moe.experts.123.w2", "model.layers.9.block_sparse_moe.experts.124.w2", "model.layers.9.block_sparse_moe.experts.125.w2", "model.layers.9.block_sparse_moe.experts.126.w2", "model.layers.9.block_sparse_moe.experts.127.w2", "model.layers.9.block_sparse_moe.experts.128.w2", "model.layers.9.block_sparse_moe.experts.129.w2", "model.layers.9.block_sparse_moe.experts.130.w2", "model.layers.9.block_sparse_moe.experts.131.w2", "model.layers.9.block_sparse_moe.experts.132.w2", "model.layers.9.block_sparse_moe.experts.133.w2", "model.layers.9.block_sparse_moe.experts.134.w2", "model.layers.9.block_sparse_moe.experts.135.w2", "model.layers.9.block_sparse_moe.experts.136.w2", "model.layers.9.block_sparse_moe.experts.137.w2", "model.layers.9.block_sparse_moe.experts.138.w2", "model.layers.9.block_sparse_moe.experts.139.w2", "model.layers.9.block_sparse_moe.experts.140.w2", "model.layers.9.block_sparse_moe.experts.141.w2", "model.layers.9.block_sparse_moe.experts.142.w2", "model.layers.9.block_sparse_moe.experts.143.w2", "model.layers.9.block_sparse_moe.experts.144.w2", "model.layers.9.block_sparse_moe.experts.145.w2", "model.layers.9.block_sparse_moe.experts.146.w2", "model.layers.9.block_sparse_moe.experts.147.w2", "model.layers.9.block_sparse_moe.experts.148.w2", "model.layers.9.block_sparse_moe.experts.149.w2", "model.layers.9.block_sparse_moe.experts.150.w2", "model.layers.9.block_sparse_moe.experts.151.w2", "model.layers.9.block_sparse_moe.experts.152.w2", "model.layers.9.block_sparse_moe.experts.153.w2", "model.layers.9.block_sparse_moe.experts.154.w2", "model.layers.9.block_sparse_moe.experts.155.w2", "model.layers.9.block_sparse_moe.experts.156.w2", "model.layers.9.block_sparse_moe.experts.157.w2", "model.layers.9.block_sparse_moe.experts.158.w2", "model.layers.9.block_sparse_moe.experts.159.w2", "model.layers.9.block_sparse_moe.experts.160.w2", "model.layers.9.block_sparse_moe.experts.161.w2", "model.layers.9.block_sparse_moe.experts.162.w2", "model.layers.9.block_sparse_moe.experts.163.w2", "model.layers.9.block_sparse_moe.experts.164.w2", "model.layers.9.block_sparse_moe.experts.165.w2", "model.layers.9.block_sparse_moe.experts.166.w2", "model.layers.9.block_sparse_moe.experts.167.w2", "model.layers.9.block_sparse_moe.experts.168.w2", "model.layers.9.block_sparse_moe.experts.169.w2", "model.layers.9.block_sparse_moe.experts.170.w2", "model.layers.9.block_sparse_moe.experts.171.w2", "model.layers.9.block_sparse_moe.experts.172.w2", "model.layers.9.block_sparse_moe.experts.173.w2", "model.layers.9.block_sparse_moe.experts.174.w2", "model.layers.9.block_sparse_moe.experts.175.w2", "model.layers.9.block_sparse_moe.experts.176.w2", "model.layers.9.block_sparse_moe.experts.177.w2", "model.layers.9.block_sparse_moe.experts.178.w2", "model.layers.9.block_sparse_moe.experts.179.w2", "model.layers.9.block_sparse_moe.experts.180.w2", "model.layers.9.block_sparse_moe.experts.181.w2", "model.layers.9.block_sparse_moe.experts.182.w2", "model.layers.9.block_sparse_moe.experts.183.w2", "model.layers.9.block_sparse_moe.experts.184.w2", "model.layers.9.block_sparse_moe.experts.185.w2", "model.layers.9.block_sparse_moe.experts.186.w2", "model.layers.9.block_sparse_moe.experts.187.w2", "model.layers.9.block_sparse_moe.experts.188.w2", "model.layers.9.block_sparse_moe.experts.189.w2", "model.layers.9.block_sparse_moe.experts.190.w2", "model.layers.9.block_sparse_moe.experts.191.w2", "model.layers.9.block_sparse_moe.experts.192.w2", "model.layers.9.block_sparse_moe.experts.193.w2", "model.layers.9.block_sparse_moe.experts.194.w2", "model.layers.9.block_sparse_moe.experts.195.w2", "model.layers.9.block_sparse_moe.experts.196.w2", "model.layers.9.block_sparse_moe.experts.197.w2", "model.layers.9.block_sparse_moe.experts.198.w2", "model.layers.9.block_sparse_moe.experts.199.w2", "model.layers.9.block_sparse_moe.experts.200.w2", "model.layers.9.block_sparse_moe.experts.201.w2", "model.layers.9.block_sparse_moe.experts.202.w2", "model.layers.9.block_sparse_moe.experts.203.w2", "model.layers.9.block_sparse_moe.experts.204.w2", "model.layers.9.block_sparse_moe.experts.205.w2", "model.layers.9.block_sparse_moe.experts.206.w2", "model.layers.9.block_sparse_moe.experts.207.w2", "model.layers.9.block_sparse_moe.experts.208.w2", "model.layers.9.block_sparse_moe.experts.209.w2", "model.layers.9.block_sparse_moe.experts.210.w2", "model.layers.9.block_sparse_moe.experts.211.w2", "model.layers.9.block_sparse_moe.experts.212.w2", "model.layers.9.block_sparse_moe.experts.213.w2", "model.layers.9.block_sparse_moe.experts.214.w2", "model.layers.9.block_sparse_moe.experts.215.w2", "model.layers.9.block_sparse_moe.experts.216.w2", "model.layers.9.block_sparse_moe.experts.217.w2", "model.layers.9.block_sparse_moe.experts.218.w2", "model.layers.9.block_sparse_moe.experts.219.w2", "model.layers.9.block_sparse_moe.experts.220.w2", "model.layers.9.block_sparse_moe.experts.221.w2", "model.layers.9.block_sparse_moe.experts.222.w2", "model.layers.9.block_sparse_moe.experts.223.w2", "model.layers.9.block_sparse_moe.experts.224.w2", "model.layers.9.block_sparse_moe.experts.225.w2", "model.layers.9.block_sparse_moe.experts.226.w2", "model.layers.9.block_sparse_moe.experts.227.w2", "model.layers.9.block_sparse_moe.experts.228.w2", "model.layers.9.block_sparse_moe.experts.229.w2", "model.layers.9.block_sparse_moe.experts.230.w2", "model.layers.9.block_sparse_moe.experts.231.w2", "model.layers.9.block_sparse_moe.experts.232.w2", "model.layers.9.block_sparse_moe.experts.233.w2", "model.layers.9.block_sparse_moe.experts.234.w2", "model.layers.9.block_sparse_moe.experts.235.w2", "model.layers.9.block_sparse_moe.experts.236.w2", "model.layers.9.block_sparse_moe.experts.237.w2", "model.layers.9.block_sparse_moe.experts.238.w2", "model.layers.9.block_sparse_moe.experts.239.w2", "model.layers.9.block_sparse_moe.experts.240.w2", "model.layers.9.block_sparse_moe.experts.241.w2", "model.layers.9.block_sparse_moe.experts.242.w2", "model.layers.9.block_sparse_moe.experts.243.w2", "model.layers.9.block_sparse_moe.experts.244.w2", "model.layers.9.block_sparse_moe.experts.245.w2", "model.layers.9.block_sparse_moe.experts.246.w2", "model.layers.9.block_sparse_moe.experts.247.w2", "model.layers.9.block_sparse_moe.experts.248.w2", "model.layers.9.block_sparse_moe.experts.249.w2", "model.layers.9.block_sparse_moe.experts.250.w2", "model.layers.9.block_sparse_moe.experts.251.w2", "model.layers.9.block_sparse_moe.experts.252.w2", "model.layers.9.block_sparse_moe.experts.253.w2", "model.layers.9.block_sparse_moe.experts.254.w2", "model.layers.9.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.001296216808259476, "dbits": 3623878656 } ] }, { "idx": 20, "layers": [ "model.layers.10.self_attn.q_proj", "model.layers.10.self_attn.k_proj", "model.layers.10.self_attn.v_proj", "model.layers.10.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0008117556571960449, "dbits": 44040192 } ] }, { "idx": 21, "layers": [ "model.layers.10.block_sparse_moe.experts.0.w1", "model.layers.10.block_sparse_moe.experts.1.w1", "model.layers.10.block_sparse_moe.experts.2.w1", "model.layers.10.block_sparse_moe.experts.3.w1", "model.layers.10.block_sparse_moe.experts.4.w1", "model.layers.10.block_sparse_moe.experts.5.w1", "model.layers.10.block_sparse_moe.experts.6.w1", "model.layers.10.block_sparse_moe.experts.7.w1", "model.layers.10.block_sparse_moe.experts.8.w1", "model.layers.10.block_sparse_moe.experts.9.w1", "model.layers.10.block_sparse_moe.experts.10.w1", "model.layers.10.block_sparse_moe.experts.11.w1", "model.layers.10.block_sparse_moe.experts.12.w1", "model.layers.10.block_sparse_moe.experts.13.w1", "model.layers.10.block_sparse_moe.experts.14.w1", "model.layers.10.block_sparse_moe.experts.15.w1", "model.layers.10.block_sparse_moe.experts.16.w1", "model.layers.10.block_sparse_moe.experts.17.w1", "model.layers.10.block_sparse_moe.experts.18.w1", "model.layers.10.block_sparse_moe.experts.19.w1", "model.layers.10.block_sparse_moe.experts.20.w1", "model.layers.10.block_sparse_moe.experts.21.w1", "model.layers.10.block_sparse_moe.experts.22.w1", "model.layers.10.block_sparse_moe.experts.23.w1", "model.layers.10.block_sparse_moe.experts.24.w1", "model.layers.10.block_sparse_moe.experts.25.w1", "model.layers.10.block_sparse_moe.experts.26.w1", "model.layers.10.block_sparse_moe.experts.27.w1", "model.layers.10.block_sparse_moe.experts.28.w1", "model.layers.10.block_sparse_moe.experts.29.w1", "model.layers.10.block_sparse_moe.experts.30.w1", "model.layers.10.block_sparse_moe.experts.31.w1", "model.layers.10.block_sparse_moe.experts.32.w1", "model.layers.10.block_sparse_moe.experts.33.w1", "model.layers.10.block_sparse_moe.experts.34.w1", "model.layers.10.block_sparse_moe.experts.35.w1", "model.layers.10.block_sparse_moe.experts.36.w1", "model.layers.10.block_sparse_moe.experts.37.w1", "model.layers.10.block_sparse_moe.experts.38.w1", "model.layers.10.block_sparse_moe.experts.39.w1", "model.layers.10.block_sparse_moe.experts.40.w1", "model.layers.10.block_sparse_moe.experts.41.w1", "model.layers.10.block_sparse_moe.experts.42.w1", "model.layers.10.block_sparse_moe.experts.43.w1", "model.layers.10.block_sparse_moe.experts.44.w1", "model.layers.10.block_sparse_moe.experts.45.w1", "model.layers.10.block_sparse_moe.experts.46.w1", "model.layers.10.block_sparse_moe.experts.47.w1", "model.layers.10.block_sparse_moe.experts.48.w1", "model.layers.10.block_sparse_moe.experts.49.w1", "model.layers.10.block_sparse_moe.experts.50.w1", "model.layers.10.block_sparse_moe.experts.51.w1", "model.layers.10.block_sparse_moe.experts.52.w1", "model.layers.10.block_sparse_moe.experts.53.w1", "model.layers.10.block_sparse_moe.experts.54.w1", "model.layers.10.block_sparse_moe.experts.55.w1", "model.layers.10.block_sparse_moe.experts.56.w1", "model.layers.10.block_sparse_moe.experts.57.w1", "model.layers.10.block_sparse_moe.experts.58.w1", "model.layers.10.block_sparse_moe.experts.59.w1", "model.layers.10.block_sparse_moe.experts.60.w1", "model.layers.10.block_sparse_moe.experts.61.w1", "model.layers.10.block_sparse_moe.experts.62.w1", "model.layers.10.block_sparse_moe.experts.63.w1", "model.layers.10.block_sparse_moe.experts.64.w1", "model.layers.10.block_sparse_moe.experts.65.w1", "model.layers.10.block_sparse_moe.experts.66.w1", "model.layers.10.block_sparse_moe.experts.67.w1", "model.layers.10.block_sparse_moe.experts.68.w1", "model.layers.10.block_sparse_moe.experts.69.w1", "model.layers.10.block_sparse_moe.experts.70.w1", "model.layers.10.block_sparse_moe.experts.71.w1", "model.layers.10.block_sparse_moe.experts.72.w1", "model.layers.10.block_sparse_moe.experts.73.w1", "model.layers.10.block_sparse_moe.experts.74.w1", "model.layers.10.block_sparse_moe.experts.75.w1", "model.layers.10.block_sparse_moe.experts.76.w1", "model.layers.10.block_sparse_moe.experts.77.w1", "model.layers.10.block_sparse_moe.experts.78.w1", "model.layers.10.block_sparse_moe.experts.79.w1", "model.layers.10.block_sparse_moe.experts.80.w1", "model.layers.10.block_sparse_moe.experts.81.w1", "model.layers.10.block_sparse_moe.experts.82.w1", "model.layers.10.block_sparse_moe.experts.83.w1", "model.layers.10.block_sparse_moe.experts.84.w1", "model.layers.10.block_sparse_moe.experts.85.w1", "model.layers.10.block_sparse_moe.experts.86.w1", "model.layers.10.block_sparse_moe.experts.87.w1", "model.layers.10.block_sparse_moe.experts.88.w1", "model.layers.10.block_sparse_moe.experts.89.w1", "model.layers.10.block_sparse_moe.experts.90.w1", "model.layers.10.block_sparse_moe.experts.91.w1", "model.layers.10.block_sparse_moe.experts.92.w1", "model.layers.10.block_sparse_moe.experts.93.w1", "model.layers.10.block_sparse_moe.experts.94.w1", "model.layers.10.block_sparse_moe.experts.95.w1", "model.layers.10.block_sparse_moe.experts.96.w1", "model.layers.10.block_sparse_moe.experts.97.w1", "model.layers.10.block_sparse_moe.experts.98.w1", "model.layers.10.block_sparse_moe.experts.99.w1", "model.layers.10.block_sparse_moe.experts.100.w1", "model.layers.10.block_sparse_moe.experts.101.w1", "model.layers.10.block_sparse_moe.experts.102.w1", "model.layers.10.block_sparse_moe.experts.103.w1", "model.layers.10.block_sparse_moe.experts.104.w1", "model.layers.10.block_sparse_moe.experts.105.w1", "model.layers.10.block_sparse_moe.experts.106.w1", "model.layers.10.block_sparse_moe.experts.107.w1", "model.layers.10.block_sparse_moe.experts.108.w1", "model.layers.10.block_sparse_moe.experts.109.w1", "model.layers.10.block_sparse_moe.experts.110.w1", "model.layers.10.block_sparse_moe.experts.111.w1", "model.layers.10.block_sparse_moe.experts.112.w1", "model.layers.10.block_sparse_moe.experts.113.w1", "model.layers.10.block_sparse_moe.experts.114.w1", "model.layers.10.block_sparse_moe.experts.115.w1", "model.layers.10.block_sparse_moe.experts.116.w1", "model.layers.10.block_sparse_moe.experts.117.w1", "model.layers.10.block_sparse_moe.experts.118.w1", "model.layers.10.block_sparse_moe.experts.119.w1", "model.layers.10.block_sparse_moe.experts.120.w1", "model.layers.10.block_sparse_moe.experts.121.w1", "model.layers.10.block_sparse_moe.experts.122.w1", "model.layers.10.block_sparse_moe.experts.123.w1", "model.layers.10.block_sparse_moe.experts.124.w1", "model.layers.10.block_sparse_moe.experts.125.w1", "model.layers.10.block_sparse_moe.experts.126.w1", "model.layers.10.block_sparse_moe.experts.127.w1", "model.layers.10.block_sparse_moe.experts.128.w1", "model.layers.10.block_sparse_moe.experts.129.w1", "model.layers.10.block_sparse_moe.experts.130.w1", "model.layers.10.block_sparse_moe.experts.131.w1", "model.layers.10.block_sparse_moe.experts.132.w1", "model.layers.10.block_sparse_moe.experts.133.w1", "model.layers.10.block_sparse_moe.experts.134.w1", "model.layers.10.block_sparse_moe.experts.135.w1", "model.layers.10.block_sparse_moe.experts.136.w1", "model.layers.10.block_sparse_moe.experts.137.w1", "model.layers.10.block_sparse_moe.experts.138.w1", "model.layers.10.block_sparse_moe.experts.139.w1", "model.layers.10.block_sparse_moe.experts.140.w1", "model.layers.10.block_sparse_moe.experts.141.w1", "model.layers.10.block_sparse_moe.experts.142.w1", "model.layers.10.block_sparse_moe.experts.143.w1", "model.layers.10.block_sparse_moe.experts.144.w1", "model.layers.10.block_sparse_moe.experts.145.w1", "model.layers.10.block_sparse_moe.experts.146.w1", "model.layers.10.block_sparse_moe.experts.147.w1", "model.layers.10.block_sparse_moe.experts.148.w1", "model.layers.10.block_sparse_moe.experts.149.w1", "model.layers.10.block_sparse_moe.experts.150.w1", "model.layers.10.block_sparse_moe.experts.151.w1", "model.layers.10.block_sparse_moe.experts.152.w1", "model.layers.10.block_sparse_moe.experts.153.w1", "model.layers.10.block_sparse_moe.experts.154.w1", "model.layers.10.block_sparse_moe.experts.155.w1", "model.layers.10.block_sparse_moe.experts.156.w1", "model.layers.10.block_sparse_moe.experts.157.w1", "model.layers.10.block_sparse_moe.experts.158.w1", "model.layers.10.block_sparse_moe.experts.159.w1", "model.layers.10.block_sparse_moe.experts.160.w1", "model.layers.10.block_sparse_moe.experts.161.w1", "model.layers.10.block_sparse_moe.experts.162.w1", "model.layers.10.block_sparse_moe.experts.163.w1", "model.layers.10.block_sparse_moe.experts.164.w1", "model.layers.10.block_sparse_moe.experts.165.w1", "model.layers.10.block_sparse_moe.experts.166.w1", "model.layers.10.block_sparse_moe.experts.167.w1", "model.layers.10.block_sparse_moe.experts.168.w1", "model.layers.10.block_sparse_moe.experts.169.w1", "model.layers.10.block_sparse_moe.experts.170.w1", "model.layers.10.block_sparse_moe.experts.171.w1", "model.layers.10.block_sparse_moe.experts.172.w1", "model.layers.10.block_sparse_moe.experts.173.w1", "model.layers.10.block_sparse_moe.experts.174.w1", "model.layers.10.block_sparse_moe.experts.175.w1", "model.layers.10.block_sparse_moe.experts.176.w1", "model.layers.10.block_sparse_moe.experts.177.w1", "model.layers.10.block_sparse_moe.experts.178.w1", "model.layers.10.block_sparse_moe.experts.179.w1", "model.layers.10.block_sparse_moe.experts.180.w1", "model.layers.10.block_sparse_moe.experts.181.w1", "model.layers.10.block_sparse_moe.experts.182.w1", "model.layers.10.block_sparse_moe.experts.183.w1", "model.layers.10.block_sparse_moe.experts.184.w1", "model.layers.10.block_sparse_moe.experts.185.w1", "model.layers.10.block_sparse_moe.experts.186.w1", "model.layers.10.block_sparse_moe.experts.187.w1", "model.layers.10.block_sparse_moe.experts.188.w1", "model.layers.10.block_sparse_moe.experts.189.w1", "model.layers.10.block_sparse_moe.experts.190.w1", "model.layers.10.block_sparse_moe.experts.191.w1", "model.layers.10.block_sparse_moe.experts.192.w1", "model.layers.10.block_sparse_moe.experts.193.w1", "model.layers.10.block_sparse_moe.experts.194.w1", "model.layers.10.block_sparse_moe.experts.195.w1", "model.layers.10.block_sparse_moe.experts.196.w1", "model.layers.10.block_sparse_moe.experts.197.w1", "model.layers.10.block_sparse_moe.experts.198.w1", "model.layers.10.block_sparse_moe.experts.199.w1", "model.layers.10.block_sparse_moe.experts.200.w1", "model.layers.10.block_sparse_moe.experts.201.w1", "model.layers.10.block_sparse_moe.experts.202.w1", "model.layers.10.block_sparse_moe.experts.203.w1", "model.layers.10.block_sparse_moe.experts.204.w1", "model.layers.10.block_sparse_moe.experts.205.w1", "model.layers.10.block_sparse_moe.experts.206.w1", "model.layers.10.block_sparse_moe.experts.207.w1", "model.layers.10.block_sparse_moe.experts.208.w1", "model.layers.10.block_sparse_moe.experts.209.w1", "model.layers.10.block_sparse_moe.experts.210.w1", "model.layers.10.block_sparse_moe.experts.211.w1", "model.layers.10.block_sparse_moe.experts.212.w1", "model.layers.10.block_sparse_moe.experts.213.w1", "model.layers.10.block_sparse_moe.experts.214.w1", "model.layers.10.block_sparse_moe.experts.215.w1", "model.layers.10.block_sparse_moe.experts.216.w1", "model.layers.10.block_sparse_moe.experts.217.w1", "model.layers.10.block_sparse_moe.experts.218.w1", "model.layers.10.block_sparse_moe.experts.219.w1", "model.layers.10.block_sparse_moe.experts.220.w1", "model.layers.10.block_sparse_moe.experts.221.w1", "model.layers.10.block_sparse_moe.experts.222.w1", "model.layers.10.block_sparse_moe.experts.223.w1", "model.layers.10.block_sparse_moe.experts.224.w1", "model.layers.10.block_sparse_moe.experts.225.w1", "model.layers.10.block_sparse_moe.experts.226.w1", "model.layers.10.block_sparse_moe.experts.227.w1", "model.layers.10.block_sparse_moe.experts.228.w1", "model.layers.10.block_sparse_moe.experts.229.w1", "model.layers.10.block_sparse_moe.experts.230.w1", "model.layers.10.block_sparse_moe.experts.231.w1", "model.layers.10.block_sparse_moe.experts.232.w1", "model.layers.10.block_sparse_moe.experts.233.w1", "model.layers.10.block_sparse_moe.experts.234.w1", "model.layers.10.block_sparse_moe.experts.235.w1", "model.layers.10.block_sparse_moe.experts.236.w1", "model.layers.10.block_sparse_moe.experts.237.w1", "model.layers.10.block_sparse_moe.experts.238.w1", "model.layers.10.block_sparse_moe.experts.239.w1", "model.layers.10.block_sparse_moe.experts.240.w1", "model.layers.10.block_sparse_moe.experts.241.w1", "model.layers.10.block_sparse_moe.experts.242.w1", "model.layers.10.block_sparse_moe.experts.243.w1", "model.layers.10.block_sparse_moe.experts.244.w1", "model.layers.10.block_sparse_moe.experts.245.w1", "model.layers.10.block_sparse_moe.experts.246.w1", "model.layers.10.block_sparse_moe.experts.247.w1", "model.layers.10.block_sparse_moe.experts.248.w1", "model.layers.10.block_sparse_moe.experts.249.w1", "model.layers.10.block_sparse_moe.experts.250.w1", "model.layers.10.block_sparse_moe.experts.251.w1", "model.layers.10.block_sparse_moe.experts.252.w1", "model.layers.10.block_sparse_moe.experts.253.w1", "model.layers.10.block_sparse_moe.experts.254.w1", "model.layers.10.block_sparse_moe.experts.255.w1", "model.layers.10.block_sparse_moe.experts.0.w3", "model.layers.10.block_sparse_moe.experts.1.w3", "model.layers.10.block_sparse_moe.experts.2.w3", "model.layers.10.block_sparse_moe.experts.3.w3", "model.layers.10.block_sparse_moe.experts.4.w3", "model.layers.10.block_sparse_moe.experts.5.w3", "model.layers.10.block_sparse_moe.experts.6.w3", "model.layers.10.block_sparse_moe.experts.7.w3", "model.layers.10.block_sparse_moe.experts.8.w3", "model.layers.10.block_sparse_moe.experts.9.w3", "model.layers.10.block_sparse_moe.experts.10.w3", "model.layers.10.block_sparse_moe.experts.11.w3", "model.layers.10.block_sparse_moe.experts.12.w3", "model.layers.10.block_sparse_moe.experts.13.w3", "model.layers.10.block_sparse_moe.experts.14.w3", "model.layers.10.block_sparse_moe.experts.15.w3", "model.layers.10.block_sparse_moe.experts.16.w3", "model.layers.10.block_sparse_moe.experts.17.w3", "model.layers.10.block_sparse_moe.experts.18.w3", "model.layers.10.block_sparse_moe.experts.19.w3", "model.layers.10.block_sparse_moe.experts.20.w3", "model.layers.10.block_sparse_moe.experts.21.w3", "model.layers.10.block_sparse_moe.experts.22.w3", "model.layers.10.block_sparse_moe.experts.23.w3", "model.layers.10.block_sparse_moe.experts.24.w3", "model.layers.10.block_sparse_moe.experts.25.w3", "model.layers.10.block_sparse_moe.experts.26.w3", "model.layers.10.block_sparse_moe.experts.27.w3", "model.layers.10.block_sparse_moe.experts.28.w3", "model.layers.10.block_sparse_moe.experts.29.w3", "model.layers.10.block_sparse_moe.experts.30.w3", "model.layers.10.block_sparse_moe.experts.31.w3", "model.layers.10.block_sparse_moe.experts.32.w3", "model.layers.10.block_sparse_moe.experts.33.w3", "model.layers.10.block_sparse_moe.experts.34.w3", "model.layers.10.block_sparse_moe.experts.35.w3", "model.layers.10.block_sparse_moe.experts.36.w3", "model.layers.10.block_sparse_moe.experts.37.w3", "model.layers.10.block_sparse_moe.experts.38.w3", "model.layers.10.block_sparse_moe.experts.39.w3", "model.layers.10.block_sparse_moe.experts.40.w3", "model.layers.10.block_sparse_moe.experts.41.w3", "model.layers.10.block_sparse_moe.experts.42.w3", "model.layers.10.block_sparse_moe.experts.43.w3", "model.layers.10.block_sparse_moe.experts.44.w3", "model.layers.10.block_sparse_moe.experts.45.w3", "model.layers.10.block_sparse_moe.experts.46.w3", "model.layers.10.block_sparse_moe.experts.47.w3", "model.layers.10.block_sparse_moe.experts.48.w3", "model.layers.10.block_sparse_moe.experts.49.w3", "model.layers.10.block_sparse_moe.experts.50.w3", "model.layers.10.block_sparse_moe.experts.51.w3", "model.layers.10.block_sparse_moe.experts.52.w3", "model.layers.10.block_sparse_moe.experts.53.w3", "model.layers.10.block_sparse_moe.experts.54.w3", "model.layers.10.block_sparse_moe.experts.55.w3", "model.layers.10.block_sparse_moe.experts.56.w3", "model.layers.10.block_sparse_moe.experts.57.w3", "model.layers.10.block_sparse_moe.experts.58.w3", "model.layers.10.block_sparse_moe.experts.59.w3", "model.layers.10.block_sparse_moe.experts.60.w3", "model.layers.10.block_sparse_moe.experts.61.w3", "model.layers.10.block_sparse_moe.experts.62.w3", "model.layers.10.block_sparse_moe.experts.63.w3", "model.layers.10.block_sparse_moe.experts.64.w3", "model.layers.10.block_sparse_moe.experts.65.w3", "model.layers.10.block_sparse_moe.experts.66.w3", "model.layers.10.block_sparse_moe.experts.67.w3", "model.layers.10.block_sparse_moe.experts.68.w3", "model.layers.10.block_sparse_moe.experts.69.w3", "model.layers.10.block_sparse_moe.experts.70.w3", "model.layers.10.block_sparse_moe.experts.71.w3", "model.layers.10.block_sparse_moe.experts.72.w3", "model.layers.10.block_sparse_moe.experts.73.w3", "model.layers.10.block_sparse_moe.experts.74.w3", "model.layers.10.block_sparse_moe.experts.75.w3", "model.layers.10.block_sparse_moe.experts.76.w3", "model.layers.10.block_sparse_moe.experts.77.w3", "model.layers.10.block_sparse_moe.experts.78.w3", "model.layers.10.block_sparse_moe.experts.79.w3", "model.layers.10.block_sparse_moe.experts.80.w3", "model.layers.10.block_sparse_moe.experts.81.w3", "model.layers.10.block_sparse_moe.experts.82.w3", "model.layers.10.block_sparse_moe.experts.83.w3", "model.layers.10.block_sparse_moe.experts.84.w3", "model.layers.10.block_sparse_moe.experts.85.w3", "model.layers.10.block_sparse_moe.experts.86.w3", "model.layers.10.block_sparse_moe.experts.87.w3", "model.layers.10.block_sparse_moe.experts.88.w3", "model.layers.10.block_sparse_moe.experts.89.w3", "model.layers.10.block_sparse_moe.experts.90.w3", "model.layers.10.block_sparse_moe.experts.91.w3", "model.layers.10.block_sparse_moe.experts.92.w3", "model.layers.10.block_sparse_moe.experts.93.w3", "model.layers.10.block_sparse_moe.experts.94.w3", "model.layers.10.block_sparse_moe.experts.95.w3", "model.layers.10.block_sparse_moe.experts.96.w3", "model.layers.10.block_sparse_moe.experts.97.w3", "model.layers.10.block_sparse_moe.experts.98.w3", "model.layers.10.block_sparse_moe.experts.99.w3", "model.layers.10.block_sparse_moe.experts.100.w3", "model.layers.10.block_sparse_moe.experts.101.w3", "model.layers.10.block_sparse_moe.experts.102.w3", "model.layers.10.block_sparse_moe.experts.103.w3", "model.layers.10.block_sparse_moe.experts.104.w3", "model.layers.10.block_sparse_moe.experts.105.w3", "model.layers.10.block_sparse_moe.experts.106.w3", "model.layers.10.block_sparse_moe.experts.107.w3", "model.layers.10.block_sparse_moe.experts.108.w3", "model.layers.10.block_sparse_moe.experts.109.w3", "model.layers.10.block_sparse_moe.experts.110.w3", "model.layers.10.block_sparse_moe.experts.111.w3", "model.layers.10.block_sparse_moe.experts.112.w3", "model.layers.10.block_sparse_moe.experts.113.w3", "model.layers.10.block_sparse_moe.experts.114.w3", "model.layers.10.block_sparse_moe.experts.115.w3", "model.layers.10.block_sparse_moe.experts.116.w3", "model.layers.10.block_sparse_moe.experts.117.w3", "model.layers.10.block_sparse_moe.experts.118.w3", "model.layers.10.block_sparse_moe.experts.119.w3", "model.layers.10.block_sparse_moe.experts.120.w3", "model.layers.10.block_sparse_moe.experts.121.w3", "model.layers.10.block_sparse_moe.experts.122.w3", "model.layers.10.block_sparse_moe.experts.123.w3", "model.layers.10.block_sparse_moe.experts.124.w3", "model.layers.10.block_sparse_moe.experts.125.w3", "model.layers.10.block_sparse_moe.experts.126.w3", "model.layers.10.block_sparse_moe.experts.127.w3", "model.layers.10.block_sparse_moe.experts.128.w3", "model.layers.10.block_sparse_moe.experts.129.w3", "model.layers.10.block_sparse_moe.experts.130.w3", "model.layers.10.block_sparse_moe.experts.131.w3", "model.layers.10.block_sparse_moe.experts.132.w3", "model.layers.10.block_sparse_moe.experts.133.w3", "model.layers.10.block_sparse_moe.experts.134.w3", "model.layers.10.block_sparse_moe.experts.135.w3", "model.layers.10.block_sparse_moe.experts.136.w3", "model.layers.10.block_sparse_moe.experts.137.w3", "model.layers.10.block_sparse_moe.experts.138.w3", "model.layers.10.block_sparse_moe.experts.139.w3", "model.layers.10.block_sparse_moe.experts.140.w3", "model.layers.10.block_sparse_moe.experts.141.w3", "model.layers.10.block_sparse_moe.experts.142.w3", "model.layers.10.block_sparse_moe.experts.143.w3", "model.layers.10.block_sparse_moe.experts.144.w3", "model.layers.10.block_sparse_moe.experts.145.w3", "model.layers.10.block_sparse_moe.experts.146.w3", "model.layers.10.block_sparse_moe.experts.147.w3", "model.layers.10.block_sparse_moe.experts.148.w3", "model.layers.10.block_sparse_moe.experts.149.w3", "model.layers.10.block_sparse_moe.experts.150.w3", "model.layers.10.block_sparse_moe.experts.151.w3", "model.layers.10.block_sparse_moe.experts.152.w3", "model.layers.10.block_sparse_moe.experts.153.w3", "model.layers.10.block_sparse_moe.experts.154.w3", "model.layers.10.block_sparse_moe.experts.155.w3", "model.layers.10.block_sparse_moe.experts.156.w3", "model.layers.10.block_sparse_moe.experts.157.w3", "model.layers.10.block_sparse_moe.experts.158.w3", "model.layers.10.block_sparse_moe.experts.159.w3", "model.layers.10.block_sparse_moe.experts.160.w3", "model.layers.10.block_sparse_moe.experts.161.w3", "model.layers.10.block_sparse_moe.experts.162.w3", "model.layers.10.block_sparse_moe.experts.163.w3", "model.layers.10.block_sparse_moe.experts.164.w3", "model.layers.10.block_sparse_moe.experts.165.w3", "model.layers.10.block_sparse_moe.experts.166.w3", "model.layers.10.block_sparse_moe.experts.167.w3", "model.layers.10.block_sparse_moe.experts.168.w3", "model.layers.10.block_sparse_moe.experts.169.w3", "model.layers.10.block_sparse_moe.experts.170.w3", "model.layers.10.block_sparse_moe.experts.171.w3", "model.layers.10.block_sparse_moe.experts.172.w3", "model.layers.10.block_sparse_moe.experts.173.w3", "model.layers.10.block_sparse_moe.experts.174.w3", "model.layers.10.block_sparse_moe.experts.175.w3", "model.layers.10.block_sparse_moe.experts.176.w3", "model.layers.10.block_sparse_moe.experts.177.w3", "model.layers.10.block_sparse_moe.experts.178.w3", "model.layers.10.block_sparse_moe.experts.179.w3", "model.layers.10.block_sparse_moe.experts.180.w3", "model.layers.10.block_sparse_moe.experts.181.w3", "model.layers.10.block_sparse_moe.experts.182.w3", "model.layers.10.block_sparse_moe.experts.183.w3", "model.layers.10.block_sparse_moe.experts.184.w3", "model.layers.10.block_sparse_moe.experts.185.w3", "model.layers.10.block_sparse_moe.experts.186.w3", "model.layers.10.block_sparse_moe.experts.187.w3", "model.layers.10.block_sparse_moe.experts.188.w3", "model.layers.10.block_sparse_moe.experts.189.w3", "model.layers.10.block_sparse_moe.experts.190.w3", "model.layers.10.block_sparse_moe.experts.191.w3", "model.layers.10.block_sparse_moe.experts.192.w3", "model.layers.10.block_sparse_moe.experts.193.w3", "model.layers.10.block_sparse_moe.experts.194.w3", "model.layers.10.block_sparse_moe.experts.195.w3", "model.layers.10.block_sparse_moe.experts.196.w3", "model.layers.10.block_sparse_moe.experts.197.w3", "model.layers.10.block_sparse_moe.experts.198.w3", "model.layers.10.block_sparse_moe.experts.199.w3", "model.layers.10.block_sparse_moe.experts.200.w3", "model.layers.10.block_sparse_moe.experts.201.w3", "model.layers.10.block_sparse_moe.experts.202.w3", "model.layers.10.block_sparse_moe.experts.203.w3", "model.layers.10.block_sparse_moe.experts.204.w3", "model.layers.10.block_sparse_moe.experts.205.w3", "model.layers.10.block_sparse_moe.experts.206.w3", "model.layers.10.block_sparse_moe.experts.207.w3", "model.layers.10.block_sparse_moe.experts.208.w3", "model.layers.10.block_sparse_moe.experts.209.w3", "model.layers.10.block_sparse_moe.experts.210.w3", "model.layers.10.block_sparse_moe.experts.211.w3", "model.layers.10.block_sparse_moe.experts.212.w3", "model.layers.10.block_sparse_moe.experts.213.w3", "model.layers.10.block_sparse_moe.experts.214.w3", "model.layers.10.block_sparse_moe.experts.215.w3", "model.layers.10.block_sparse_moe.experts.216.w3", "model.layers.10.block_sparse_moe.experts.217.w3", "model.layers.10.block_sparse_moe.experts.218.w3", "model.layers.10.block_sparse_moe.experts.219.w3", "model.layers.10.block_sparse_moe.experts.220.w3", "model.layers.10.block_sparse_moe.experts.221.w3", "model.layers.10.block_sparse_moe.experts.222.w3", "model.layers.10.block_sparse_moe.experts.223.w3", "model.layers.10.block_sparse_moe.experts.224.w3", "model.layers.10.block_sparse_moe.experts.225.w3", "model.layers.10.block_sparse_moe.experts.226.w3", "model.layers.10.block_sparse_moe.experts.227.w3", "model.layers.10.block_sparse_moe.experts.228.w3", "model.layers.10.block_sparse_moe.experts.229.w3", "model.layers.10.block_sparse_moe.experts.230.w3", "model.layers.10.block_sparse_moe.experts.231.w3", "model.layers.10.block_sparse_moe.experts.232.w3", "model.layers.10.block_sparse_moe.experts.233.w3", "model.layers.10.block_sparse_moe.experts.234.w3", "model.layers.10.block_sparse_moe.experts.235.w3", "model.layers.10.block_sparse_moe.experts.236.w3", "model.layers.10.block_sparse_moe.experts.237.w3", "model.layers.10.block_sparse_moe.experts.238.w3", "model.layers.10.block_sparse_moe.experts.239.w3", "model.layers.10.block_sparse_moe.experts.240.w3", "model.layers.10.block_sparse_moe.experts.241.w3", "model.layers.10.block_sparse_moe.experts.242.w3", "model.layers.10.block_sparse_moe.experts.243.w3", "model.layers.10.block_sparse_moe.experts.244.w3", "model.layers.10.block_sparse_moe.experts.245.w3", "model.layers.10.block_sparse_moe.experts.246.w3", "model.layers.10.block_sparse_moe.experts.247.w3", "model.layers.10.block_sparse_moe.experts.248.w3", "model.layers.10.block_sparse_moe.experts.249.w3", "model.layers.10.block_sparse_moe.experts.250.w3", "model.layers.10.block_sparse_moe.experts.251.w3", "model.layers.10.block_sparse_moe.experts.252.w3", "model.layers.10.block_sparse_moe.experts.253.w3", "model.layers.10.block_sparse_moe.experts.254.w3", "model.layers.10.block_sparse_moe.experts.255.w3", "model.layers.10.block_sparse_moe.experts.0.w2", "model.layers.10.block_sparse_moe.experts.1.w2", "model.layers.10.block_sparse_moe.experts.2.w2", "model.layers.10.block_sparse_moe.experts.3.w2", "model.layers.10.block_sparse_moe.experts.4.w2", "model.layers.10.block_sparse_moe.experts.5.w2", "model.layers.10.block_sparse_moe.experts.6.w2", "model.layers.10.block_sparse_moe.experts.7.w2", "model.layers.10.block_sparse_moe.experts.8.w2", "model.layers.10.block_sparse_moe.experts.9.w2", "model.layers.10.block_sparse_moe.experts.10.w2", "model.layers.10.block_sparse_moe.experts.11.w2", "model.layers.10.block_sparse_moe.experts.12.w2", "model.layers.10.block_sparse_moe.experts.13.w2", "model.layers.10.block_sparse_moe.experts.14.w2", "model.layers.10.block_sparse_moe.experts.15.w2", "model.layers.10.block_sparse_moe.experts.16.w2", "model.layers.10.block_sparse_moe.experts.17.w2", "model.layers.10.block_sparse_moe.experts.18.w2", "model.layers.10.block_sparse_moe.experts.19.w2", "model.layers.10.block_sparse_moe.experts.20.w2", "model.layers.10.block_sparse_moe.experts.21.w2", "model.layers.10.block_sparse_moe.experts.22.w2", "model.layers.10.block_sparse_moe.experts.23.w2", "model.layers.10.block_sparse_moe.experts.24.w2", "model.layers.10.block_sparse_moe.experts.25.w2", "model.layers.10.block_sparse_moe.experts.26.w2", "model.layers.10.block_sparse_moe.experts.27.w2", "model.layers.10.block_sparse_moe.experts.28.w2", "model.layers.10.block_sparse_moe.experts.29.w2", "model.layers.10.block_sparse_moe.experts.30.w2", "model.layers.10.block_sparse_moe.experts.31.w2", "model.layers.10.block_sparse_moe.experts.32.w2", "model.layers.10.block_sparse_moe.experts.33.w2", "model.layers.10.block_sparse_moe.experts.34.w2", "model.layers.10.block_sparse_moe.experts.35.w2", "model.layers.10.block_sparse_moe.experts.36.w2", "model.layers.10.block_sparse_moe.experts.37.w2", "model.layers.10.block_sparse_moe.experts.38.w2", "model.layers.10.block_sparse_moe.experts.39.w2", "model.layers.10.block_sparse_moe.experts.40.w2", "model.layers.10.block_sparse_moe.experts.41.w2", "model.layers.10.block_sparse_moe.experts.42.w2", "model.layers.10.block_sparse_moe.experts.43.w2", "model.layers.10.block_sparse_moe.experts.44.w2", "model.layers.10.block_sparse_moe.experts.45.w2", "model.layers.10.block_sparse_moe.experts.46.w2", "model.layers.10.block_sparse_moe.experts.47.w2", "model.layers.10.block_sparse_moe.experts.48.w2", "model.layers.10.block_sparse_moe.experts.49.w2", "model.layers.10.block_sparse_moe.experts.50.w2", "model.layers.10.block_sparse_moe.experts.51.w2", "model.layers.10.block_sparse_moe.experts.52.w2", "model.layers.10.block_sparse_moe.experts.53.w2", "model.layers.10.block_sparse_moe.experts.54.w2", "model.layers.10.block_sparse_moe.experts.55.w2", "model.layers.10.block_sparse_moe.experts.56.w2", "model.layers.10.block_sparse_moe.experts.57.w2", "model.layers.10.block_sparse_moe.experts.58.w2", "model.layers.10.block_sparse_moe.experts.59.w2", "model.layers.10.block_sparse_moe.experts.60.w2", "model.layers.10.block_sparse_moe.experts.61.w2", "model.layers.10.block_sparse_moe.experts.62.w2", "model.layers.10.block_sparse_moe.experts.63.w2", "model.layers.10.block_sparse_moe.experts.64.w2", "model.layers.10.block_sparse_moe.experts.65.w2", "model.layers.10.block_sparse_moe.experts.66.w2", "model.layers.10.block_sparse_moe.experts.67.w2", "model.layers.10.block_sparse_moe.experts.68.w2", "model.layers.10.block_sparse_moe.experts.69.w2", "model.layers.10.block_sparse_moe.experts.70.w2", "model.layers.10.block_sparse_moe.experts.71.w2", "model.layers.10.block_sparse_moe.experts.72.w2", "model.layers.10.block_sparse_moe.experts.73.w2", "model.layers.10.block_sparse_moe.experts.74.w2", "model.layers.10.block_sparse_moe.experts.75.w2", "model.layers.10.block_sparse_moe.experts.76.w2", "model.layers.10.block_sparse_moe.experts.77.w2", "model.layers.10.block_sparse_moe.experts.78.w2", "model.layers.10.block_sparse_moe.experts.79.w2", "model.layers.10.block_sparse_moe.experts.80.w2", "model.layers.10.block_sparse_moe.experts.81.w2", "model.layers.10.block_sparse_moe.experts.82.w2", "model.layers.10.block_sparse_moe.experts.83.w2", "model.layers.10.block_sparse_moe.experts.84.w2", "model.layers.10.block_sparse_moe.experts.85.w2", "model.layers.10.block_sparse_moe.experts.86.w2", "model.layers.10.block_sparse_moe.experts.87.w2", "model.layers.10.block_sparse_moe.experts.88.w2", "model.layers.10.block_sparse_moe.experts.89.w2", "model.layers.10.block_sparse_moe.experts.90.w2", "model.layers.10.block_sparse_moe.experts.91.w2", "model.layers.10.block_sparse_moe.experts.92.w2", "model.layers.10.block_sparse_moe.experts.93.w2", "model.layers.10.block_sparse_moe.experts.94.w2", "model.layers.10.block_sparse_moe.experts.95.w2", "model.layers.10.block_sparse_moe.experts.96.w2", "model.layers.10.block_sparse_moe.experts.97.w2", "model.layers.10.block_sparse_moe.experts.98.w2", "model.layers.10.block_sparse_moe.experts.99.w2", "model.layers.10.block_sparse_moe.experts.100.w2", "model.layers.10.block_sparse_moe.experts.101.w2", "model.layers.10.block_sparse_moe.experts.102.w2", "model.layers.10.block_sparse_moe.experts.103.w2", "model.layers.10.block_sparse_moe.experts.104.w2", "model.layers.10.block_sparse_moe.experts.105.w2", "model.layers.10.block_sparse_moe.experts.106.w2", "model.layers.10.block_sparse_moe.experts.107.w2", "model.layers.10.block_sparse_moe.experts.108.w2", "model.layers.10.block_sparse_moe.experts.109.w2", "model.layers.10.block_sparse_moe.experts.110.w2", "model.layers.10.block_sparse_moe.experts.111.w2", "model.layers.10.block_sparse_moe.experts.112.w2", "model.layers.10.block_sparse_moe.experts.113.w2", "model.layers.10.block_sparse_moe.experts.114.w2", "model.layers.10.block_sparse_moe.experts.115.w2", "model.layers.10.block_sparse_moe.experts.116.w2", "model.layers.10.block_sparse_moe.experts.117.w2", "model.layers.10.block_sparse_moe.experts.118.w2", "model.layers.10.block_sparse_moe.experts.119.w2", "model.layers.10.block_sparse_moe.experts.120.w2", "model.layers.10.block_sparse_moe.experts.121.w2", "model.layers.10.block_sparse_moe.experts.122.w2", "model.layers.10.block_sparse_moe.experts.123.w2", "model.layers.10.block_sparse_moe.experts.124.w2", "model.layers.10.block_sparse_moe.experts.125.w2", "model.layers.10.block_sparse_moe.experts.126.w2", "model.layers.10.block_sparse_moe.experts.127.w2", "model.layers.10.block_sparse_moe.experts.128.w2", "model.layers.10.block_sparse_moe.experts.129.w2", "model.layers.10.block_sparse_moe.experts.130.w2", "model.layers.10.block_sparse_moe.experts.131.w2", "model.layers.10.block_sparse_moe.experts.132.w2", "model.layers.10.block_sparse_moe.experts.133.w2", "model.layers.10.block_sparse_moe.experts.134.w2", "model.layers.10.block_sparse_moe.experts.135.w2", "model.layers.10.block_sparse_moe.experts.136.w2", "model.layers.10.block_sparse_moe.experts.137.w2", "model.layers.10.block_sparse_moe.experts.138.w2", "model.layers.10.block_sparse_moe.experts.139.w2", "model.layers.10.block_sparse_moe.experts.140.w2", "model.layers.10.block_sparse_moe.experts.141.w2", "model.layers.10.block_sparse_moe.experts.142.w2", "model.layers.10.block_sparse_moe.experts.143.w2", "model.layers.10.block_sparse_moe.experts.144.w2", "model.layers.10.block_sparse_moe.experts.145.w2", "model.layers.10.block_sparse_moe.experts.146.w2", "model.layers.10.block_sparse_moe.experts.147.w2", "model.layers.10.block_sparse_moe.experts.148.w2", "model.layers.10.block_sparse_moe.experts.149.w2", "model.layers.10.block_sparse_moe.experts.150.w2", "model.layers.10.block_sparse_moe.experts.151.w2", "model.layers.10.block_sparse_moe.experts.152.w2", "model.layers.10.block_sparse_moe.experts.153.w2", "model.layers.10.block_sparse_moe.experts.154.w2", "model.layers.10.block_sparse_moe.experts.155.w2", "model.layers.10.block_sparse_moe.experts.156.w2", "model.layers.10.block_sparse_moe.experts.157.w2", "model.layers.10.block_sparse_moe.experts.158.w2", "model.layers.10.block_sparse_moe.experts.159.w2", "model.layers.10.block_sparse_moe.experts.160.w2", "model.layers.10.block_sparse_moe.experts.161.w2", "model.layers.10.block_sparse_moe.experts.162.w2", "model.layers.10.block_sparse_moe.experts.163.w2", "model.layers.10.block_sparse_moe.experts.164.w2", "model.layers.10.block_sparse_moe.experts.165.w2", "model.layers.10.block_sparse_moe.experts.166.w2", "model.layers.10.block_sparse_moe.experts.167.w2", "model.layers.10.block_sparse_moe.experts.168.w2", "model.layers.10.block_sparse_moe.experts.169.w2", "model.layers.10.block_sparse_moe.experts.170.w2", "model.layers.10.block_sparse_moe.experts.171.w2", "model.layers.10.block_sparse_moe.experts.172.w2", "model.layers.10.block_sparse_moe.experts.173.w2", "model.layers.10.block_sparse_moe.experts.174.w2", "model.layers.10.block_sparse_moe.experts.175.w2", "model.layers.10.block_sparse_moe.experts.176.w2", "model.layers.10.block_sparse_moe.experts.177.w2", "model.layers.10.block_sparse_moe.experts.178.w2", "model.layers.10.block_sparse_moe.experts.179.w2", "model.layers.10.block_sparse_moe.experts.180.w2", "model.layers.10.block_sparse_moe.experts.181.w2", "model.layers.10.block_sparse_moe.experts.182.w2", "model.layers.10.block_sparse_moe.experts.183.w2", "model.layers.10.block_sparse_moe.experts.184.w2", "model.layers.10.block_sparse_moe.experts.185.w2", "model.layers.10.block_sparse_moe.experts.186.w2", "model.layers.10.block_sparse_moe.experts.187.w2", "model.layers.10.block_sparse_moe.experts.188.w2", "model.layers.10.block_sparse_moe.experts.189.w2", "model.layers.10.block_sparse_moe.experts.190.w2", "model.layers.10.block_sparse_moe.experts.191.w2", "model.layers.10.block_sparse_moe.experts.192.w2", "model.layers.10.block_sparse_moe.experts.193.w2", "model.layers.10.block_sparse_moe.experts.194.w2", "model.layers.10.block_sparse_moe.experts.195.w2", "model.layers.10.block_sparse_moe.experts.196.w2", "model.layers.10.block_sparse_moe.experts.197.w2", "model.layers.10.block_sparse_moe.experts.198.w2", "model.layers.10.block_sparse_moe.experts.199.w2", "model.layers.10.block_sparse_moe.experts.200.w2", "model.layers.10.block_sparse_moe.experts.201.w2", "model.layers.10.block_sparse_moe.experts.202.w2", "model.layers.10.block_sparse_moe.experts.203.w2", "model.layers.10.block_sparse_moe.experts.204.w2", "model.layers.10.block_sparse_moe.experts.205.w2", "model.layers.10.block_sparse_moe.experts.206.w2", "model.layers.10.block_sparse_moe.experts.207.w2", "model.layers.10.block_sparse_moe.experts.208.w2", "model.layers.10.block_sparse_moe.experts.209.w2", "model.layers.10.block_sparse_moe.experts.210.w2", "model.layers.10.block_sparse_moe.experts.211.w2", "model.layers.10.block_sparse_moe.experts.212.w2", "model.layers.10.block_sparse_moe.experts.213.w2", "model.layers.10.block_sparse_moe.experts.214.w2", "model.layers.10.block_sparse_moe.experts.215.w2", "model.layers.10.block_sparse_moe.experts.216.w2", "model.layers.10.block_sparse_moe.experts.217.w2", "model.layers.10.block_sparse_moe.experts.218.w2", "model.layers.10.block_sparse_moe.experts.219.w2", "model.layers.10.block_sparse_moe.experts.220.w2", "model.layers.10.block_sparse_moe.experts.221.w2", "model.layers.10.block_sparse_moe.experts.222.w2", "model.layers.10.block_sparse_moe.experts.223.w2", "model.layers.10.block_sparse_moe.experts.224.w2", "model.layers.10.block_sparse_moe.experts.225.w2", "model.layers.10.block_sparse_moe.experts.226.w2", "model.layers.10.block_sparse_moe.experts.227.w2", "model.layers.10.block_sparse_moe.experts.228.w2", "model.layers.10.block_sparse_moe.experts.229.w2", "model.layers.10.block_sparse_moe.experts.230.w2", "model.layers.10.block_sparse_moe.experts.231.w2", "model.layers.10.block_sparse_moe.experts.232.w2", "model.layers.10.block_sparse_moe.experts.233.w2", "model.layers.10.block_sparse_moe.experts.234.w2", "model.layers.10.block_sparse_moe.experts.235.w2", "model.layers.10.block_sparse_moe.experts.236.w2", "model.layers.10.block_sparse_moe.experts.237.w2", "model.layers.10.block_sparse_moe.experts.238.w2", "model.layers.10.block_sparse_moe.experts.239.w2", "model.layers.10.block_sparse_moe.experts.240.w2", "model.layers.10.block_sparse_moe.experts.241.w2", "model.layers.10.block_sparse_moe.experts.242.w2", "model.layers.10.block_sparse_moe.experts.243.w2", "model.layers.10.block_sparse_moe.experts.244.w2", "model.layers.10.block_sparse_moe.experts.245.w2", "model.layers.10.block_sparse_moe.experts.246.w2", "model.layers.10.block_sparse_moe.experts.247.w2", "model.layers.10.block_sparse_moe.experts.248.w2", "model.layers.10.block_sparse_moe.experts.249.w2", "model.layers.10.block_sparse_moe.experts.250.w2", "model.layers.10.block_sparse_moe.experts.251.w2", "model.layers.10.block_sparse_moe.experts.252.w2", "model.layers.10.block_sparse_moe.experts.253.w2", "model.layers.10.block_sparse_moe.experts.254.w2", "model.layers.10.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.002395856752991643, "dbits": 3623878656 } ] }, { "idx": 22, "layers": [ "model.layers.11.self_attn.q_proj", "model.layers.11.self_attn.k_proj", "model.layers.11.self_attn.v_proj", "model.layers.11.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0003783501684665458, "dbits": 44040192 } ] }, { "idx": 23, "layers": [ "model.layers.11.block_sparse_moe.experts.0.w1", "model.layers.11.block_sparse_moe.experts.1.w1", "model.layers.11.block_sparse_moe.experts.2.w1", "model.layers.11.block_sparse_moe.experts.3.w1", "model.layers.11.block_sparse_moe.experts.4.w1", "model.layers.11.block_sparse_moe.experts.5.w1", "model.layers.11.block_sparse_moe.experts.6.w1", "model.layers.11.block_sparse_moe.experts.7.w1", "model.layers.11.block_sparse_moe.experts.8.w1", "model.layers.11.block_sparse_moe.experts.9.w1", "model.layers.11.block_sparse_moe.experts.10.w1", "model.layers.11.block_sparse_moe.experts.11.w1", "model.layers.11.block_sparse_moe.experts.12.w1", "model.layers.11.block_sparse_moe.experts.13.w1", "model.layers.11.block_sparse_moe.experts.14.w1", "model.layers.11.block_sparse_moe.experts.15.w1", "model.layers.11.block_sparse_moe.experts.16.w1", "model.layers.11.block_sparse_moe.experts.17.w1", "model.layers.11.block_sparse_moe.experts.18.w1", "model.layers.11.block_sparse_moe.experts.19.w1", "model.layers.11.block_sparse_moe.experts.20.w1", "model.layers.11.block_sparse_moe.experts.21.w1", "model.layers.11.block_sparse_moe.experts.22.w1", "model.layers.11.block_sparse_moe.experts.23.w1", "model.layers.11.block_sparse_moe.experts.24.w1", "model.layers.11.block_sparse_moe.experts.25.w1", "model.layers.11.block_sparse_moe.experts.26.w1", "model.layers.11.block_sparse_moe.experts.27.w1", "model.layers.11.block_sparse_moe.experts.28.w1", "model.layers.11.block_sparse_moe.experts.29.w1", "model.layers.11.block_sparse_moe.experts.30.w1", "model.layers.11.block_sparse_moe.experts.31.w1", "model.layers.11.block_sparse_moe.experts.32.w1", "model.layers.11.block_sparse_moe.experts.33.w1", "model.layers.11.block_sparse_moe.experts.34.w1", "model.layers.11.block_sparse_moe.experts.35.w1", "model.layers.11.block_sparse_moe.experts.36.w1", "model.layers.11.block_sparse_moe.experts.37.w1", "model.layers.11.block_sparse_moe.experts.38.w1", "model.layers.11.block_sparse_moe.experts.39.w1", "model.layers.11.block_sparse_moe.experts.40.w1", "model.layers.11.block_sparse_moe.experts.41.w1", "model.layers.11.block_sparse_moe.experts.42.w1", "model.layers.11.block_sparse_moe.experts.43.w1", "model.layers.11.block_sparse_moe.experts.44.w1", "model.layers.11.block_sparse_moe.experts.45.w1", "model.layers.11.block_sparse_moe.experts.46.w1", "model.layers.11.block_sparse_moe.experts.47.w1", "model.layers.11.block_sparse_moe.experts.48.w1", "model.layers.11.block_sparse_moe.experts.49.w1", "model.layers.11.block_sparse_moe.experts.50.w1", "model.layers.11.block_sparse_moe.experts.51.w1", "model.layers.11.block_sparse_moe.experts.52.w1", "model.layers.11.block_sparse_moe.experts.53.w1", "model.layers.11.block_sparse_moe.experts.54.w1", "model.layers.11.block_sparse_moe.experts.55.w1", "model.layers.11.block_sparse_moe.experts.56.w1", "model.layers.11.block_sparse_moe.experts.57.w1", "model.layers.11.block_sparse_moe.experts.58.w1", "model.layers.11.block_sparse_moe.experts.59.w1", "model.layers.11.block_sparse_moe.experts.60.w1", "model.layers.11.block_sparse_moe.experts.61.w1", "model.layers.11.block_sparse_moe.experts.62.w1", "model.layers.11.block_sparse_moe.experts.63.w1", "model.layers.11.block_sparse_moe.experts.64.w1", "model.layers.11.block_sparse_moe.experts.65.w1", "model.layers.11.block_sparse_moe.experts.66.w1", "model.layers.11.block_sparse_moe.experts.67.w1", "model.layers.11.block_sparse_moe.experts.68.w1", "model.layers.11.block_sparse_moe.experts.69.w1", "model.layers.11.block_sparse_moe.experts.70.w1", "model.layers.11.block_sparse_moe.experts.71.w1", "model.layers.11.block_sparse_moe.experts.72.w1", "model.layers.11.block_sparse_moe.experts.73.w1", "model.layers.11.block_sparse_moe.experts.74.w1", "model.layers.11.block_sparse_moe.experts.75.w1", "model.layers.11.block_sparse_moe.experts.76.w1", "model.layers.11.block_sparse_moe.experts.77.w1", "model.layers.11.block_sparse_moe.experts.78.w1", "model.layers.11.block_sparse_moe.experts.79.w1", "model.layers.11.block_sparse_moe.experts.80.w1", "model.layers.11.block_sparse_moe.experts.81.w1", "model.layers.11.block_sparse_moe.experts.82.w1", "model.layers.11.block_sparse_moe.experts.83.w1", "model.layers.11.block_sparse_moe.experts.84.w1", "model.layers.11.block_sparse_moe.experts.85.w1", "model.layers.11.block_sparse_moe.experts.86.w1", "model.layers.11.block_sparse_moe.experts.87.w1", "model.layers.11.block_sparse_moe.experts.88.w1", "model.layers.11.block_sparse_moe.experts.89.w1", "model.layers.11.block_sparse_moe.experts.90.w1", "model.layers.11.block_sparse_moe.experts.91.w1", "model.layers.11.block_sparse_moe.experts.92.w1", "model.layers.11.block_sparse_moe.experts.93.w1", "model.layers.11.block_sparse_moe.experts.94.w1", "model.layers.11.block_sparse_moe.experts.95.w1", "model.layers.11.block_sparse_moe.experts.96.w1", "model.layers.11.block_sparse_moe.experts.97.w1", "model.layers.11.block_sparse_moe.experts.98.w1", "model.layers.11.block_sparse_moe.experts.99.w1", "model.layers.11.block_sparse_moe.experts.100.w1", "model.layers.11.block_sparse_moe.experts.101.w1", "model.layers.11.block_sparse_moe.experts.102.w1", "model.layers.11.block_sparse_moe.experts.103.w1", "model.layers.11.block_sparse_moe.experts.104.w1", "model.layers.11.block_sparse_moe.experts.105.w1", "model.layers.11.block_sparse_moe.experts.106.w1", "model.layers.11.block_sparse_moe.experts.107.w1", "model.layers.11.block_sparse_moe.experts.108.w1", "model.layers.11.block_sparse_moe.experts.109.w1", "model.layers.11.block_sparse_moe.experts.110.w1", "model.layers.11.block_sparse_moe.experts.111.w1", "model.layers.11.block_sparse_moe.experts.112.w1", "model.layers.11.block_sparse_moe.experts.113.w1", "model.layers.11.block_sparse_moe.experts.114.w1", "model.layers.11.block_sparse_moe.experts.115.w1", "model.layers.11.block_sparse_moe.experts.116.w1", "model.layers.11.block_sparse_moe.experts.117.w1", "model.layers.11.block_sparse_moe.experts.118.w1", "model.layers.11.block_sparse_moe.experts.119.w1", "model.layers.11.block_sparse_moe.experts.120.w1", "model.layers.11.block_sparse_moe.experts.121.w1", "model.layers.11.block_sparse_moe.experts.122.w1", "model.layers.11.block_sparse_moe.experts.123.w1", "model.layers.11.block_sparse_moe.experts.124.w1", "model.layers.11.block_sparse_moe.experts.125.w1", "model.layers.11.block_sparse_moe.experts.126.w1", "model.layers.11.block_sparse_moe.experts.127.w1", "model.layers.11.block_sparse_moe.experts.128.w1", "model.layers.11.block_sparse_moe.experts.129.w1", "model.layers.11.block_sparse_moe.experts.130.w1", "model.layers.11.block_sparse_moe.experts.131.w1", "model.layers.11.block_sparse_moe.experts.132.w1", "model.layers.11.block_sparse_moe.experts.133.w1", "model.layers.11.block_sparse_moe.experts.134.w1", "model.layers.11.block_sparse_moe.experts.135.w1", "model.layers.11.block_sparse_moe.experts.136.w1", "model.layers.11.block_sparse_moe.experts.137.w1", "model.layers.11.block_sparse_moe.experts.138.w1", "model.layers.11.block_sparse_moe.experts.139.w1", "model.layers.11.block_sparse_moe.experts.140.w1", "model.layers.11.block_sparse_moe.experts.141.w1", "model.layers.11.block_sparse_moe.experts.142.w1", "model.layers.11.block_sparse_moe.experts.143.w1", "model.layers.11.block_sparse_moe.experts.144.w1", "model.layers.11.block_sparse_moe.experts.145.w1", "model.layers.11.block_sparse_moe.experts.146.w1", "model.layers.11.block_sparse_moe.experts.147.w1", "model.layers.11.block_sparse_moe.experts.148.w1", "model.layers.11.block_sparse_moe.experts.149.w1", "model.layers.11.block_sparse_moe.experts.150.w1", "model.layers.11.block_sparse_moe.experts.151.w1", "model.layers.11.block_sparse_moe.experts.152.w1", "model.layers.11.block_sparse_moe.experts.153.w1", "model.layers.11.block_sparse_moe.experts.154.w1", "model.layers.11.block_sparse_moe.experts.155.w1", "model.layers.11.block_sparse_moe.experts.156.w1", "model.layers.11.block_sparse_moe.experts.157.w1", "model.layers.11.block_sparse_moe.experts.158.w1", "model.layers.11.block_sparse_moe.experts.159.w1", "model.layers.11.block_sparse_moe.experts.160.w1", "model.layers.11.block_sparse_moe.experts.161.w1", "model.layers.11.block_sparse_moe.experts.162.w1", "model.layers.11.block_sparse_moe.experts.163.w1", "model.layers.11.block_sparse_moe.experts.164.w1", "model.layers.11.block_sparse_moe.experts.165.w1", "model.layers.11.block_sparse_moe.experts.166.w1", "model.layers.11.block_sparse_moe.experts.167.w1", "model.layers.11.block_sparse_moe.experts.168.w1", "model.layers.11.block_sparse_moe.experts.169.w1", "model.layers.11.block_sparse_moe.experts.170.w1", "model.layers.11.block_sparse_moe.experts.171.w1", "model.layers.11.block_sparse_moe.experts.172.w1", "model.layers.11.block_sparse_moe.experts.173.w1", "model.layers.11.block_sparse_moe.experts.174.w1", "model.layers.11.block_sparse_moe.experts.175.w1", "model.layers.11.block_sparse_moe.experts.176.w1", "model.layers.11.block_sparse_moe.experts.177.w1", "model.layers.11.block_sparse_moe.experts.178.w1", "model.layers.11.block_sparse_moe.experts.179.w1", "model.layers.11.block_sparse_moe.experts.180.w1", "model.layers.11.block_sparse_moe.experts.181.w1", "model.layers.11.block_sparse_moe.experts.182.w1", "model.layers.11.block_sparse_moe.experts.183.w1", "model.layers.11.block_sparse_moe.experts.184.w1", "model.layers.11.block_sparse_moe.experts.185.w1", "model.layers.11.block_sparse_moe.experts.186.w1", "model.layers.11.block_sparse_moe.experts.187.w1", "model.layers.11.block_sparse_moe.experts.188.w1", "model.layers.11.block_sparse_moe.experts.189.w1", "model.layers.11.block_sparse_moe.experts.190.w1", "model.layers.11.block_sparse_moe.experts.191.w1", "model.layers.11.block_sparse_moe.experts.192.w1", "model.layers.11.block_sparse_moe.experts.193.w1", "model.layers.11.block_sparse_moe.experts.194.w1", "model.layers.11.block_sparse_moe.experts.195.w1", "model.layers.11.block_sparse_moe.experts.196.w1", "model.layers.11.block_sparse_moe.experts.197.w1", "model.layers.11.block_sparse_moe.experts.198.w1", "model.layers.11.block_sparse_moe.experts.199.w1", "model.layers.11.block_sparse_moe.experts.200.w1", "model.layers.11.block_sparse_moe.experts.201.w1", "model.layers.11.block_sparse_moe.experts.202.w1", "model.layers.11.block_sparse_moe.experts.203.w1", "model.layers.11.block_sparse_moe.experts.204.w1", "model.layers.11.block_sparse_moe.experts.205.w1", "model.layers.11.block_sparse_moe.experts.206.w1", "model.layers.11.block_sparse_moe.experts.207.w1", "model.layers.11.block_sparse_moe.experts.208.w1", "model.layers.11.block_sparse_moe.experts.209.w1", "model.layers.11.block_sparse_moe.experts.210.w1", "model.layers.11.block_sparse_moe.experts.211.w1", "model.layers.11.block_sparse_moe.experts.212.w1", "model.layers.11.block_sparse_moe.experts.213.w1", "model.layers.11.block_sparse_moe.experts.214.w1", "model.layers.11.block_sparse_moe.experts.215.w1", "model.layers.11.block_sparse_moe.experts.216.w1", "model.layers.11.block_sparse_moe.experts.217.w1", "model.layers.11.block_sparse_moe.experts.218.w1", "model.layers.11.block_sparse_moe.experts.219.w1", "model.layers.11.block_sparse_moe.experts.220.w1", "model.layers.11.block_sparse_moe.experts.221.w1", "model.layers.11.block_sparse_moe.experts.222.w1", "model.layers.11.block_sparse_moe.experts.223.w1", "model.layers.11.block_sparse_moe.experts.224.w1", "model.layers.11.block_sparse_moe.experts.225.w1", "model.layers.11.block_sparse_moe.experts.226.w1", "model.layers.11.block_sparse_moe.experts.227.w1", "model.layers.11.block_sparse_moe.experts.228.w1", "model.layers.11.block_sparse_moe.experts.229.w1", "model.layers.11.block_sparse_moe.experts.230.w1", "model.layers.11.block_sparse_moe.experts.231.w1", "model.layers.11.block_sparse_moe.experts.232.w1", "model.layers.11.block_sparse_moe.experts.233.w1", "model.layers.11.block_sparse_moe.experts.234.w1", "model.layers.11.block_sparse_moe.experts.235.w1", "model.layers.11.block_sparse_moe.experts.236.w1", "model.layers.11.block_sparse_moe.experts.237.w1", "model.layers.11.block_sparse_moe.experts.238.w1", "model.layers.11.block_sparse_moe.experts.239.w1", "model.layers.11.block_sparse_moe.experts.240.w1", "model.layers.11.block_sparse_moe.experts.241.w1", "model.layers.11.block_sparse_moe.experts.242.w1", "model.layers.11.block_sparse_moe.experts.243.w1", "model.layers.11.block_sparse_moe.experts.244.w1", "model.layers.11.block_sparse_moe.experts.245.w1", "model.layers.11.block_sparse_moe.experts.246.w1", "model.layers.11.block_sparse_moe.experts.247.w1", "model.layers.11.block_sparse_moe.experts.248.w1", "model.layers.11.block_sparse_moe.experts.249.w1", "model.layers.11.block_sparse_moe.experts.250.w1", "model.layers.11.block_sparse_moe.experts.251.w1", "model.layers.11.block_sparse_moe.experts.252.w1", "model.layers.11.block_sparse_moe.experts.253.w1", "model.layers.11.block_sparse_moe.experts.254.w1", "model.layers.11.block_sparse_moe.experts.255.w1", "model.layers.11.block_sparse_moe.experts.0.w3", "model.layers.11.block_sparse_moe.experts.1.w3", "model.layers.11.block_sparse_moe.experts.2.w3", "model.layers.11.block_sparse_moe.experts.3.w3", "model.layers.11.block_sparse_moe.experts.4.w3", "model.layers.11.block_sparse_moe.experts.5.w3", "model.layers.11.block_sparse_moe.experts.6.w3", "model.layers.11.block_sparse_moe.experts.7.w3", "model.layers.11.block_sparse_moe.experts.8.w3", "model.layers.11.block_sparse_moe.experts.9.w3", "model.layers.11.block_sparse_moe.experts.10.w3", "model.layers.11.block_sparse_moe.experts.11.w3", "model.layers.11.block_sparse_moe.experts.12.w3", "model.layers.11.block_sparse_moe.experts.13.w3", "model.layers.11.block_sparse_moe.experts.14.w3", "model.layers.11.block_sparse_moe.experts.15.w3", "model.layers.11.block_sparse_moe.experts.16.w3", "model.layers.11.block_sparse_moe.experts.17.w3", "model.layers.11.block_sparse_moe.experts.18.w3", "model.layers.11.block_sparse_moe.experts.19.w3", "model.layers.11.block_sparse_moe.experts.20.w3", "model.layers.11.block_sparse_moe.experts.21.w3", "model.layers.11.block_sparse_moe.experts.22.w3", "model.layers.11.block_sparse_moe.experts.23.w3", "model.layers.11.block_sparse_moe.experts.24.w3", "model.layers.11.block_sparse_moe.experts.25.w3", "model.layers.11.block_sparse_moe.experts.26.w3", "model.layers.11.block_sparse_moe.experts.27.w3", "model.layers.11.block_sparse_moe.experts.28.w3", "model.layers.11.block_sparse_moe.experts.29.w3", "model.layers.11.block_sparse_moe.experts.30.w3", "model.layers.11.block_sparse_moe.experts.31.w3", "model.layers.11.block_sparse_moe.experts.32.w3", "model.layers.11.block_sparse_moe.experts.33.w3", "model.layers.11.block_sparse_moe.experts.34.w3", "model.layers.11.block_sparse_moe.experts.35.w3", "model.layers.11.block_sparse_moe.experts.36.w3", "model.layers.11.block_sparse_moe.experts.37.w3", "model.layers.11.block_sparse_moe.experts.38.w3", "model.layers.11.block_sparse_moe.experts.39.w3", "model.layers.11.block_sparse_moe.experts.40.w3", "model.layers.11.block_sparse_moe.experts.41.w3", "model.layers.11.block_sparse_moe.experts.42.w3", "model.layers.11.block_sparse_moe.experts.43.w3", "model.layers.11.block_sparse_moe.experts.44.w3", "model.layers.11.block_sparse_moe.experts.45.w3", "model.layers.11.block_sparse_moe.experts.46.w3", "model.layers.11.block_sparse_moe.experts.47.w3", "model.layers.11.block_sparse_moe.experts.48.w3", "model.layers.11.block_sparse_moe.experts.49.w3", "model.layers.11.block_sparse_moe.experts.50.w3", "model.layers.11.block_sparse_moe.experts.51.w3", "model.layers.11.block_sparse_moe.experts.52.w3", "model.layers.11.block_sparse_moe.experts.53.w3", "model.layers.11.block_sparse_moe.experts.54.w3", "model.layers.11.block_sparse_moe.experts.55.w3", "model.layers.11.block_sparse_moe.experts.56.w3", "model.layers.11.block_sparse_moe.experts.57.w3", "model.layers.11.block_sparse_moe.experts.58.w3", "model.layers.11.block_sparse_moe.experts.59.w3", "model.layers.11.block_sparse_moe.experts.60.w3", "model.layers.11.block_sparse_moe.experts.61.w3", "model.layers.11.block_sparse_moe.experts.62.w3", "model.layers.11.block_sparse_moe.experts.63.w3", "model.layers.11.block_sparse_moe.experts.64.w3", "model.layers.11.block_sparse_moe.experts.65.w3", "model.layers.11.block_sparse_moe.experts.66.w3", "model.layers.11.block_sparse_moe.experts.67.w3", "model.layers.11.block_sparse_moe.experts.68.w3", "model.layers.11.block_sparse_moe.experts.69.w3", "model.layers.11.block_sparse_moe.experts.70.w3", "model.layers.11.block_sparse_moe.experts.71.w3", "model.layers.11.block_sparse_moe.experts.72.w3", "model.layers.11.block_sparse_moe.experts.73.w3", "model.layers.11.block_sparse_moe.experts.74.w3", "model.layers.11.block_sparse_moe.experts.75.w3", "model.layers.11.block_sparse_moe.experts.76.w3", "model.layers.11.block_sparse_moe.experts.77.w3", "model.layers.11.block_sparse_moe.experts.78.w3", "model.layers.11.block_sparse_moe.experts.79.w3", "model.layers.11.block_sparse_moe.experts.80.w3", "model.layers.11.block_sparse_moe.experts.81.w3", "model.layers.11.block_sparse_moe.experts.82.w3", "model.layers.11.block_sparse_moe.experts.83.w3", "model.layers.11.block_sparse_moe.experts.84.w3", "model.layers.11.block_sparse_moe.experts.85.w3", "model.layers.11.block_sparse_moe.experts.86.w3", "model.layers.11.block_sparse_moe.experts.87.w3", "model.layers.11.block_sparse_moe.experts.88.w3", "model.layers.11.block_sparse_moe.experts.89.w3", "model.layers.11.block_sparse_moe.experts.90.w3", "model.layers.11.block_sparse_moe.experts.91.w3", "model.layers.11.block_sparse_moe.experts.92.w3", "model.layers.11.block_sparse_moe.experts.93.w3", "model.layers.11.block_sparse_moe.experts.94.w3", "model.layers.11.block_sparse_moe.experts.95.w3", "model.layers.11.block_sparse_moe.experts.96.w3", "model.layers.11.block_sparse_moe.experts.97.w3", "model.layers.11.block_sparse_moe.experts.98.w3", "model.layers.11.block_sparse_moe.experts.99.w3", "model.layers.11.block_sparse_moe.experts.100.w3", "model.layers.11.block_sparse_moe.experts.101.w3", "model.layers.11.block_sparse_moe.experts.102.w3", "model.layers.11.block_sparse_moe.experts.103.w3", "model.layers.11.block_sparse_moe.experts.104.w3", "model.layers.11.block_sparse_moe.experts.105.w3", "model.layers.11.block_sparse_moe.experts.106.w3", "model.layers.11.block_sparse_moe.experts.107.w3", "model.layers.11.block_sparse_moe.experts.108.w3", "model.layers.11.block_sparse_moe.experts.109.w3", "model.layers.11.block_sparse_moe.experts.110.w3", "model.layers.11.block_sparse_moe.experts.111.w3", "model.layers.11.block_sparse_moe.experts.112.w3", "model.layers.11.block_sparse_moe.experts.113.w3", "model.layers.11.block_sparse_moe.experts.114.w3", "model.layers.11.block_sparse_moe.experts.115.w3", "model.layers.11.block_sparse_moe.experts.116.w3", "model.layers.11.block_sparse_moe.experts.117.w3", "model.layers.11.block_sparse_moe.experts.118.w3", "model.layers.11.block_sparse_moe.experts.119.w3", "model.layers.11.block_sparse_moe.experts.120.w3", "model.layers.11.block_sparse_moe.experts.121.w3", "model.layers.11.block_sparse_moe.experts.122.w3", "model.layers.11.block_sparse_moe.experts.123.w3", "model.layers.11.block_sparse_moe.experts.124.w3", "model.layers.11.block_sparse_moe.experts.125.w3", "model.layers.11.block_sparse_moe.experts.126.w3", "model.layers.11.block_sparse_moe.experts.127.w3", "model.layers.11.block_sparse_moe.experts.128.w3", "model.layers.11.block_sparse_moe.experts.129.w3", "model.layers.11.block_sparse_moe.experts.130.w3", "model.layers.11.block_sparse_moe.experts.131.w3", "model.layers.11.block_sparse_moe.experts.132.w3", "model.layers.11.block_sparse_moe.experts.133.w3", "model.layers.11.block_sparse_moe.experts.134.w3", "model.layers.11.block_sparse_moe.experts.135.w3", "model.layers.11.block_sparse_moe.experts.136.w3", "model.layers.11.block_sparse_moe.experts.137.w3", "model.layers.11.block_sparse_moe.experts.138.w3", "model.layers.11.block_sparse_moe.experts.139.w3", "model.layers.11.block_sparse_moe.experts.140.w3", "model.layers.11.block_sparse_moe.experts.141.w3", "model.layers.11.block_sparse_moe.experts.142.w3", "model.layers.11.block_sparse_moe.experts.143.w3", "model.layers.11.block_sparse_moe.experts.144.w3", "model.layers.11.block_sparse_moe.experts.145.w3", "model.layers.11.block_sparse_moe.experts.146.w3", "model.layers.11.block_sparse_moe.experts.147.w3", "model.layers.11.block_sparse_moe.experts.148.w3", "model.layers.11.block_sparse_moe.experts.149.w3", "model.layers.11.block_sparse_moe.experts.150.w3", "model.layers.11.block_sparse_moe.experts.151.w3", "model.layers.11.block_sparse_moe.experts.152.w3", "model.layers.11.block_sparse_moe.experts.153.w3", "model.layers.11.block_sparse_moe.experts.154.w3", "model.layers.11.block_sparse_moe.experts.155.w3", "model.layers.11.block_sparse_moe.experts.156.w3", "model.layers.11.block_sparse_moe.experts.157.w3", "model.layers.11.block_sparse_moe.experts.158.w3", "model.layers.11.block_sparse_moe.experts.159.w3", "model.layers.11.block_sparse_moe.experts.160.w3", "model.layers.11.block_sparse_moe.experts.161.w3", "model.layers.11.block_sparse_moe.experts.162.w3", "model.layers.11.block_sparse_moe.experts.163.w3", "model.layers.11.block_sparse_moe.experts.164.w3", "model.layers.11.block_sparse_moe.experts.165.w3", "model.layers.11.block_sparse_moe.experts.166.w3", "model.layers.11.block_sparse_moe.experts.167.w3", "model.layers.11.block_sparse_moe.experts.168.w3", "model.layers.11.block_sparse_moe.experts.169.w3", "model.layers.11.block_sparse_moe.experts.170.w3", "model.layers.11.block_sparse_moe.experts.171.w3", "model.layers.11.block_sparse_moe.experts.172.w3", "model.layers.11.block_sparse_moe.experts.173.w3", "model.layers.11.block_sparse_moe.experts.174.w3", "model.layers.11.block_sparse_moe.experts.175.w3", "model.layers.11.block_sparse_moe.experts.176.w3", "model.layers.11.block_sparse_moe.experts.177.w3", "model.layers.11.block_sparse_moe.experts.178.w3", "model.layers.11.block_sparse_moe.experts.179.w3", "model.layers.11.block_sparse_moe.experts.180.w3", "model.layers.11.block_sparse_moe.experts.181.w3", "model.layers.11.block_sparse_moe.experts.182.w3", "model.layers.11.block_sparse_moe.experts.183.w3", "model.layers.11.block_sparse_moe.experts.184.w3", "model.layers.11.block_sparse_moe.experts.185.w3", "model.layers.11.block_sparse_moe.experts.186.w3", "model.layers.11.block_sparse_moe.experts.187.w3", "model.layers.11.block_sparse_moe.experts.188.w3", "model.layers.11.block_sparse_moe.experts.189.w3", "model.layers.11.block_sparse_moe.experts.190.w3", "model.layers.11.block_sparse_moe.experts.191.w3", "model.layers.11.block_sparse_moe.experts.192.w3", "model.layers.11.block_sparse_moe.experts.193.w3", "model.layers.11.block_sparse_moe.experts.194.w3", "model.layers.11.block_sparse_moe.experts.195.w3", "model.layers.11.block_sparse_moe.experts.196.w3", "model.layers.11.block_sparse_moe.experts.197.w3", "model.layers.11.block_sparse_moe.experts.198.w3", "model.layers.11.block_sparse_moe.experts.199.w3", "model.layers.11.block_sparse_moe.experts.200.w3", "model.layers.11.block_sparse_moe.experts.201.w3", "model.layers.11.block_sparse_moe.experts.202.w3", "model.layers.11.block_sparse_moe.experts.203.w3", "model.layers.11.block_sparse_moe.experts.204.w3", "model.layers.11.block_sparse_moe.experts.205.w3", "model.layers.11.block_sparse_moe.experts.206.w3", "model.layers.11.block_sparse_moe.experts.207.w3", "model.layers.11.block_sparse_moe.experts.208.w3", "model.layers.11.block_sparse_moe.experts.209.w3", "model.layers.11.block_sparse_moe.experts.210.w3", "model.layers.11.block_sparse_moe.experts.211.w3", "model.layers.11.block_sparse_moe.experts.212.w3", "model.layers.11.block_sparse_moe.experts.213.w3", "model.layers.11.block_sparse_moe.experts.214.w3", "model.layers.11.block_sparse_moe.experts.215.w3", "model.layers.11.block_sparse_moe.experts.216.w3", "model.layers.11.block_sparse_moe.experts.217.w3", "model.layers.11.block_sparse_moe.experts.218.w3", "model.layers.11.block_sparse_moe.experts.219.w3", "model.layers.11.block_sparse_moe.experts.220.w3", "model.layers.11.block_sparse_moe.experts.221.w3", "model.layers.11.block_sparse_moe.experts.222.w3", "model.layers.11.block_sparse_moe.experts.223.w3", "model.layers.11.block_sparse_moe.experts.224.w3", "model.layers.11.block_sparse_moe.experts.225.w3", "model.layers.11.block_sparse_moe.experts.226.w3", "model.layers.11.block_sparse_moe.experts.227.w3", "model.layers.11.block_sparse_moe.experts.228.w3", "model.layers.11.block_sparse_moe.experts.229.w3", "model.layers.11.block_sparse_moe.experts.230.w3", "model.layers.11.block_sparse_moe.experts.231.w3", "model.layers.11.block_sparse_moe.experts.232.w3", "model.layers.11.block_sparse_moe.experts.233.w3", "model.layers.11.block_sparse_moe.experts.234.w3", "model.layers.11.block_sparse_moe.experts.235.w3", "model.layers.11.block_sparse_moe.experts.236.w3", "model.layers.11.block_sparse_moe.experts.237.w3", "model.layers.11.block_sparse_moe.experts.238.w3", "model.layers.11.block_sparse_moe.experts.239.w3", "model.layers.11.block_sparse_moe.experts.240.w3", "model.layers.11.block_sparse_moe.experts.241.w3", "model.layers.11.block_sparse_moe.experts.242.w3", "model.layers.11.block_sparse_moe.experts.243.w3", "model.layers.11.block_sparse_moe.experts.244.w3", "model.layers.11.block_sparse_moe.experts.245.w3", "model.layers.11.block_sparse_moe.experts.246.w3", "model.layers.11.block_sparse_moe.experts.247.w3", "model.layers.11.block_sparse_moe.experts.248.w3", "model.layers.11.block_sparse_moe.experts.249.w3", "model.layers.11.block_sparse_moe.experts.250.w3", "model.layers.11.block_sparse_moe.experts.251.w3", "model.layers.11.block_sparse_moe.experts.252.w3", "model.layers.11.block_sparse_moe.experts.253.w3", "model.layers.11.block_sparse_moe.experts.254.w3", "model.layers.11.block_sparse_moe.experts.255.w3", "model.layers.11.block_sparse_moe.experts.0.w2", "model.layers.11.block_sparse_moe.experts.1.w2", "model.layers.11.block_sparse_moe.experts.2.w2", "model.layers.11.block_sparse_moe.experts.3.w2", "model.layers.11.block_sparse_moe.experts.4.w2", "model.layers.11.block_sparse_moe.experts.5.w2", "model.layers.11.block_sparse_moe.experts.6.w2", "model.layers.11.block_sparse_moe.experts.7.w2", "model.layers.11.block_sparse_moe.experts.8.w2", "model.layers.11.block_sparse_moe.experts.9.w2", "model.layers.11.block_sparse_moe.experts.10.w2", "model.layers.11.block_sparse_moe.experts.11.w2", "model.layers.11.block_sparse_moe.experts.12.w2", "model.layers.11.block_sparse_moe.experts.13.w2", "model.layers.11.block_sparse_moe.experts.14.w2", "model.layers.11.block_sparse_moe.experts.15.w2", "model.layers.11.block_sparse_moe.experts.16.w2", "model.layers.11.block_sparse_moe.experts.17.w2", "model.layers.11.block_sparse_moe.experts.18.w2", "model.layers.11.block_sparse_moe.experts.19.w2", "model.layers.11.block_sparse_moe.experts.20.w2", "model.layers.11.block_sparse_moe.experts.21.w2", "model.layers.11.block_sparse_moe.experts.22.w2", "model.layers.11.block_sparse_moe.experts.23.w2", "model.layers.11.block_sparse_moe.experts.24.w2", "model.layers.11.block_sparse_moe.experts.25.w2", "model.layers.11.block_sparse_moe.experts.26.w2", "model.layers.11.block_sparse_moe.experts.27.w2", "model.layers.11.block_sparse_moe.experts.28.w2", "model.layers.11.block_sparse_moe.experts.29.w2", "model.layers.11.block_sparse_moe.experts.30.w2", "model.layers.11.block_sparse_moe.experts.31.w2", "model.layers.11.block_sparse_moe.experts.32.w2", "model.layers.11.block_sparse_moe.experts.33.w2", "model.layers.11.block_sparse_moe.experts.34.w2", "model.layers.11.block_sparse_moe.experts.35.w2", "model.layers.11.block_sparse_moe.experts.36.w2", "model.layers.11.block_sparse_moe.experts.37.w2", "model.layers.11.block_sparse_moe.experts.38.w2", "model.layers.11.block_sparse_moe.experts.39.w2", "model.layers.11.block_sparse_moe.experts.40.w2", "model.layers.11.block_sparse_moe.experts.41.w2", "model.layers.11.block_sparse_moe.experts.42.w2", "model.layers.11.block_sparse_moe.experts.43.w2", "model.layers.11.block_sparse_moe.experts.44.w2", "model.layers.11.block_sparse_moe.experts.45.w2", "model.layers.11.block_sparse_moe.experts.46.w2", "model.layers.11.block_sparse_moe.experts.47.w2", "model.layers.11.block_sparse_moe.experts.48.w2", "model.layers.11.block_sparse_moe.experts.49.w2", "model.layers.11.block_sparse_moe.experts.50.w2", "model.layers.11.block_sparse_moe.experts.51.w2", "model.layers.11.block_sparse_moe.experts.52.w2", "model.layers.11.block_sparse_moe.experts.53.w2", "model.layers.11.block_sparse_moe.experts.54.w2", "model.layers.11.block_sparse_moe.experts.55.w2", "model.layers.11.block_sparse_moe.experts.56.w2", "model.layers.11.block_sparse_moe.experts.57.w2", "model.layers.11.block_sparse_moe.experts.58.w2", "model.layers.11.block_sparse_moe.experts.59.w2", "model.layers.11.block_sparse_moe.experts.60.w2", "model.layers.11.block_sparse_moe.experts.61.w2", "model.layers.11.block_sparse_moe.experts.62.w2", "model.layers.11.block_sparse_moe.experts.63.w2", "model.layers.11.block_sparse_moe.experts.64.w2", "model.layers.11.block_sparse_moe.experts.65.w2", "model.layers.11.block_sparse_moe.experts.66.w2", "model.layers.11.block_sparse_moe.experts.67.w2", "model.layers.11.block_sparse_moe.experts.68.w2", "model.layers.11.block_sparse_moe.experts.69.w2", "model.layers.11.block_sparse_moe.experts.70.w2", "model.layers.11.block_sparse_moe.experts.71.w2", "model.layers.11.block_sparse_moe.experts.72.w2", "model.layers.11.block_sparse_moe.experts.73.w2", "model.layers.11.block_sparse_moe.experts.74.w2", "model.layers.11.block_sparse_moe.experts.75.w2", "model.layers.11.block_sparse_moe.experts.76.w2", "model.layers.11.block_sparse_moe.experts.77.w2", "model.layers.11.block_sparse_moe.experts.78.w2", "model.layers.11.block_sparse_moe.experts.79.w2", "model.layers.11.block_sparse_moe.experts.80.w2", "model.layers.11.block_sparse_moe.experts.81.w2", "model.layers.11.block_sparse_moe.experts.82.w2", "model.layers.11.block_sparse_moe.experts.83.w2", "model.layers.11.block_sparse_moe.experts.84.w2", "model.layers.11.block_sparse_moe.experts.85.w2", "model.layers.11.block_sparse_moe.experts.86.w2", "model.layers.11.block_sparse_moe.experts.87.w2", "model.layers.11.block_sparse_moe.experts.88.w2", "model.layers.11.block_sparse_moe.experts.89.w2", "model.layers.11.block_sparse_moe.experts.90.w2", "model.layers.11.block_sparse_moe.experts.91.w2", "model.layers.11.block_sparse_moe.experts.92.w2", "model.layers.11.block_sparse_moe.experts.93.w2", "model.layers.11.block_sparse_moe.experts.94.w2", "model.layers.11.block_sparse_moe.experts.95.w2", "model.layers.11.block_sparse_moe.experts.96.w2", "model.layers.11.block_sparse_moe.experts.97.w2", "model.layers.11.block_sparse_moe.experts.98.w2", "model.layers.11.block_sparse_moe.experts.99.w2", "model.layers.11.block_sparse_moe.experts.100.w2", "model.layers.11.block_sparse_moe.experts.101.w2", "model.layers.11.block_sparse_moe.experts.102.w2", "model.layers.11.block_sparse_moe.experts.103.w2", "model.layers.11.block_sparse_moe.experts.104.w2", "model.layers.11.block_sparse_moe.experts.105.w2", "model.layers.11.block_sparse_moe.experts.106.w2", "model.layers.11.block_sparse_moe.experts.107.w2", "model.layers.11.block_sparse_moe.experts.108.w2", "model.layers.11.block_sparse_moe.experts.109.w2", "model.layers.11.block_sparse_moe.experts.110.w2", "model.layers.11.block_sparse_moe.experts.111.w2", "model.layers.11.block_sparse_moe.experts.112.w2", "model.layers.11.block_sparse_moe.experts.113.w2", "model.layers.11.block_sparse_moe.experts.114.w2", "model.layers.11.block_sparse_moe.experts.115.w2", "model.layers.11.block_sparse_moe.experts.116.w2", "model.layers.11.block_sparse_moe.experts.117.w2", "model.layers.11.block_sparse_moe.experts.118.w2", "model.layers.11.block_sparse_moe.experts.119.w2", "model.layers.11.block_sparse_moe.experts.120.w2", "model.layers.11.block_sparse_moe.experts.121.w2", "model.layers.11.block_sparse_moe.experts.122.w2", "model.layers.11.block_sparse_moe.experts.123.w2", "model.layers.11.block_sparse_moe.experts.124.w2", "model.layers.11.block_sparse_moe.experts.125.w2", "model.layers.11.block_sparse_moe.experts.126.w2", "model.layers.11.block_sparse_moe.experts.127.w2", "model.layers.11.block_sparse_moe.experts.128.w2", "model.layers.11.block_sparse_moe.experts.129.w2", "model.layers.11.block_sparse_moe.experts.130.w2", "model.layers.11.block_sparse_moe.experts.131.w2", "model.layers.11.block_sparse_moe.experts.132.w2", "model.layers.11.block_sparse_moe.experts.133.w2", "model.layers.11.block_sparse_moe.experts.134.w2", "model.layers.11.block_sparse_moe.experts.135.w2", "model.layers.11.block_sparse_moe.experts.136.w2", "model.layers.11.block_sparse_moe.experts.137.w2", "model.layers.11.block_sparse_moe.experts.138.w2", "model.layers.11.block_sparse_moe.experts.139.w2", "model.layers.11.block_sparse_moe.experts.140.w2", "model.layers.11.block_sparse_moe.experts.141.w2", "model.layers.11.block_sparse_moe.experts.142.w2", "model.layers.11.block_sparse_moe.experts.143.w2", "model.layers.11.block_sparse_moe.experts.144.w2", "model.layers.11.block_sparse_moe.experts.145.w2", "model.layers.11.block_sparse_moe.experts.146.w2", "model.layers.11.block_sparse_moe.experts.147.w2", "model.layers.11.block_sparse_moe.experts.148.w2", "model.layers.11.block_sparse_moe.experts.149.w2", "model.layers.11.block_sparse_moe.experts.150.w2", "model.layers.11.block_sparse_moe.experts.151.w2", "model.layers.11.block_sparse_moe.experts.152.w2", "model.layers.11.block_sparse_moe.experts.153.w2", "model.layers.11.block_sparse_moe.experts.154.w2", "model.layers.11.block_sparse_moe.experts.155.w2", "model.layers.11.block_sparse_moe.experts.156.w2", "model.layers.11.block_sparse_moe.experts.157.w2", "model.layers.11.block_sparse_moe.experts.158.w2", "model.layers.11.block_sparse_moe.experts.159.w2", "model.layers.11.block_sparse_moe.experts.160.w2", "model.layers.11.block_sparse_moe.experts.161.w2", "model.layers.11.block_sparse_moe.experts.162.w2", "model.layers.11.block_sparse_moe.experts.163.w2", "model.layers.11.block_sparse_moe.experts.164.w2", "model.layers.11.block_sparse_moe.experts.165.w2", "model.layers.11.block_sparse_moe.experts.166.w2", "model.layers.11.block_sparse_moe.experts.167.w2", "model.layers.11.block_sparse_moe.experts.168.w2", "model.layers.11.block_sparse_moe.experts.169.w2", "model.layers.11.block_sparse_moe.experts.170.w2", "model.layers.11.block_sparse_moe.experts.171.w2", "model.layers.11.block_sparse_moe.experts.172.w2", "model.layers.11.block_sparse_moe.experts.173.w2", "model.layers.11.block_sparse_moe.experts.174.w2", "model.layers.11.block_sparse_moe.experts.175.w2", "model.layers.11.block_sparse_moe.experts.176.w2", "model.layers.11.block_sparse_moe.experts.177.w2", "model.layers.11.block_sparse_moe.experts.178.w2", "model.layers.11.block_sparse_moe.experts.179.w2", "model.layers.11.block_sparse_moe.experts.180.w2", "model.layers.11.block_sparse_moe.experts.181.w2", "model.layers.11.block_sparse_moe.experts.182.w2", "model.layers.11.block_sparse_moe.experts.183.w2", "model.layers.11.block_sparse_moe.experts.184.w2", "model.layers.11.block_sparse_moe.experts.185.w2", "model.layers.11.block_sparse_moe.experts.186.w2", "model.layers.11.block_sparse_moe.experts.187.w2", "model.layers.11.block_sparse_moe.experts.188.w2", "model.layers.11.block_sparse_moe.experts.189.w2", "model.layers.11.block_sparse_moe.experts.190.w2", "model.layers.11.block_sparse_moe.experts.191.w2", "model.layers.11.block_sparse_moe.experts.192.w2", "model.layers.11.block_sparse_moe.experts.193.w2", "model.layers.11.block_sparse_moe.experts.194.w2", "model.layers.11.block_sparse_moe.experts.195.w2", "model.layers.11.block_sparse_moe.experts.196.w2", "model.layers.11.block_sparse_moe.experts.197.w2", "model.layers.11.block_sparse_moe.experts.198.w2", "model.layers.11.block_sparse_moe.experts.199.w2", "model.layers.11.block_sparse_moe.experts.200.w2", "model.layers.11.block_sparse_moe.experts.201.w2", "model.layers.11.block_sparse_moe.experts.202.w2", "model.layers.11.block_sparse_moe.experts.203.w2", "model.layers.11.block_sparse_moe.experts.204.w2", "model.layers.11.block_sparse_moe.experts.205.w2", "model.layers.11.block_sparse_moe.experts.206.w2", "model.layers.11.block_sparse_moe.experts.207.w2", "model.layers.11.block_sparse_moe.experts.208.w2", "model.layers.11.block_sparse_moe.experts.209.w2", "model.layers.11.block_sparse_moe.experts.210.w2", "model.layers.11.block_sparse_moe.experts.211.w2", "model.layers.11.block_sparse_moe.experts.212.w2", "model.layers.11.block_sparse_moe.experts.213.w2", "model.layers.11.block_sparse_moe.experts.214.w2", "model.layers.11.block_sparse_moe.experts.215.w2", "model.layers.11.block_sparse_moe.experts.216.w2", "model.layers.11.block_sparse_moe.experts.217.w2", "model.layers.11.block_sparse_moe.experts.218.w2", "model.layers.11.block_sparse_moe.experts.219.w2", "model.layers.11.block_sparse_moe.experts.220.w2", "model.layers.11.block_sparse_moe.experts.221.w2", "model.layers.11.block_sparse_moe.experts.222.w2", "model.layers.11.block_sparse_moe.experts.223.w2", "model.layers.11.block_sparse_moe.experts.224.w2", "model.layers.11.block_sparse_moe.experts.225.w2", "model.layers.11.block_sparse_moe.experts.226.w2", "model.layers.11.block_sparse_moe.experts.227.w2", "model.layers.11.block_sparse_moe.experts.228.w2", "model.layers.11.block_sparse_moe.experts.229.w2", "model.layers.11.block_sparse_moe.experts.230.w2", "model.layers.11.block_sparse_moe.experts.231.w2", "model.layers.11.block_sparse_moe.experts.232.w2", "model.layers.11.block_sparse_moe.experts.233.w2", "model.layers.11.block_sparse_moe.experts.234.w2", "model.layers.11.block_sparse_moe.experts.235.w2", "model.layers.11.block_sparse_moe.experts.236.w2", "model.layers.11.block_sparse_moe.experts.237.w2", "model.layers.11.block_sparse_moe.experts.238.w2", "model.layers.11.block_sparse_moe.experts.239.w2", "model.layers.11.block_sparse_moe.experts.240.w2", "model.layers.11.block_sparse_moe.experts.241.w2", "model.layers.11.block_sparse_moe.experts.242.w2", "model.layers.11.block_sparse_moe.experts.243.w2", "model.layers.11.block_sparse_moe.experts.244.w2", "model.layers.11.block_sparse_moe.experts.245.w2", "model.layers.11.block_sparse_moe.experts.246.w2", "model.layers.11.block_sparse_moe.experts.247.w2", "model.layers.11.block_sparse_moe.experts.248.w2", "model.layers.11.block_sparse_moe.experts.249.w2", "model.layers.11.block_sparse_moe.experts.250.w2", "model.layers.11.block_sparse_moe.experts.251.w2", "model.layers.11.block_sparse_moe.experts.252.w2", "model.layers.11.block_sparse_moe.experts.253.w2", "model.layers.11.block_sparse_moe.experts.254.w2", "model.layers.11.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0008339611813426018, "dbits": 3623878656 } ] }, { "idx": 24, "layers": [ "model.layers.12.self_attn.q_proj", "model.layers.12.self_attn.k_proj", "model.layers.12.self_attn.v_proj", "model.layers.12.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0004702983424067497, "dbits": 44040192 } ] }, { "idx": 25, "layers": [ "model.layers.12.block_sparse_moe.experts.0.w1", "model.layers.12.block_sparse_moe.experts.1.w1", "model.layers.12.block_sparse_moe.experts.2.w1", "model.layers.12.block_sparse_moe.experts.3.w1", "model.layers.12.block_sparse_moe.experts.4.w1", "model.layers.12.block_sparse_moe.experts.5.w1", "model.layers.12.block_sparse_moe.experts.6.w1", "model.layers.12.block_sparse_moe.experts.7.w1", "model.layers.12.block_sparse_moe.experts.8.w1", "model.layers.12.block_sparse_moe.experts.9.w1", "model.layers.12.block_sparse_moe.experts.10.w1", "model.layers.12.block_sparse_moe.experts.11.w1", "model.layers.12.block_sparse_moe.experts.12.w1", "model.layers.12.block_sparse_moe.experts.13.w1", "model.layers.12.block_sparse_moe.experts.14.w1", "model.layers.12.block_sparse_moe.experts.15.w1", "model.layers.12.block_sparse_moe.experts.16.w1", "model.layers.12.block_sparse_moe.experts.17.w1", "model.layers.12.block_sparse_moe.experts.18.w1", "model.layers.12.block_sparse_moe.experts.19.w1", "model.layers.12.block_sparse_moe.experts.20.w1", "model.layers.12.block_sparse_moe.experts.21.w1", "model.layers.12.block_sparse_moe.experts.22.w1", "model.layers.12.block_sparse_moe.experts.23.w1", "model.layers.12.block_sparse_moe.experts.24.w1", "model.layers.12.block_sparse_moe.experts.25.w1", "model.layers.12.block_sparse_moe.experts.26.w1", "model.layers.12.block_sparse_moe.experts.27.w1", "model.layers.12.block_sparse_moe.experts.28.w1", "model.layers.12.block_sparse_moe.experts.29.w1", "model.layers.12.block_sparse_moe.experts.30.w1", "model.layers.12.block_sparse_moe.experts.31.w1", "model.layers.12.block_sparse_moe.experts.32.w1", "model.layers.12.block_sparse_moe.experts.33.w1", "model.layers.12.block_sparse_moe.experts.34.w1", "model.layers.12.block_sparse_moe.experts.35.w1", "model.layers.12.block_sparse_moe.experts.36.w1", "model.layers.12.block_sparse_moe.experts.37.w1", "model.layers.12.block_sparse_moe.experts.38.w1", "model.layers.12.block_sparse_moe.experts.39.w1", "model.layers.12.block_sparse_moe.experts.40.w1", "model.layers.12.block_sparse_moe.experts.41.w1", "model.layers.12.block_sparse_moe.experts.42.w1", "model.layers.12.block_sparse_moe.experts.43.w1", "model.layers.12.block_sparse_moe.experts.44.w1", "model.layers.12.block_sparse_moe.experts.45.w1", "model.layers.12.block_sparse_moe.experts.46.w1", "model.layers.12.block_sparse_moe.experts.47.w1", "model.layers.12.block_sparse_moe.experts.48.w1", "model.layers.12.block_sparse_moe.experts.49.w1", "model.layers.12.block_sparse_moe.experts.50.w1", "model.layers.12.block_sparse_moe.experts.51.w1", "model.layers.12.block_sparse_moe.experts.52.w1", "model.layers.12.block_sparse_moe.experts.53.w1", "model.layers.12.block_sparse_moe.experts.54.w1", "model.layers.12.block_sparse_moe.experts.55.w1", "model.layers.12.block_sparse_moe.experts.56.w1", "model.layers.12.block_sparse_moe.experts.57.w1", "model.layers.12.block_sparse_moe.experts.58.w1", "model.layers.12.block_sparse_moe.experts.59.w1", "model.layers.12.block_sparse_moe.experts.60.w1", "model.layers.12.block_sparse_moe.experts.61.w1", "model.layers.12.block_sparse_moe.experts.62.w1", "model.layers.12.block_sparse_moe.experts.63.w1", "model.layers.12.block_sparse_moe.experts.64.w1", "model.layers.12.block_sparse_moe.experts.65.w1", "model.layers.12.block_sparse_moe.experts.66.w1", "model.layers.12.block_sparse_moe.experts.67.w1", "model.layers.12.block_sparse_moe.experts.68.w1", "model.layers.12.block_sparse_moe.experts.69.w1", "model.layers.12.block_sparse_moe.experts.70.w1", "model.layers.12.block_sparse_moe.experts.71.w1", "model.layers.12.block_sparse_moe.experts.72.w1", "model.layers.12.block_sparse_moe.experts.73.w1", "model.layers.12.block_sparse_moe.experts.74.w1", "model.layers.12.block_sparse_moe.experts.75.w1", "model.layers.12.block_sparse_moe.experts.76.w1", "model.layers.12.block_sparse_moe.experts.77.w1", "model.layers.12.block_sparse_moe.experts.78.w1", "model.layers.12.block_sparse_moe.experts.79.w1", "model.layers.12.block_sparse_moe.experts.80.w1", "model.layers.12.block_sparse_moe.experts.81.w1", "model.layers.12.block_sparse_moe.experts.82.w1", "model.layers.12.block_sparse_moe.experts.83.w1", "model.layers.12.block_sparse_moe.experts.84.w1", "model.layers.12.block_sparse_moe.experts.85.w1", "model.layers.12.block_sparse_moe.experts.86.w1", "model.layers.12.block_sparse_moe.experts.87.w1", "model.layers.12.block_sparse_moe.experts.88.w1", "model.layers.12.block_sparse_moe.experts.89.w1", "model.layers.12.block_sparse_moe.experts.90.w1", "model.layers.12.block_sparse_moe.experts.91.w1", "model.layers.12.block_sparse_moe.experts.92.w1", "model.layers.12.block_sparse_moe.experts.93.w1", "model.layers.12.block_sparse_moe.experts.94.w1", "model.layers.12.block_sparse_moe.experts.95.w1", "model.layers.12.block_sparse_moe.experts.96.w1", "model.layers.12.block_sparse_moe.experts.97.w1", "model.layers.12.block_sparse_moe.experts.98.w1", "model.layers.12.block_sparse_moe.experts.99.w1", "model.layers.12.block_sparse_moe.experts.100.w1", "model.layers.12.block_sparse_moe.experts.101.w1", "model.layers.12.block_sparse_moe.experts.102.w1", "model.layers.12.block_sparse_moe.experts.103.w1", "model.layers.12.block_sparse_moe.experts.104.w1", "model.layers.12.block_sparse_moe.experts.105.w1", "model.layers.12.block_sparse_moe.experts.106.w1", "model.layers.12.block_sparse_moe.experts.107.w1", "model.layers.12.block_sparse_moe.experts.108.w1", "model.layers.12.block_sparse_moe.experts.109.w1", "model.layers.12.block_sparse_moe.experts.110.w1", "model.layers.12.block_sparse_moe.experts.111.w1", "model.layers.12.block_sparse_moe.experts.112.w1", "model.layers.12.block_sparse_moe.experts.113.w1", "model.layers.12.block_sparse_moe.experts.114.w1", "model.layers.12.block_sparse_moe.experts.115.w1", "model.layers.12.block_sparse_moe.experts.116.w1", "model.layers.12.block_sparse_moe.experts.117.w1", "model.layers.12.block_sparse_moe.experts.118.w1", "model.layers.12.block_sparse_moe.experts.119.w1", "model.layers.12.block_sparse_moe.experts.120.w1", "model.layers.12.block_sparse_moe.experts.121.w1", "model.layers.12.block_sparse_moe.experts.122.w1", "model.layers.12.block_sparse_moe.experts.123.w1", "model.layers.12.block_sparse_moe.experts.124.w1", "model.layers.12.block_sparse_moe.experts.125.w1", "model.layers.12.block_sparse_moe.experts.126.w1", "model.layers.12.block_sparse_moe.experts.127.w1", "model.layers.12.block_sparse_moe.experts.128.w1", "model.layers.12.block_sparse_moe.experts.129.w1", "model.layers.12.block_sparse_moe.experts.130.w1", "model.layers.12.block_sparse_moe.experts.131.w1", "model.layers.12.block_sparse_moe.experts.132.w1", "model.layers.12.block_sparse_moe.experts.133.w1", "model.layers.12.block_sparse_moe.experts.134.w1", "model.layers.12.block_sparse_moe.experts.135.w1", "model.layers.12.block_sparse_moe.experts.136.w1", "model.layers.12.block_sparse_moe.experts.137.w1", "model.layers.12.block_sparse_moe.experts.138.w1", "model.layers.12.block_sparse_moe.experts.139.w1", "model.layers.12.block_sparse_moe.experts.140.w1", "model.layers.12.block_sparse_moe.experts.141.w1", "model.layers.12.block_sparse_moe.experts.142.w1", "model.layers.12.block_sparse_moe.experts.143.w1", "model.layers.12.block_sparse_moe.experts.144.w1", "model.layers.12.block_sparse_moe.experts.145.w1", "model.layers.12.block_sparse_moe.experts.146.w1", "model.layers.12.block_sparse_moe.experts.147.w1", "model.layers.12.block_sparse_moe.experts.148.w1", "model.layers.12.block_sparse_moe.experts.149.w1", "model.layers.12.block_sparse_moe.experts.150.w1", "model.layers.12.block_sparse_moe.experts.151.w1", "model.layers.12.block_sparse_moe.experts.152.w1", "model.layers.12.block_sparse_moe.experts.153.w1", "model.layers.12.block_sparse_moe.experts.154.w1", "model.layers.12.block_sparse_moe.experts.155.w1", "model.layers.12.block_sparse_moe.experts.156.w1", "model.layers.12.block_sparse_moe.experts.157.w1", "model.layers.12.block_sparse_moe.experts.158.w1", "model.layers.12.block_sparse_moe.experts.159.w1", "model.layers.12.block_sparse_moe.experts.160.w1", "model.layers.12.block_sparse_moe.experts.161.w1", "model.layers.12.block_sparse_moe.experts.162.w1", "model.layers.12.block_sparse_moe.experts.163.w1", "model.layers.12.block_sparse_moe.experts.164.w1", "model.layers.12.block_sparse_moe.experts.165.w1", "model.layers.12.block_sparse_moe.experts.166.w1", "model.layers.12.block_sparse_moe.experts.167.w1", "model.layers.12.block_sparse_moe.experts.168.w1", "model.layers.12.block_sparse_moe.experts.169.w1", "model.layers.12.block_sparse_moe.experts.170.w1", "model.layers.12.block_sparse_moe.experts.171.w1", "model.layers.12.block_sparse_moe.experts.172.w1", "model.layers.12.block_sparse_moe.experts.173.w1", "model.layers.12.block_sparse_moe.experts.174.w1", "model.layers.12.block_sparse_moe.experts.175.w1", "model.layers.12.block_sparse_moe.experts.176.w1", "model.layers.12.block_sparse_moe.experts.177.w1", "model.layers.12.block_sparse_moe.experts.178.w1", "model.layers.12.block_sparse_moe.experts.179.w1", "model.layers.12.block_sparse_moe.experts.180.w1", "model.layers.12.block_sparse_moe.experts.181.w1", "model.layers.12.block_sparse_moe.experts.182.w1", "model.layers.12.block_sparse_moe.experts.183.w1", "model.layers.12.block_sparse_moe.experts.184.w1", "model.layers.12.block_sparse_moe.experts.185.w1", "model.layers.12.block_sparse_moe.experts.186.w1", "model.layers.12.block_sparse_moe.experts.187.w1", "model.layers.12.block_sparse_moe.experts.188.w1", "model.layers.12.block_sparse_moe.experts.189.w1", "model.layers.12.block_sparse_moe.experts.190.w1", "model.layers.12.block_sparse_moe.experts.191.w1", "model.layers.12.block_sparse_moe.experts.192.w1", "model.layers.12.block_sparse_moe.experts.193.w1", "model.layers.12.block_sparse_moe.experts.194.w1", "model.layers.12.block_sparse_moe.experts.195.w1", "model.layers.12.block_sparse_moe.experts.196.w1", "model.layers.12.block_sparse_moe.experts.197.w1", "model.layers.12.block_sparse_moe.experts.198.w1", "model.layers.12.block_sparse_moe.experts.199.w1", "model.layers.12.block_sparse_moe.experts.200.w1", "model.layers.12.block_sparse_moe.experts.201.w1", "model.layers.12.block_sparse_moe.experts.202.w1", "model.layers.12.block_sparse_moe.experts.203.w1", "model.layers.12.block_sparse_moe.experts.204.w1", "model.layers.12.block_sparse_moe.experts.205.w1", "model.layers.12.block_sparse_moe.experts.206.w1", "model.layers.12.block_sparse_moe.experts.207.w1", "model.layers.12.block_sparse_moe.experts.208.w1", "model.layers.12.block_sparse_moe.experts.209.w1", "model.layers.12.block_sparse_moe.experts.210.w1", "model.layers.12.block_sparse_moe.experts.211.w1", "model.layers.12.block_sparse_moe.experts.212.w1", "model.layers.12.block_sparse_moe.experts.213.w1", "model.layers.12.block_sparse_moe.experts.214.w1", "model.layers.12.block_sparse_moe.experts.215.w1", "model.layers.12.block_sparse_moe.experts.216.w1", "model.layers.12.block_sparse_moe.experts.217.w1", "model.layers.12.block_sparse_moe.experts.218.w1", "model.layers.12.block_sparse_moe.experts.219.w1", "model.layers.12.block_sparse_moe.experts.220.w1", "model.layers.12.block_sparse_moe.experts.221.w1", "model.layers.12.block_sparse_moe.experts.222.w1", "model.layers.12.block_sparse_moe.experts.223.w1", "model.layers.12.block_sparse_moe.experts.224.w1", "model.layers.12.block_sparse_moe.experts.225.w1", "model.layers.12.block_sparse_moe.experts.226.w1", "model.layers.12.block_sparse_moe.experts.227.w1", "model.layers.12.block_sparse_moe.experts.228.w1", "model.layers.12.block_sparse_moe.experts.229.w1", "model.layers.12.block_sparse_moe.experts.230.w1", "model.layers.12.block_sparse_moe.experts.231.w1", "model.layers.12.block_sparse_moe.experts.232.w1", "model.layers.12.block_sparse_moe.experts.233.w1", "model.layers.12.block_sparse_moe.experts.234.w1", "model.layers.12.block_sparse_moe.experts.235.w1", "model.layers.12.block_sparse_moe.experts.236.w1", "model.layers.12.block_sparse_moe.experts.237.w1", "model.layers.12.block_sparse_moe.experts.238.w1", "model.layers.12.block_sparse_moe.experts.239.w1", "model.layers.12.block_sparse_moe.experts.240.w1", "model.layers.12.block_sparse_moe.experts.241.w1", "model.layers.12.block_sparse_moe.experts.242.w1", "model.layers.12.block_sparse_moe.experts.243.w1", "model.layers.12.block_sparse_moe.experts.244.w1", "model.layers.12.block_sparse_moe.experts.245.w1", "model.layers.12.block_sparse_moe.experts.246.w1", "model.layers.12.block_sparse_moe.experts.247.w1", "model.layers.12.block_sparse_moe.experts.248.w1", "model.layers.12.block_sparse_moe.experts.249.w1", "model.layers.12.block_sparse_moe.experts.250.w1", "model.layers.12.block_sparse_moe.experts.251.w1", "model.layers.12.block_sparse_moe.experts.252.w1", "model.layers.12.block_sparse_moe.experts.253.w1", "model.layers.12.block_sparse_moe.experts.254.w1", "model.layers.12.block_sparse_moe.experts.255.w1", "model.layers.12.block_sparse_moe.experts.0.w3", "model.layers.12.block_sparse_moe.experts.1.w3", "model.layers.12.block_sparse_moe.experts.2.w3", "model.layers.12.block_sparse_moe.experts.3.w3", "model.layers.12.block_sparse_moe.experts.4.w3", "model.layers.12.block_sparse_moe.experts.5.w3", "model.layers.12.block_sparse_moe.experts.6.w3", "model.layers.12.block_sparse_moe.experts.7.w3", "model.layers.12.block_sparse_moe.experts.8.w3", "model.layers.12.block_sparse_moe.experts.9.w3", "model.layers.12.block_sparse_moe.experts.10.w3", "model.layers.12.block_sparse_moe.experts.11.w3", "model.layers.12.block_sparse_moe.experts.12.w3", "model.layers.12.block_sparse_moe.experts.13.w3", "model.layers.12.block_sparse_moe.experts.14.w3", "model.layers.12.block_sparse_moe.experts.15.w3", "model.layers.12.block_sparse_moe.experts.16.w3", "model.layers.12.block_sparse_moe.experts.17.w3", "model.layers.12.block_sparse_moe.experts.18.w3", "model.layers.12.block_sparse_moe.experts.19.w3", "model.layers.12.block_sparse_moe.experts.20.w3", "model.layers.12.block_sparse_moe.experts.21.w3", "model.layers.12.block_sparse_moe.experts.22.w3", "model.layers.12.block_sparse_moe.experts.23.w3", "model.layers.12.block_sparse_moe.experts.24.w3", "model.layers.12.block_sparse_moe.experts.25.w3", "model.layers.12.block_sparse_moe.experts.26.w3", "model.layers.12.block_sparse_moe.experts.27.w3", "model.layers.12.block_sparse_moe.experts.28.w3", "model.layers.12.block_sparse_moe.experts.29.w3", "model.layers.12.block_sparse_moe.experts.30.w3", "model.layers.12.block_sparse_moe.experts.31.w3", "model.layers.12.block_sparse_moe.experts.32.w3", "model.layers.12.block_sparse_moe.experts.33.w3", "model.layers.12.block_sparse_moe.experts.34.w3", "model.layers.12.block_sparse_moe.experts.35.w3", "model.layers.12.block_sparse_moe.experts.36.w3", "model.layers.12.block_sparse_moe.experts.37.w3", "model.layers.12.block_sparse_moe.experts.38.w3", "model.layers.12.block_sparse_moe.experts.39.w3", "model.layers.12.block_sparse_moe.experts.40.w3", "model.layers.12.block_sparse_moe.experts.41.w3", "model.layers.12.block_sparse_moe.experts.42.w3", "model.layers.12.block_sparse_moe.experts.43.w3", "model.layers.12.block_sparse_moe.experts.44.w3", "model.layers.12.block_sparse_moe.experts.45.w3", "model.layers.12.block_sparse_moe.experts.46.w3", "model.layers.12.block_sparse_moe.experts.47.w3", "model.layers.12.block_sparse_moe.experts.48.w3", "model.layers.12.block_sparse_moe.experts.49.w3", "model.layers.12.block_sparse_moe.experts.50.w3", "model.layers.12.block_sparse_moe.experts.51.w3", "model.layers.12.block_sparse_moe.experts.52.w3", "model.layers.12.block_sparse_moe.experts.53.w3", "model.layers.12.block_sparse_moe.experts.54.w3", "model.layers.12.block_sparse_moe.experts.55.w3", "model.layers.12.block_sparse_moe.experts.56.w3", "model.layers.12.block_sparse_moe.experts.57.w3", "model.layers.12.block_sparse_moe.experts.58.w3", "model.layers.12.block_sparse_moe.experts.59.w3", "model.layers.12.block_sparse_moe.experts.60.w3", "model.layers.12.block_sparse_moe.experts.61.w3", "model.layers.12.block_sparse_moe.experts.62.w3", "model.layers.12.block_sparse_moe.experts.63.w3", "model.layers.12.block_sparse_moe.experts.64.w3", "model.layers.12.block_sparse_moe.experts.65.w3", "model.layers.12.block_sparse_moe.experts.66.w3", "model.layers.12.block_sparse_moe.experts.67.w3", "model.layers.12.block_sparse_moe.experts.68.w3", "model.layers.12.block_sparse_moe.experts.69.w3", "model.layers.12.block_sparse_moe.experts.70.w3", "model.layers.12.block_sparse_moe.experts.71.w3", "model.layers.12.block_sparse_moe.experts.72.w3", "model.layers.12.block_sparse_moe.experts.73.w3", "model.layers.12.block_sparse_moe.experts.74.w3", "model.layers.12.block_sparse_moe.experts.75.w3", "model.layers.12.block_sparse_moe.experts.76.w3", "model.layers.12.block_sparse_moe.experts.77.w3", "model.layers.12.block_sparse_moe.experts.78.w3", "model.layers.12.block_sparse_moe.experts.79.w3", "model.layers.12.block_sparse_moe.experts.80.w3", "model.layers.12.block_sparse_moe.experts.81.w3", "model.layers.12.block_sparse_moe.experts.82.w3", "model.layers.12.block_sparse_moe.experts.83.w3", "model.layers.12.block_sparse_moe.experts.84.w3", "model.layers.12.block_sparse_moe.experts.85.w3", "model.layers.12.block_sparse_moe.experts.86.w3", "model.layers.12.block_sparse_moe.experts.87.w3", "model.layers.12.block_sparse_moe.experts.88.w3", "model.layers.12.block_sparse_moe.experts.89.w3", "model.layers.12.block_sparse_moe.experts.90.w3", "model.layers.12.block_sparse_moe.experts.91.w3", "model.layers.12.block_sparse_moe.experts.92.w3", "model.layers.12.block_sparse_moe.experts.93.w3", "model.layers.12.block_sparse_moe.experts.94.w3", "model.layers.12.block_sparse_moe.experts.95.w3", "model.layers.12.block_sparse_moe.experts.96.w3", "model.layers.12.block_sparse_moe.experts.97.w3", "model.layers.12.block_sparse_moe.experts.98.w3", "model.layers.12.block_sparse_moe.experts.99.w3", "model.layers.12.block_sparse_moe.experts.100.w3", "model.layers.12.block_sparse_moe.experts.101.w3", "model.layers.12.block_sparse_moe.experts.102.w3", "model.layers.12.block_sparse_moe.experts.103.w3", "model.layers.12.block_sparse_moe.experts.104.w3", "model.layers.12.block_sparse_moe.experts.105.w3", "model.layers.12.block_sparse_moe.experts.106.w3", "model.layers.12.block_sparse_moe.experts.107.w3", "model.layers.12.block_sparse_moe.experts.108.w3", "model.layers.12.block_sparse_moe.experts.109.w3", "model.layers.12.block_sparse_moe.experts.110.w3", "model.layers.12.block_sparse_moe.experts.111.w3", "model.layers.12.block_sparse_moe.experts.112.w3", "model.layers.12.block_sparse_moe.experts.113.w3", "model.layers.12.block_sparse_moe.experts.114.w3", "model.layers.12.block_sparse_moe.experts.115.w3", "model.layers.12.block_sparse_moe.experts.116.w3", "model.layers.12.block_sparse_moe.experts.117.w3", "model.layers.12.block_sparse_moe.experts.118.w3", "model.layers.12.block_sparse_moe.experts.119.w3", "model.layers.12.block_sparse_moe.experts.120.w3", "model.layers.12.block_sparse_moe.experts.121.w3", "model.layers.12.block_sparse_moe.experts.122.w3", "model.layers.12.block_sparse_moe.experts.123.w3", "model.layers.12.block_sparse_moe.experts.124.w3", "model.layers.12.block_sparse_moe.experts.125.w3", "model.layers.12.block_sparse_moe.experts.126.w3", "model.layers.12.block_sparse_moe.experts.127.w3", "model.layers.12.block_sparse_moe.experts.128.w3", "model.layers.12.block_sparse_moe.experts.129.w3", "model.layers.12.block_sparse_moe.experts.130.w3", "model.layers.12.block_sparse_moe.experts.131.w3", "model.layers.12.block_sparse_moe.experts.132.w3", "model.layers.12.block_sparse_moe.experts.133.w3", "model.layers.12.block_sparse_moe.experts.134.w3", "model.layers.12.block_sparse_moe.experts.135.w3", "model.layers.12.block_sparse_moe.experts.136.w3", "model.layers.12.block_sparse_moe.experts.137.w3", "model.layers.12.block_sparse_moe.experts.138.w3", "model.layers.12.block_sparse_moe.experts.139.w3", "model.layers.12.block_sparse_moe.experts.140.w3", "model.layers.12.block_sparse_moe.experts.141.w3", "model.layers.12.block_sparse_moe.experts.142.w3", "model.layers.12.block_sparse_moe.experts.143.w3", "model.layers.12.block_sparse_moe.experts.144.w3", "model.layers.12.block_sparse_moe.experts.145.w3", "model.layers.12.block_sparse_moe.experts.146.w3", "model.layers.12.block_sparse_moe.experts.147.w3", "model.layers.12.block_sparse_moe.experts.148.w3", "model.layers.12.block_sparse_moe.experts.149.w3", "model.layers.12.block_sparse_moe.experts.150.w3", "model.layers.12.block_sparse_moe.experts.151.w3", "model.layers.12.block_sparse_moe.experts.152.w3", "model.layers.12.block_sparse_moe.experts.153.w3", "model.layers.12.block_sparse_moe.experts.154.w3", "model.layers.12.block_sparse_moe.experts.155.w3", "model.layers.12.block_sparse_moe.experts.156.w3", "model.layers.12.block_sparse_moe.experts.157.w3", "model.layers.12.block_sparse_moe.experts.158.w3", "model.layers.12.block_sparse_moe.experts.159.w3", "model.layers.12.block_sparse_moe.experts.160.w3", "model.layers.12.block_sparse_moe.experts.161.w3", "model.layers.12.block_sparse_moe.experts.162.w3", "model.layers.12.block_sparse_moe.experts.163.w3", "model.layers.12.block_sparse_moe.experts.164.w3", "model.layers.12.block_sparse_moe.experts.165.w3", "model.layers.12.block_sparse_moe.experts.166.w3", "model.layers.12.block_sparse_moe.experts.167.w3", "model.layers.12.block_sparse_moe.experts.168.w3", "model.layers.12.block_sparse_moe.experts.169.w3", "model.layers.12.block_sparse_moe.experts.170.w3", "model.layers.12.block_sparse_moe.experts.171.w3", "model.layers.12.block_sparse_moe.experts.172.w3", "model.layers.12.block_sparse_moe.experts.173.w3", "model.layers.12.block_sparse_moe.experts.174.w3", "model.layers.12.block_sparse_moe.experts.175.w3", "model.layers.12.block_sparse_moe.experts.176.w3", "model.layers.12.block_sparse_moe.experts.177.w3", "model.layers.12.block_sparse_moe.experts.178.w3", "model.layers.12.block_sparse_moe.experts.179.w3", "model.layers.12.block_sparse_moe.experts.180.w3", "model.layers.12.block_sparse_moe.experts.181.w3", "model.layers.12.block_sparse_moe.experts.182.w3", "model.layers.12.block_sparse_moe.experts.183.w3", "model.layers.12.block_sparse_moe.experts.184.w3", "model.layers.12.block_sparse_moe.experts.185.w3", "model.layers.12.block_sparse_moe.experts.186.w3", "model.layers.12.block_sparse_moe.experts.187.w3", "model.layers.12.block_sparse_moe.experts.188.w3", "model.layers.12.block_sparse_moe.experts.189.w3", "model.layers.12.block_sparse_moe.experts.190.w3", "model.layers.12.block_sparse_moe.experts.191.w3", "model.layers.12.block_sparse_moe.experts.192.w3", "model.layers.12.block_sparse_moe.experts.193.w3", "model.layers.12.block_sparse_moe.experts.194.w3", "model.layers.12.block_sparse_moe.experts.195.w3", "model.layers.12.block_sparse_moe.experts.196.w3", "model.layers.12.block_sparse_moe.experts.197.w3", "model.layers.12.block_sparse_moe.experts.198.w3", "model.layers.12.block_sparse_moe.experts.199.w3", "model.layers.12.block_sparse_moe.experts.200.w3", "model.layers.12.block_sparse_moe.experts.201.w3", "model.layers.12.block_sparse_moe.experts.202.w3", "model.layers.12.block_sparse_moe.experts.203.w3", "model.layers.12.block_sparse_moe.experts.204.w3", "model.layers.12.block_sparse_moe.experts.205.w3", "model.layers.12.block_sparse_moe.experts.206.w3", "model.layers.12.block_sparse_moe.experts.207.w3", "model.layers.12.block_sparse_moe.experts.208.w3", "model.layers.12.block_sparse_moe.experts.209.w3", "model.layers.12.block_sparse_moe.experts.210.w3", "model.layers.12.block_sparse_moe.experts.211.w3", "model.layers.12.block_sparse_moe.experts.212.w3", "model.layers.12.block_sparse_moe.experts.213.w3", "model.layers.12.block_sparse_moe.experts.214.w3", "model.layers.12.block_sparse_moe.experts.215.w3", "model.layers.12.block_sparse_moe.experts.216.w3", "model.layers.12.block_sparse_moe.experts.217.w3", "model.layers.12.block_sparse_moe.experts.218.w3", "model.layers.12.block_sparse_moe.experts.219.w3", "model.layers.12.block_sparse_moe.experts.220.w3", "model.layers.12.block_sparse_moe.experts.221.w3", "model.layers.12.block_sparse_moe.experts.222.w3", "model.layers.12.block_sparse_moe.experts.223.w3", "model.layers.12.block_sparse_moe.experts.224.w3", "model.layers.12.block_sparse_moe.experts.225.w3", "model.layers.12.block_sparse_moe.experts.226.w3", "model.layers.12.block_sparse_moe.experts.227.w3", "model.layers.12.block_sparse_moe.experts.228.w3", "model.layers.12.block_sparse_moe.experts.229.w3", "model.layers.12.block_sparse_moe.experts.230.w3", "model.layers.12.block_sparse_moe.experts.231.w3", "model.layers.12.block_sparse_moe.experts.232.w3", "model.layers.12.block_sparse_moe.experts.233.w3", "model.layers.12.block_sparse_moe.experts.234.w3", "model.layers.12.block_sparse_moe.experts.235.w3", "model.layers.12.block_sparse_moe.experts.236.w3", "model.layers.12.block_sparse_moe.experts.237.w3", "model.layers.12.block_sparse_moe.experts.238.w3", "model.layers.12.block_sparse_moe.experts.239.w3", "model.layers.12.block_sparse_moe.experts.240.w3", "model.layers.12.block_sparse_moe.experts.241.w3", "model.layers.12.block_sparse_moe.experts.242.w3", "model.layers.12.block_sparse_moe.experts.243.w3", "model.layers.12.block_sparse_moe.experts.244.w3", "model.layers.12.block_sparse_moe.experts.245.w3", "model.layers.12.block_sparse_moe.experts.246.w3", "model.layers.12.block_sparse_moe.experts.247.w3", "model.layers.12.block_sparse_moe.experts.248.w3", "model.layers.12.block_sparse_moe.experts.249.w3", "model.layers.12.block_sparse_moe.experts.250.w3", "model.layers.12.block_sparse_moe.experts.251.w3", "model.layers.12.block_sparse_moe.experts.252.w3", "model.layers.12.block_sparse_moe.experts.253.w3", "model.layers.12.block_sparse_moe.experts.254.w3", "model.layers.12.block_sparse_moe.experts.255.w3", "model.layers.12.block_sparse_moe.experts.0.w2", "model.layers.12.block_sparse_moe.experts.1.w2", "model.layers.12.block_sparse_moe.experts.2.w2", "model.layers.12.block_sparse_moe.experts.3.w2", "model.layers.12.block_sparse_moe.experts.4.w2", "model.layers.12.block_sparse_moe.experts.5.w2", "model.layers.12.block_sparse_moe.experts.6.w2", "model.layers.12.block_sparse_moe.experts.7.w2", "model.layers.12.block_sparse_moe.experts.8.w2", "model.layers.12.block_sparse_moe.experts.9.w2", "model.layers.12.block_sparse_moe.experts.10.w2", "model.layers.12.block_sparse_moe.experts.11.w2", "model.layers.12.block_sparse_moe.experts.12.w2", "model.layers.12.block_sparse_moe.experts.13.w2", "model.layers.12.block_sparse_moe.experts.14.w2", "model.layers.12.block_sparse_moe.experts.15.w2", "model.layers.12.block_sparse_moe.experts.16.w2", "model.layers.12.block_sparse_moe.experts.17.w2", "model.layers.12.block_sparse_moe.experts.18.w2", "model.layers.12.block_sparse_moe.experts.19.w2", "model.layers.12.block_sparse_moe.experts.20.w2", "model.layers.12.block_sparse_moe.experts.21.w2", "model.layers.12.block_sparse_moe.experts.22.w2", "model.layers.12.block_sparse_moe.experts.23.w2", "model.layers.12.block_sparse_moe.experts.24.w2", "model.layers.12.block_sparse_moe.experts.25.w2", "model.layers.12.block_sparse_moe.experts.26.w2", "model.layers.12.block_sparse_moe.experts.27.w2", "model.layers.12.block_sparse_moe.experts.28.w2", "model.layers.12.block_sparse_moe.experts.29.w2", "model.layers.12.block_sparse_moe.experts.30.w2", "model.layers.12.block_sparse_moe.experts.31.w2", "model.layers.12.block_sparse_moe.experts.32.w2", "model.layers.12.block_sparse_moe.experts.33.w2", "model.layers.12.block_sparse_moe.experts.34.w2", "model.layers.12.block_sparse_moe.experts.35.w2", "model.layers.12.block_sparse_moe.experts.36.w2", "model.layers.12.block_sparse_moe.experts.37.w2", "model.layers.12.block_sparse_moe.experts.38.w2", "model.layers.12.block_sparse_moe.experts.39.w2", "model.layers.12.block_sparse_moe.experts.40.w2", "model.layers.12.block_sparse_moe.experts.41.w2", "model.layers.12.block_sparse_moe.experts.42.w2", "model.layers.12.block_sparse_moe.experts.43.w2", "model.layers.12.block_sparse_moe.experts.44.w2", "model.layers.12.block_sparse_moe.experts.45.w2", "model.layers.12.block_sparse_moe.experts.46.w2", "model.layers.12.block_sparse_moe.experts.47.w2", "model.layers.12.block_sparse_moe.experts.48.w2", "model.layers.12.block_sparse_moe.experts.49.w2", "model.layers.12.block_sparse_moe.experts.50.w2", "model.layers.12.block_sparse_moe.experts.51.w2", "model.layers.12.block_sparse_moe.experts.52.w2", "model.layers.12.block_sparse_moe.experts.53.w2", "model.layers.12.block_sparse_moe.experts.54.w2", "model.layers.12.block_sparse_moe.experts.55.w2", "model.layers.12.block_sparse_moe.experts.56.w2", "model.layers.12.block_sparse_moe.experts.57.w2", "model.layers.12.block_sparse_moe.experts.58.w2", "model.layers.12.block_sparse_moe.experts.59.w2", "model.layers.12.block_sparse_moe.experts.60.w2", "model.layers.12.block_sparse_moe.experts.61.w2", "model.layers.12.block_sparse_moe.experts.62.w2", "model.layers.12.block_sparse_moe.experts.63.w2", "model.layers.12.block_sparse_moe.experts.64.w2", "model.layers.12.block_sparse_moe.experts.65.w2", "model.layers.12.block_sparse_moe.experts.66.w2", "model.layers.12.block_sparse_moe.experts.67.w2", "model.layers.12.block_sparse_moe.experts.68.w2", "model.layers.12.block_sparse_moe.experts.69.w2", "model.layers.12.block_sparse_moe.experts.70.w2", "model.layers.12.block_sparse_moe.experts.71.w2", "model.layers.12.block_sparse_moe.experts.72.w2", "model.layers.12.block_sparse_moe.experts.73.w2", "model.layers.12.block_sparse_moe.experts.74.w2", "model.layers.12.block_sparse_moe.experts.75.w2", "model.layers.12.block_sparse_moe.experts.76.w2", "model.layers.12.block_sparse_moe.experts.77.w2", "model.layers.12.block_sparse_moe.experts.78.w2", "model.layers.12.block_sparse_moe.experts.79.w2", "model.layers.12.block_sparse_moe.experts.80.w2", "model.layers.12.block_sparse_moe.experts.81.w2", "model.layers.12.block_sparse_moe.experts.82.w2", "model.layers.12.block_sparse_moe.experts.83.w2", "model.layers.12.block_sparse_moe.experts.84.w2", "model.layers.12.block_sparse_moe.experts.85.w2", "model.layers.12.block_sparse_moe.experts.86.w2", "model.layers.12.block_sparse_moe.experts.87.w2", "model.layers.12.block_sparse_moe.experts.88.w2", "model.layers.12.block_sparse_moe.experts.89.w2", "model.layers.12.block_sparse_moe.experts.90.w2", "model.layers.12.block_sparse_moe.experts.91.w2", "model.layers.12.block_sparse_moe.experts.92.w2", "model.layers.12.block_sparse_moe.experts.93.w2", "model.layers.12.block_sparse_moe.experts.94.w2", "model.layers.12.block_sparse_moe.experts.95.w2", "model.layers.12.block_sparse_moe.experts.96.w2", "model.layers.12.block_sparse_moe.experts.97.w2", "model.layers.12.block_sparse_moe.experts.98.w2", "model.layers.12.block_sparse_moe.experts.99.w2", "model.layers.12.block_sparse_moe.experts.100.w2", "model.layers.12.block_sparse_moe.experts.101.w2", "model.layers.12.block_sparse_moe.experts.102.w2", "model.layers.12.block_sparse_moe.experts.103.w2", "model.layers.12.block_sparse_moe.experts.104.w2", "model.layers.12.block_sparse_moe.experts.105.w2", "model.layers.12.block_sparse_moe.experts.106.w2", "model.layers.12.block_sparse_moe.experts.107.w2", "model.layers.12.block_sparse_moe.experts.108.w2", "model.layers.12.block_sparse_moe.experts.109.w2", "model.layers.12.block_sparse_moe.experts.110.w2", "model.layers.12.block_sparse_moe.experts.111.w2", "model.layers.12.block_sparse_moe.experts.112.w2", "model.layers.12.block_sparse_moe.experts.113.w2", "model.layers.12.block_sparse_moe.experts.114.w2", "model.layers.12.block_sparse_moe.experts.115.w2", "model.layers.12.block_sparse_moe.experts.116.w2", "model.layers.12.block_sparse_moe.experts.117.w2", "model.layers.12.block_sparse_moe.experts.118.w2", "model.layers.12.block_sparse_moe.experts.119.w2", "model.layers.12.block_sparse_moe.experts.120.w2", "model.layers.12.block_sparse_moe.experts.121.w2", "model.layers.12.block_sparse_moe.experts.122.w2", "model.layers.12.block_sparse_moe.experts.123.w2", "model.layers.12.block_sparse_moe.experts.124.w2", "model.layers.12.block_sparse_moe.experts.125.w2", "model.layers.12.block_sparse_moe.experts.126.w2", "model.layers.12.block_sparse_moe.experts.127.w2", "model.layers.12.block_sparse_moe.experts.128.w2", "model.layers.12.block_sparse_moe.experts.129.w2", "model.layers.12.block_sparse_moe.experts.130.w2", "model.layers.12.block_sparse_moe.experts.131.w2", "model.layers.12.block_sparse_moe.experts.132.w2", "model.layers.12.block_sparse_moe.experts.133.w2", "model.layers.12.block_sparse_moe.experts.134.w2", "model.layers.12.block_sparse_moe.experts.135.w2", "model.layers.12.block_sparse_moe.experts.136.w2", "model.layers.12.block_sparse_moe.experts.137.w2", "model.layers.12.block_sparse_moe.experts.138.w2", "model.layers.12.block_sparse_moe.experts.139.w2", "model.layers.12.block_sparse_moe.experts.140.w2", "model.layers.12.block_sparse_moe.experts.141.w2", "model.layers.12.block_sparse_moe.experts.142.w2", "model.layers.12.block_sparse_moe.experts.143.w2", "model.layers.12.block_sparse_moe.experts.144.w2", "model.layers.12.block_sparse_moe.experts.145.w2", "model.layers.12.block_sparse_moe.experts.146.w2", "model.layers.12.block_sparse_moe.experts.147.w2", "model.layers.12.block_sparse_moe.experts.148.w2", "model.layers.12.block_sparse_moe.experts.149.w2", "model.layers.12.block_sparse_moe.experts.150.w2", "model.layers.12.block_sparse_moe.experts.151.w2", "model.layers.12.block_sparse_moe.experts.152.w2", "model.layers.12.block_sparse_moe.experts.153.w2", "model.layers.12.block_sparse_moe.experts.154.w2", "model.layers.12.block_sparse_moe.experts.155.w2", "model.layers.12.block_sparse_moe.experts.156.w2", "model.layers.12.block_sparse_moe.experts.157.w2", "model.layers.12.block_sparse_moe.experts.158.w2", "model.layers.12.block_sparse_moe.experts.159.w2", "model.layers.12.block_sparse_moe.experts.160.w2", "model.layers.12.block_sparse_moe.experts.161.w2", "model.layers.12.block_sparse_moe.experts.162.w2", "model.layers.12.block_sparse_moe.experts.163.w2", "model.layers.12.block_sparse_moe.experts.164.w2", "model.layers.12.block_sparse_moe.experts.165.w2", "model.layers.12.block_sparse_moe.experts.166.w2", "model.layers.12.block_sparse_moe.experts.167.w2", "model.layers.12.block_sparse_moe.experts.168.w2", "model.layers.12.block_sparse_moe.experts.169.w2", "model.layers.12.block_sparse_moe.experts.170.w2", "model.layers.12.block_sparse_moe.experts.171.w2", "model.layers.12.block_sparse_moe.experts.172.w2", "model.layers.12.block_sparse_moe.experts.173.w2", "model.layers.12.block_sparse_moe.experts.174.w2", "model.layers.12.block_sparse_moe.experts.175.w2", "model.layers.12.block_sparse_moe.experts.176.w2", "model.layers.12.block_sparse_moe.experts.177.w2", "model.layers.12.block_sparse_moe.experts.178.w2", "model.layers.12.block_sparse_moe.experts.179.w2", "model.layers.12.block_sparse_moe.experts.180.w2", "model.layers.12.block_sparse_moe.experts.181.w2", "model.layers.12.block_sparse_moe.experts.182.w2", "model.layers.12.block_sparse_moe.experts.183.w2", "model.layers.12.block_sparse_moe.experts.184.w2", "model.layers.12.block_sparse_moe.experts.185.w2", "model.layers.12.block_sparse_moe.experts.186.w2", "model.layers.12.block_sparse_moe.experts.187.w2", "model.layers.12.block_sparse_moe.experts.188.w2", "model.layers.12.block_sparse_moe.experts.189.w2", "model.layers.12.block_sparse_moe.experts.190.w2", "model.layers.12.block_sparse_moe.experts.191.w2", "model.layers.12.block_sparse_moe.experts.192.w2", "model.layers.12.block_sparse_moe.experts.193.w2", "model.layers.12.block_sparse_moe.experts.194.w2", "model.layers.12.block_sparse_moe.experts.195.w2", "model.layers.12.block_sparse_moe.experts.196.w2", "model.layers.12.block_sparse_moe.experts.197.w2", "model.layers.12.block_sparse_moe.experts.198.w2", "model.layers.12.block_sparse_moe.experts.199.w2", "model.layers.12.block_sparse_moe.experts.200.w2", "model.layers.12.block_sparse_moe.experts.201.w2", "model.layers.12.block_sparse_moe.experts.202.w2", "model.layers.12.block_sparse_moe.experts.203.w2", "model.layers.12.block_sparse_moe.experts.204.w2", "model.layers.12.block_sparse_moe.experts.205.w2", "model.layers.12.block_sparse_moe.experts.206.w2", "model.layers.12.block_sparse_moe.experts.207.w2", "model.layers.12.block_sparse_moe.experts.208.w2", "model.layers.12.block_sparse_moe.experts.209.w2", "model.layers.12.block_sparse_moe.experts.210.w2", "model.layers.12.block_sparse_moe.experts.211.w2", "model.layers.12.block_sparse_moe.experts.212.w2", "model.layers.12.block_sparse_moe.experts.213.w2", "model.layers.12.block_sparse_moe.experts.214.w2", "model.layers.12.block_sparse_moe.experts.215.w2", "model.layers.12.block_sparse_moe.experts.216.w2", "model.layers.12.block_sparse_moe.experts.217.w2", "model.layers.12.block_sparse_moe.experts.218.w2", "model.layers.12.block_sparse_moe.experts.219.w2", "model.layers.12.block_sparse_moe.experts.220.w2", "model.layers.12.block_sparse_moe.experts.221.w2", "model.layers.12.block_sparse_moe.experts.222.w2", "model.layers.12.block_sparse_moe.experts.223.w2", "model.layers.12.block_sparse_moe.experts.224.w2", "model.layers.12.block_sparse_moe.experts.225.w2", "model.layers.12.block_sparse_moe.experts.226.w2", "model.layers.12.block_sparse_moe.experts.227.w2", "model.layers.12.block_sparse_moe.experts.228.w2", "model.layers.12.block_sparse_moe.experts.229.w2", "model.layers.12.block_sparse_moe.experts.230.w2", "model.layers.12.block_sparse_moe.experts.231.w2", "model.layers.12.block_sparse_moe.experts.232.w2", "model.layers.12.block_sparse_moe.experts.233.w2", "model.layers.12.block_sparse_moe.experts.234.w2", "model.layers.12.block_sparse_moe.experts.235.w2", "model.layers.12.block_sparse_moe.experts.236.w2", "model.layers.12.block_sparse_moe.experts.237.w2", "model.layers.12.block_sparse_moe.experts.238.w2", "model.layers.12.block_sparse_moe.experts.239.w2", "model.layers.12.block_sparse_moe.experts.240.w2", "model.layers.12.block_sparse_moe.experts.241.w2", "model.layers.12.block_sparse_moe.experts.242.w2", "model.layers.12.block_sparse_moe.experts.243.w2", "model.layers.12.block_sparse_moe.experts.244.w2", "model.layers.12.block_sparse_moe.experts.245.w2", "model.layers.12.block_sparse_moe.experts.246.w2", "model.layers.12.block_sparse_moe.experts.247.w2", "model.layers.12.block_sparse_moe.experts.248.w2", "model.layers.12.block_sparse_moe.experts.249.w2", "model.layers.12.block_sparse_moe.experts.250.w2", "model.layers.12.block_sparse_moe.experts.251.w2", "model.layers.12.block_sparse_moe.experts.252.w2", "model.layers.12.block_sparse_moe.experts.253.w2", "model.layers.12.block_sparse_moe.experts.254.w2", "model.layers.12.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0008408358320593501, "dbits": 3623878656 } ] }, { "idx": 26, "layers": [ "model.layers.13.self_attn.q_proj", "model.layers.13.self_attn.k_proj", "model.layers.13.self_attn.v_proj", "model.layers.13.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0013393597677349711, "dbits": 44040192 } ] }, { "idx": 27, "layers": [ "model.layers.13.block_sparse_moe.experts.0.w1", "model.layers.13.block_sparse_moe.experts.1.w1", "model.layers.13.block_sparse_moe.experts.2.w1", "model.layers.13.block_sparse_moe.experts.3.w1", "model.layers.13.block_sparse_moe.experts.4.w1", "model.layers.13.block_sparse_moe.experts.5.w1", "model.layers.13.block_sparse_moe.experts.6.w1", "model.layers.13.block_sparse_moe.experts.7.w1", "model.layers.13.block_sparse_moe.experts.8.w1", "model.layers.13.block_sparse_moe.experts.9.w1", "model.layers.13.block_sparse_moe.experts.10.w1", "model.layers.13.block_sparse_moe.experts.11.w1", "model.layers.13.block_sparse_moe.experts.12.w1", "model.layers.13.block_sparse_moe.experts.13.w1", "model.layers.13.block_sparse_moe.experts.14.w1", "model.layers.13.block_sparse_moe.experts.15.w1", "model.layers.13.block_sparse_moe.experts.16.w1", "model.layers.13.block_sparse_moe.experts.17.w1", "model.layers.13.block_sparse_moe.experts.18.w1", "model.layers.13.block_sparse_moe.experts.19.w1", "model.layers.13.block_sparse_moe.experts.20.w1", "model.layers.13.block_sparse_moe.experts.21.w1", "model.layers.13.block_sparse_moe.experts.22.w1", "model.layers.13.block_sparse_moe.experts.23.w1", "model.layers.13.block_sparse_moe.experts.24.w1", "model.layers.13.block_sparse_moe.experts.25.w1", "model.layers.13.block_sparse_moe.experts.26.w1", "model.layers.13.block_sparse_moe.experts.27.w1", "model.layers.13.block_sparse_moe.experts.28.w1", "model.layers.13.block_sparse_moe.experts.29.w1", "model.layers.13.block_sparse_moe.experts.30.w1", "model.layers.13.block_sparse_moe.experts.31.w1", "model.layers.13.block_sparse_moe.experts.32.w1", "model.layers.13.block_sparse_moe.experts.33.w1", "model.layers.13.block_sparse_moe.experts.34.w1", "model.layers.13.block_sparse_moe.experts.35.w1", "model.layers.13.block_sparse_moe.experts.36.w1", "model.layers.13.block_sparse_moe.experts.37.w1", "model.layers.13.block_sparse_moe.experts.38.w1", "model.layers.13.block_sparse_moe.experts.39.w1", "model.layers.13.block_sparse_moe.experts.40.w1", "model.layers.13.block_sparse_moe.experts.41.w1", "model.layers.13.block_sparse_moe.experts.42.w1", "model.layers.13.block_sparse_moe.experts.43.w1", "model.layers.13.block_sparse_moe.experts.44.w1", "model.layers.13.block_sparse_moe.experts.45.w1", "model.layers.13.block_sparse_moe.experts.46.w1", "model.layers.13.block_sparse_moe.experts.47.w1", "model.layers.13.block_sparse_moe.experts.48.w1", "model.layers.13.block_sparse_moe.experts.49.w1", "model.layers.13.block_sparse_moe.experts.50.w1", "model.layers.13.block_sparse_moe.experts.51.w1", "model.layers.13.block_sparse_moe.experts.52.w1", "model.layers.13.block_sparse_moe.experts.53.w1", "model.layers.13.block_sparse_moe.experts.54.w1", "model.layers.13.block_sparse_moe.experts.55.w1", "model.layers.13.block_sparse_moe.experts.56.w1", "model.layers.13.block_sparse_moe.experts.57.w1", "model.layers.13.block_sparse_moe.experts.58.w1", "model.layers.13.block_sparse_moe.experts.59.w1", "model.layers.13.block_sparse_moe.experts.60.w1", "model.layers.13.block_sparse_moe.experts.61.w1", "model.layers.13.block_sparse_moe.experts.62.w1", "model.layers.13.block_sparse_moe.experts.63.w1", "model.layers.13.block_sparse_moe.experts.64.w1", "model.layers.13.block_sparse_moe.experts.65.w1", "model.layers.13.block_sparse_moe.experts.66.w1", "model.layers.13.block_sparse_moe.experts.67.w1", "model.layers.13.block_sparse_moe.experts.68.w1", "model.layers.13.block_sparse_moe.experts.69.w1", "model.layers.13.block_sparse_moe.experts.70.w1", "model.layers.13.block_sparse_moe.experts.71.w1", "model.layers.13.block_sparse_moe.experts.72.w1", "model.layers.13.block_sparse_moe.experts.73.w1", "model.layers.13.block_sparse_moe.experts.74.w1", "model.layers.13.block_sparse_moe.experts.75.w1", "model.layers.13.block_sparse_moe.experts.76.w1", "model.layers.13.block_sparse_moe.experts.77.w1", "model.layers.13.block_sparse_moe.experts.78.w1", "model.layers.13.block_sparse_moe.experts.79.w1", "model.layers.13.block_sparse_moe.experts.80.w1", "model.layers.13.block_sparse_moe.experts.81.w1", "model.layers.13.block_sparse_moe.experts.82.w1", "model.layers.13.block_sparse_moe.experts.83.w1", "model.layers.13.block_sparse_moe.experts.84.w1", "model.layers.13.block_sparse_moe.experts.85.w1", "model.layers.13.block_sparse_moe.experts.86.w1", "model.layers.13.block_sparse_moe.experts.87.w1", "model.layers.13.block_sparse_moe.experts.88.w1", "model.layers.13.block_sparse_moe.experts.89.w1", "model.layers.13.block_sparse_moe.experts.90.w1", "model.layers.13.block_sparse_moe.experts.91.w1", "model.layers.13.block_sparse_moe.experts.92.w1", "model.layers.13.block_sparse_moe.experts.93.w1", "model.layers.13.block_sparse_moe.experts.94.w1", "model.layers.13.block_sparse_moe.experts.95.w1", "model.layers.13.block_sparse_moe.experts.96.w1", "model.layers.13.block_sparse_moe.experts.97.w1", "model.layers.13.block_sparse_moe.experts.98.w1", "model.layers.13.block_sparse_moe.experts.99.w1", "model.layers.13.block_sparse_moe.experts.100.w1", "model.layers.13.block_sparse_moe.experts.101.w1", "model.layers.13.block_sparse_moe.experts.102.w1", "model.layers.13.block_sparse_moe.experts.103.w1", "model.layers.13.block_sparse_moe.experts.104.w1", "model.layers.13.block_sparse_moe.experts.105.w1", "model.layers.13.block_sparse_moe.experts.106.w1", "model.layers.13.block_sparse_moe.experts.107.w1", "model.layers.13.block_sparse_moe.experts.108.w1", "model.layers.13.block_sparse_moe.experts.109.w1", "model.layers.13.block_sparse_moe.experts.110.w1", "model.layers.13.block_sparse_moe.experts.111.w1", "model.layers.13.block_sparse_moe.experts.112.w1", "model.layers.13.block_sparse_moe.experts.113.w1", "model.layers.13.block_sparse_moe.experts.114.w1", "model.layers.13.block_sparse_moe.experts.115.w1", "model.layers.13.block_sparse_moe.experts.116.w1", "model.layers.13.block_sparse_moe.experts.117.w1", "model.layers.13.block_sparse_moe.experts.118.w1", "model.layers.13.block_sparse_moe.experts.119.w1", "model.layers.13.block_sparse_moe.experts.120.w1", "model.layers.13.block_sparse_moe.experts.121.w1", "model.layers.13.block_sparse_moe.experts.122.w1", "model.layers.13.block_sparse_moe.experts.123.w1", "model.layers.13.block_sparse_moe.experts.124.w1", "model.layers.13.block_sparse_moe.experts.125.w1", "model.layers.13.block_sparse_moe.experts.126.w1", "model.layers.13.block_sparse_moe.experts.127.w1", "model.layers.13.block_sparse_moe.experts.128.w1", "model.layers.13.block_sparse_moe.experts.129.w1", "model.layers.13.block_sparse_moe.experts.130.w1", "model.layers.13.block_sparse_moe.experts.131.w1", "model.layers.13.block_sparse_moe.experts.132.w1", "model.layers.13.block_sparse_moe.experts.133.w1", "model.layers.13.block_sparse_moe.experts.134.w1", "model.layers.13.block_sparse_moe.experts.135.w1", "model.layers.13.block_sparse_moe.experts.136.w1", "model.layers.13.block_sparse_moe.experts.137.w1", "model.layers.13.block_sparse_moe.experts.138.w1", "model.layers.13.block_sparse_moe.experts.139.w1", "model.layers.13.block_sparse_moe.experts.140.w1", "model.layers.13.block_sparse_moe.experts.141.w1", "model.layers.13.block_sparse_moe.experts.142.w1", "model.layers.13.block_sparse_moe.experts.143.w1", "model.layers.13.block_sparse_moe.experts.144.w1", "model.layers.13.block_sparse_moe.experts.145.w1", "model.layers.13.block_sparse_moe.experts.146.w1", "model.layers.13.block_sparse_moe.experts.147.w1", "model.layers.13.block_sparse_moe.experts.148.w1", "model.layers.13.block_sparse_moe.experts.149.w1", "model.layers.13.block_sparse_moe.experts.150.w1", "model.layers.13.block_sparse_moe.experts.151.w1", "model.layers.13.block_sparse_moe.experts.152.w1", "model.layers.13.block_sparse_moe.experts.153.w1", "model.layers.13.block_sparse_moe.experts.154.w1", "model.layers.13.block_sparse_moe.experts.155.w1", "model.layers.13.block_sparse_moe.experts.156.w1", "model.layers.13.block_sparse_moe.experts.157.w1", "model.layers.13.block_sparse_moe.experts.158.w1", "model.layers.13.block_sparse_moe.experts.159.w1", "model.layers.13.block_sparse_moe.experts.160.w1", "model.layers.13.block_sparse_moe.experts.161.w1", "model.layers.13.block_sparse_moe.experts.162.w1", "model.layers.13.block_sparse_moe.experts.163.w1", "model.layers.13.block_sparse_moe.experts.164.w1", "model.layers.13.block_sparse_moe.experts.165.w1", "model.layers.13.block_sparse_moe.experts.166.w1", "model.layers.13.block_sparse_moe.experts.167.w1", "model.layers.13.block_sparse_moe.experts.168.w1", "model.layers.13.block_sparse_moe.experts.169.w1", "model.layers.13.block_sparse_moe.experts.170.w1", "model.layers.13.block_sparse_moe.experts.171.w1", "model.layers.13.block_sparse_moe.experts.172.w1", "model.layers.13.block_sparse_moe.experts.173.w1", "model.layers.13.block_sparse_moe.experts.174.w1", "model.layers.13.block_sparse_moe.experts.175.w1", "model.layers.13.block_sparse_moe.experts.176.w1", "model.layers.13.block_sparse_moe.experts.177.w1", "model.layers.13.block_sparse_moe.experts.178.w1", "model.layers.13.block_sparse_moe.experts.179.w1", "model.layers.13.block_sparse_moe.experts.180.w1", "model.layers.13.block_sparse_moe.experts.181.w1", "model.layers.13.block_sparse_moe.experts.182.w1", "model.layers.13.block_sparse_moe.experts.183.w1", "model.layers.13.block_sparse_moe.experts.184.w1", "model.layers.13.block_sparse_moe.experts.185.w1", "model.layers.13.block_sparse_moe.experts.186.w1", "model.layers.13.block_sparse_moe.experts.187.w1", "model.layers.13.block_sparse_moe.experts.188.w1", "model.layers.13.block_sparse_moe.experts.189.w1", "model.layers.13.block_sparse_moe.experts.190.w1", "model.layers.13.block_sparse_moe.experts.191.w1", "model.layers.13.block_sparse_moe.experts.192.w1", "model.layers.13.block_sparse_moe.experts.193.w1", "model.layers.13.block_sparse_moe.experts.194.w1", "model.layers.13.block_sparse_moe.experts.195.w1", "model.layers.13.block_sparse_moe.experts.196.w1", "model.layers.13.block_sparse_moe.experts.197.w1", "model.layers.13.block_sparse_moe.experts.198.w1", "model.layers.13.block_sparse_moe.experts.199.w1", "model.layers.13.block_sparse_moe.experts.200.w1", "model.layers.13.block_sparse_moe.experts.201.w1", "model.layers.13.block_sparse_moe.experts.202.w1", "model.layers.13.block_sparse_moe.experts.203.w1", "model.layers.13.block_sparse_moe.experts.204.w1", "model.layers.13.block_sparse_moe.experts.205.w1", "model.layers.13.block_sparse_moe.experts.206.w1", "model.layers.13.block_sparse_moe.experts.207.w1", "model.layers.13.block_sparse_moe.experts.208.w1", "model.layers.13.block_sparse_moe.experts.209.w1", "model.layers.13.block_sparse_moe.experts.210.w1", "model.layers.13.block_sparse_moe.experts.211.w1", "model.layers.13.block_sparse_moe.experts.212.w1", "model.layers.13.block_sparse_moe.experts.213.w1", "model.layers.13.block_sparse_moe.experts.214.w1", "model.layers.13.block_sparse_moe.experts.215.w1", "model.layers.13.block_sparse_moe.experts.216.w1", "model.layers.13.block_sparse_moe.experts.217.w1", "model.layers.13.block_sparse_moe.experts.218.w1", "model.layers.13.block_sparse_moe.experts.219.w1", "model.layers.13.block_sparse_moe.experts.220.w1", "model.layers.13.block_sparse_moe.experts.221.w1", "model.layers.13.block_sparse_moe.experts.222.w1", "model.layers.13.block_sparse_moe.experts.223.w1", "model.layers.13.block_sparse_moe.experts.224.w1", "model.layers.13.block_sparse_moe.experts.225.w1", "model.layers.13.block_sparse_moe.experts.226.w1", "model.layers.13.block_sparse_moe.experts.227.w1", "model.layers.13.block_sparse_moe.experts.228.w1", "model.layers.13.block_sparse_moe.experts.229.w1", "model.layers.13.block_sparse_moe.experts.230.w1", "model.layers.13.block_sparse_moe.experts.231.w1", "model.layers.13.block_sparse_moe.experts.232.w1", "model.layers.13.block_sparse_moe.experts.233.w1", "model.layers.13.block_sparse_moe.experts.234.w1", "model.layers.13.block_sparse_moe.experts.235.w1", "model.layers.13.block_sparse_moe.experts.236.w1", "model.layers.13.block_sparse_moe.experts.237.w1", "model.layers.13.block_sparse_moe.experts.238.w1", "model.layers.13.block_sparse_moe.experts.239.w1", "model.layers.13.block_sparse_moe.experts.240.w1", "model.layers.13.block_sparse_moe.experts.241.w1", "model.layers.13.block_sparse_moe.experts.242.w1", "model.layers.13.block_sparse_moe.experts.243.w1", "model.layers.13.block_sparse_moe.experts.244.w1", "model.layers.13.block_sparse_moe.experts.245.w1", "model.layers.13.block_sparse_moe.experts.246.w1", "model.layers.13.block_sparse_moe.experts.247.w1", "model.layers.13.block_sparse_moe.experts.248.w1", "model.layers.13.block_sparse_moe.experts.249.w1", "model.layers.13.block_sparse_moe.experts.250.w1", "model.layers.13.block_sparse_moe.experts.251.w1", "model.layers.13.block_sparse_moe.experts.252.w1", "model.layers.13.block_sparse_moe.experts.253.w1", "model.layers.13.block_sparse_moe.experts.254.w1", "model.layers.13.block_sparse_moe.experts.255.w1", "model.layers.13.block_sparse_moe.experts.0.w3", "model.layers.13.block_sparse_moe.experts.1.w3", "model.layers.13.block_sparse_moe.experts.2.w3", "model.layers.13.block_sparse_moe.experts.3.w3", "model.layers.13.block_sparse_moe.experts.4.w3", "model.layers.13.block_sparse_moe.experts.5.w3", "model.layers.13.block_sparse_moe.experts.6.w3", "model.layers.13.block_sparse_moe.experts.7.w3", "model.layers.13.block_sparse_moe.experts.8.w3", "model.layers.13.block_sparse_moe.experts.9.w3", "model.layers.13.block_sparse_moe.experts.10.w3", "model.layers.13.block_sparse_moe.experts.11.w3", "model.layers.13.block_sparse_moe.experts.12.w3", "model.layers.13.block_sparse_moe.experts.13.w3", "model.layers.13.block_sparse_moe.experts.14.w3", "model.layers.13.block_sparse_moe.experts.15.w3", "model.layers.13.block_sparse_moe.experts.16.w3", "model.layers.13.block_sparse_moe.experts.17.w3", "model.layers.13.block_sparse_moe.experts.18.w3", "model.layers.13.block_sparse_moe.experts.19.w3", "model.layers.13.block_sparse_moe.experts.20.w3", "model.layers.13.block_sparse_moe.experts.21.w3", "model.layers.13.block_sparse_moe.experts.22.w3", "model.layers.13.block_sparse_moe.experts.23.w3", "model.layers.13.block_sparse_moe.experts.24.w3", "model.layers.13.block_sparse_moe.experts.25.w3", "model.layers.13.block_sparse_moe.experts.26.w3", "model.layers.13.block_sparse_moe.experts.27.w3", "model.layers.13.block_sparse_moe.experts.28.w3", "model.layers.13.block_sparse_moe.experts.29.w3", "model.layers.13.block_sparse_moe.experts.30.w3", "model.layers.13.block_sparse_moe.experts.31.w3", "model.layers.13.block_sparse_moe.experts.32.w3", "model.layers.13.block_sparse_moe.experts.33.w3", "model.layers.13.block_sparse_moe.experts.34.w3", "model.layers.13.block_sparse_moe.experts.35.w3", "model.layers.13.block_sparse_moe.experts.36.w3", "model.layers.13.block_sparse_moe.experts.37.w3", "model.layers.13.block_sparse_moe.experts.38.w3", "model.layers.13.block_sparse_moe.experts.39.w3", "model.layers.13.block_sparse_moe.experts.40.w3", "model.layers.13.block_sparse_moe.experts.41.w3", "model.layers.13.block_sparse_moe.experts.42.w3", "model.layers.13.block_sparse_moe.experts.43.w3", "model.layers.13.block_sparse_moe.experts.44.w3", "model.layers.13.block_sparse_moe.experts.45.w3", "model.layers.13.block_sparse_moe.experts.46.w3", "model.layers.13.block_sparse_moe.experts.47.w3", "model.layers.13.block_sparse_moe.experts.48.w3", "model.layers.13.block_sparse_moe.experts.49.w3", "model.layers.13.block_sparse_moe.experts.50.w3", "model.layers.13.block_sparse_moe.experts.51.w3", "model.layers.13.block_sparse_moe.experts.52.w3", "model.layers.13.block_sparse_moe.experts.53.w3", "model.layers.13.block_sparse_moe.experts.54.w3", "model.layers.13.block_sparse_moe.experts.55.w3", "model.layers.13.block_sparse_moe.experts.56.w3", "model.layers.13.block_sparse_moe.experts.57.w3", "model.layers.13.block_sparse_moe.experts.58.w3", "model.layers.13.block_sparse_moe.experts.59.w3", "model.layers.13.block_sparse_moe.experts.60.w3", "model.layers.13.block_sparse_moe.experts.61.w3", "model.layers.13.block_sparse_moe.experts.62.w3", "model.layers.13.block_sparse_moe.experts.63.w3", "model.layers.13.block_sparse_moe.experts.64.w3", "model.layers.13.block_sparse_moe.experts.65.w3", "model.layers.13.block_sparse_moe.experts.66.w3", "model.layers.13.block_sparse_moe.experts.67.w3", "model.layers.13.block_sparse_moe.experts.68.w3", "model.layers.13.block_sparse_moe.experts.69.w3", "model.layers.13.block_sparse_moe.experts.70.w3", "model.layers.13.block_sparse_moe.experts.71.w3", "model.layers.13.block_sparse_moe.experts.72.w3", "model.layers.13.block_sparse_moe.experts.73.w3", "model.layers.13.block_sparse_moe.experts.74.w3", "model.layers.13.block_sparse_moe.experts.75.w3", "model.layers.13.block_sparse_moe.experts.76.w3", "model.layers.13.block_sparse_moe.experts.77.w3", "model.layers.13.block_sparse_moe.experts.78.w3", "model.layers.13.block_sparse_moe.experts.79.w3", "model.layers.13.block_sparse_moe.experts.80.w3", "model.layers.13.block_sparse_moe.experts.81.w3", "model.layers.13.block_sparse_moe.experts.82.w3", "model.layers.13.block_sparse_moe.experts.83.w3", "model.layers.13.block_sparse_moe.experts.84.w3", "model.layers.13.block_sparse_moe.experts.85.w3", "model.layers.13.block_sparse_moe.experts.86.w3", "model.layers.13.block_sparse_moe.experts.87.w3", "model.layers.13.block_sparse_moe.experts.88.w3", "model.layers.13.block_sparse_moe.experts.89.w3", "model.layers.13.block_sparse_moe.experts.90.w3", "model.layers.13.block_sparse_moe.experts.91.w3", "model.layers.13.block_sparse_moe.experts.92.w3", "model.layers.13.block_sparse_moe.experts.93.w3", "model.layers.13.block_sparse_moe.experts.94.w3", "model.layers.13.block_sparse_moe.experts.95.w3", "model.layers.13.block_sparse_moe.experts.96.w3", "model.layers.13.block_sparse_moe.experts.97.w3", "model.layers.13.block_sparse_moe.experts.98.w3", "model.layers.13.block_sparse_moe.experts.99.w3", "model.layers.13.block_sparse_moe.experts.100.w3", "model.layers.13.block_sparse_moe.experts.101.w3", "model.layers.13.block_sparse_moe.experts.102.w3", "model.layers.13.block_sparse_moe.experts.103.w3", "model.layers.13.block_sparse_moe.experts.104.w3", "model.layers.13.block_sparse_moe.experts.105.w3", "model.layers.13.block_sparse_moe.experts.106.w3", "model.layers.13.block_sparse_moe.experts.107.w3", "model.layers.13.block_sparse_moe.experts.108.w3", "model.layers.13.block_sparse_moe.experts.109.w3", "model.layers.13.block_sparse_moe.experts.110.w3", "model.layers.13.block_sparse_moe.experts.111.w3", "model.layers.13.block_sparse_moe.experts.112.w3", "model.layers.13.block_sparse_moe.experts.113.w3", "model.layers.13.block_sparse_moe.experts.114.w3", "model.layers.13.block_sparse_moe.experts.115.w3", "model.layers.13.block_sparse_moe.experts.116.w3", "model.layers.13.block_sparse_moe.experts.117.w3", "model.layers.13.block_sparse_moe.experts.118.w3", "model.layers.13.block_sparse_moe.experts.119.w3", "model.layers.13.block_sparse_moe.experts.120.w3", "model.layers.13.block_sparse_moe.experts.121.w3", "model.layers.13.block_sparse_moe.experts.122.w3", "model.layers.13.block_sparse_moe.experts.123.w3", "model.layers.13.block_sparse_moe.experts.124.w3", "model.layers.13.block_sparse_moe.experts.125.w3", "model.layers.13.block_sparse_moe.experts.126.w3", "model.layers.13.block_sparse_moe.experts.127.w3", "model.layers.13.block_sparse_moe.experts.128.w3", "model.layers.13.block_sparse_moe.experts.129.w3", "model.layers.13.block_sparse_moe.experts.130.w3", "model.layers.13.block_sparse_moe.experts.131.w3", "model.layers.13.block_sparse_moe.experts.132.w3", "model.layers.13.block_sparse_moe.experts.133.w3", "model.layers.13.block_sparse_moe.experts.134.w3", "model.layers.13.block_sparse_moe.experts.135.w3", "model.layers.13.block_sparse_moe.experts.136.w3", "model.layers.13.block_sparse_moe.experts.137.w3", "model.layers.13.block_sparse_moe.experts.138.w3", "model.layers.13.block_sparse_moe.experts.139.w3", "model.layers.13.block_sparse_moe.experts.140.w3", "model.layers.13.block_sparse_moe.experts.141.w3", "model.layers.13.block_sparse_moe.experts.142.w3", "model.layers.13.block_sparse_moe.experts.143.w3", "model.layers.13.block_sparse_moe.experts.144.w3", "model.layers.13.block_sparse_moe.experts.145.w3", "model.layers.13.block_sparse_moe.experts.146.w3", "model.layers.13.block_sparse_moe.experts.147.w3", "model.layers.13.block_sparse_moe.experts.148.w3", "model.layers.13.block_sparse_moe.experts.149.w3", "model.layers.13.block_sparse_moe.experts.150.w3", "model.layers.13.block_sparse_moe.experts.151.w3", "model.layers.13.block_sparse_moe.experts.152.w3", "model.layers.13.block_sparse_moe.experts.153.w3", "model.layers.13.block_sparse_moe.experts.154.w3", "model.layers.13.block_sparse_moe.experts.155.w3", "model.layers.13.block_sparse_moe.experts.156.w3", "model.layers.13.block_sparse_moe.experts.157.w3", "model.layers.13.block_sparse_moe.experts.158.w3", "model.layers.13.block_sparse_moe.experts.159.w3", "model.layers.13.block_sparse_moe.experts.160.w3", "model.layers.13.block_sparse_moe.experts.161.w3", "model.layers.13.block_sparse_moe.experts.162.w3", "model.layers.13.block_sparse_moe.experts.163.w3", "model.layers.13.block_sparse_moe.experts.164.w3", "model.layers.13.block_sparse_moe.experts.165.w3", "model.layers.13.block_sparse_moe.experts.166.w3", "model.layers.13.block_sparse_moe.experts.167.w3", "model.layers.13.block_sparse_moe.experts.168.w3", "model.layers.13.block_sparse_moe.experts.169.w3", "model.layers.13.block_sparse_moe.experts.170.w3", "model.layers.13.block_sparse_moe.experts.171.w3", "model.layers.13.block_sparse_moe.experts.172.w3", "model.layers.13.block_sparse_moe.experts.173.w3", "model.layers.13.block_sparse_moe.experts.174.w3", "model.layers.13.block_sparse_moe.experts.175.w3", "model.layers.13.block_sparse_moe.experts.176.w3", "model.layers.13.block_sparse_moe.experts.177.w3", "model.layers.13.block_sparse_moe.experts.178.w3", "model.layers.13.block_sparse_moe.experts.179.w3", "model.layers.13.block_sparse_moe.experts.180.w3", "model.layers.13.block_sparse_moe.experts.181.w3", "model.layers.13.block_sparse_moe.experts.182.w3", "model.layers.13.block_sparse_moe.experts.183.w3", "model.layers.13.block_sparse_moe.experts.184.w3", "model.layers.13.block_sparse_moe.experts.185.w3", "model.layers.13.block_sparse_moe.experts.186.w3", "model.layers.13.block_sparse_moe.experts.187.w3", "model.layers.13.block_sparse_moe.experts.188.w3", "model.layers.13.block_sparse_moe.experts.189.w3", "model.layers.13.block_sparse_moe.experts.190.w3", "model.layers.13.block_sparse_moe.experts.191.w3", "model.layers.13.block_sparse_moe.experts.192.w3", "model.layers.13.block_sparse_moe.experts.193.w3", "model.layers.13.block_sparse_moe.experts.194.w3", "model.layers.13.block_sparse_moe.experts.195.w3", "model.layers.13.block_sparse_moe.experts.196.w3", "model.layers.13.block_sparse_moe.experts.197.w3", "model.layers.13.block_sparse_moe.experts.198.w3", "model.layers.13.block_sparse_moe.experts.199.w3", "model.layers.13.block_sparse_moe.experts.200.w3", "model.layers.13.block_sparse_moe.experts.201.w3", "model.layers.13.block_sparse_moe.experts.202.w3", "model.layers.13.block_sparse_moe.experts.203.w3", "model.layers.13.block_sparse_moe.experts.204.w3", "model.layers.13.block_sparse_moe.experts.205.w3", "model.layers.13.block_sparse_moe.experts.206.w3", "model.layers.13.block_sparse_moe.experts.207.w3", "model.layers.13.block_sparse_moe.experts.208.w3", "model.layers.13.block_sparse_moe.experts.209.w3", "model.layers.13.block_sparse_moe.experts.210.w3", "model.layers.13.block_sparse_moe.experts.211.w3", "model.layers.13.block_sparse_moe.experts.212.w3", "model.layers.13.block_sparse_moe.experts.213.w3", "model.layers.13.block_sparse_moe.experts.214.w3", "model.layers.13.block_sparse_moe.experts.215.w3", "model.layers.13.block_sparse_moe.experts.216.w3", "model.layers.13.block_sparse_moe.experts.217.w3", "model.layers.13.block_sparse_moe.experts.218.w3", "model.layers.13.block_sparse_moe.experts.219.w3", "model.layers.13.block_sparse_moe.experts.220.w3", "model.layers.13.block_sparse_moe.experts.221.w3", "model.layers.13.block_sparse_moe.experts.222.w3", "model.layers.13.block_sparse_moe.experts.223.w3", "model.layers.13.block_sparse_moe.experts.224.w3", "model.layers.13.block_sparse_moe.experts.225.w3", "model.layers.13.block_sparse_moe.experts.226.w3", "model.layers.13.block_sparse_moe.experts.227.w3", "model.layers.13.block_sparse_moe.experts.228.w3", "model.layers.13.block_sparse_moe.experts.229.w3", "model.layers.13.block_sparse_moe.experts.230.w3", "model.layers.13.block_sparse_moe.experts.231.w3", "model.layers.13.block_sparse_moe.experts.232.w3", "model.layers.13.block_sparse_moe.experts.233.w3", "model.layers.13.block_sparse_moe.experts.234.w3", "model.layers.13.block_sparse_moe.experts.235.w3", "model.layers.13.block_sparse_moe.experts.236.w3", "model.layers.13.block_sparse_moe.experts.237.w3", "model.layers.13.block_sparse_moe.experts.238.w3", "model.layers.13.block_sparse_moe.experts.239.w3", "model.layers.13.block_sparse_moe.experts.240.w3", "model.layers.13.block_sparse_moe.experts.241.w3", "model.layers.13.block_sparse_moe.experts.242.w3", "model.layers.13.block_sparse_moe.experts.243.w3", "model.layers.13.block_sparse_moe.experts.244.w3", "model.layers.13.block_sparse_moe.experts.245.w3", "model.layers.13.block_sparse_moe.experts.246.w3", "model.layers.13.block_sparse_moe.experts.247.w3", "model.layers.13.block_sparse_moe.experts.248.w3", "model.layers.13.block_sparse_moe.experts.249.w3", "model.layers.13.block_sparse_moe.experts.250.w3", "model.layers.13.block_sparse_moe.experts.251.w3", "model.layers.13.block_sparse_moe.experts.252.w3", "model.layers.13.block_sparse_moe.experts.253.w3", "model.layers.13.block_sparse_moe.experts.254.w3", "model.layers.13.block_sparse_moe.experts.255.w3", "model.layers.13.block_sparse_moe.experts.0.w2", "model.layers.13.block_sparse_moe.experts.1.w2", "model.layers.13.block_sparse_moe.experts.2.w2", "model.layers.13.block_sparse_moe.experts.3.w2", "model.layers.13.block_sparse_moe.experts.4.w2", "model.layers.13.block_sparse_moe.experts.5.w2", "model.layers.13.block_sparse_moe.experts.6.w2", "model.layers.13.block_sparse_moe.experts.7.w2", "model.layers.13.block_sparse_moe.experts.8.w2", "model.layers.13.block_sparse_moe.experts.9.w2", "model.layers.13.block_sparse_moe.experts.10.w2", "model.layers.13.block_sparse_moe.experts.11.w2", "model.layers.13.block_sparse_moe.experts.12.w2", "model.layers.13.block_sparse_moe.experts.13.w2", "model.layers.13.block_sparse_moe.experts.14.w2", "model.layers.13.block_sparse_moe.experts.15.w2", "model.layers.13.block_sparse_moe.experts.16.w2", "model.layers.13.block_sparse_moe.experts.17.w2", "model.layers.13.block_sparse_moe.experts.18.w2", "model.layers.13.block_sparse_moe.experts.19.w2", "model.layers.13.block_sparse_moe.experts.20.w2", "model.layers.13.block_sparse_moe.experts.21.w2", "model.layers.13.block_sparse_moe.experts.22.w2", "model.layers.13.block_sparse_moe.experts.23.w2", "model.layers.13.block_sparse_moe.experts.24.w2", "model.layers.13.block_sparse_moe.experts.25.w2", "model.layers.13.block_sparse_moe.experts.26.w2", "model.layers.13.block_sparse_moe.experts.27.w2", "model.layers.13.block_sparse_moe.experts.28.w2", "model.layers.13.block_sparse_moe.experts.29.w2", "model.layers.13.block_sparse_moe.experts.30.w2", "model.layers.13.block_sparse_moe.experts.31.w2", "model.layers.13.block_sparse_moe.experts.32.w2", "model.layers.13.block_sparse_moe.experts.33.w2", "model.layers.13.block_sparse_moe.experts.34.w2", "model.layers.13.block_sparse_moe.experts.35.w2", "model.layers.13.block_sparse_moe.experts.36.w2", "model.layers.13.block_sparse_moe.experts.37.w2", "model.layers.13.block_sparse_moe.experts.38.w2", "model.layers.13.block_sparse_moe.experts.39.w2", "model.layers.13.block_sparse_moe.experts.40.w2", "model.layers.13.block_sparse_moe.experts.41.w2", "model.layers.13.block_sparse_moe.experts.42.w2", "model.layers.13.block_sparse_moe.experts.43.w2", "model.layers.13.block_sparse_moe.experts.44.w2", "model.layers.13.block_sparse_moe.experts.45.w2", "model.layers.13.block_sparse_moe.experts.46.w2", "model.layers.13.block_sparse_moe.experts.47.w2", "model.layers.13.block_sparse_moe.experts.48.w2", "model.layers.13.block_sparse_moe.experts.49.w2", "model.layers.13.block_sparse_moe.experts.50.w2", "model.layers.13.block_sparse_moe.experts.51.w2", "model.layers.13.block_sparse_moe.experts.52.w2", "model.layers.13.block_sparse_moe.experts.53.w2", "model.layers.13.block_sparse_moe.experts.54.w2", "model.layers.13.block_sparse_moe.experts.55.w2", "model.layers.13.block_sparse_moe.experts.56.w2", "model.layers.13.block_sparse_moe.experts.57.w2", "model.layers.13.block_sparse_moe.experts.58.w2", "model.layers.13.block_sparse_moe.experts.59.w2", "model.layers.13.block_sparse_moe.experts.60.w2", "model.layers.13.block_sparse_moe.experts.61.w2", "model.layers.13.block_sparse_moe.experts.62.w2", "model.layers.13.block_sparse_moe.experts.63.w2", "model.layers.13.block_sparse_moe.experts.64.w2", "model.layers.13.block_sparse_moe.experts.65.w2", "model.layers.13.block_sparse_moe.experts.66.w2", "model.layers.13.block_sparse_moe.experts.67.w2", "model.layers.13.block_sparse_moe.experts.68.w2", "model.layers.13.block_sparse_moe.experts.69.w2", "model.layers.13.block_sparse_moe.experts.70.w2", "model.layers.13.block_sparse_moe.experts.71.w2", "model.layers.13.block_sparse_moe.experts.72.w2", "model.layers.13.block_sparse_moe.experts.73.w2", "model.layers.13.block_sparse_moe.experts.74.w2", "model.layers.13.block_sparse_moe.experts.75.w2", "model.layers.13.block_sparse_moe.experts.76.w2", "model.layers.13.block_sparse_moe.experts.77.w2", "model.layers.13.block_sparse_moe.experts.78.w2", "model.layers.13.block_sparse_moe.experts.79.w2", "model.layers.13.block_sparse_moe.experts.80.w2", "model.layers.13.block_sparse_moe.experts.81.w2", "model.layers.13.block_sparse_moe.experts.82.w2", "model.layers.13.block_sparse_moe.experts.83.w2", "model.layers.13.block_sparse_moe.experts.84.w2", "model.layers.13.block_sparse_moe.experts.85.w2", "model.layers.13.block_sparse_moe.experts.86.w2", "model.layers.13.block_sparse_moe.experts.87.w2", "model.layers.13.block_sparse_moe.experts.88.w2", "model.layers.13.block_sparse_moe.experts.89.w2", "model.layers.13.block_sparse_moe.experts.90.w2", "model.layers.13.block_sparse_moe.experts.91.w2", "model.layers.13.block_sparse_moe.experts.92.w2", "model.layers.13.block_sparse_moe.experts.93.w2", "model.layers.13.block_sparse_moe.experts.94.w2", "model.layers.13.block_sparse_moe.experts.95.w2", "model.layers.13.block_sparse_moe.experts.96.w2", "model.layers.13.block_sparse_moe.experts.97.w2", "model.layers.13.block_sparse_moe.experts.98.w2", "model.layers.13.block_sparse_moe.experts.99.w2", "model.layers.13.block_sparse_moe.experts.100.w2", "model.layers.13.block_sparse_moe.experts.101.w2", "model.layers.13.block_sparse_moe.experts.102.w2", "model.layers.13.block_sparse_moe.experts.103.w2", "model.layers.13.block_sparse_moe.experts.104.w2", "model.layers.13.block_sparse_moe.experts.105.w2", "model.layers.13.block_sparse_moe.experts.106.w2", "model.layers.13.block_sparse_moe.experts.107.w2", "model.layers.13.block_sparse_moe.experts.108.w2", "model.layers.13.block_sparse_moe.experts.109.w2", "model.layers.13.block_sparse_moe.experts.110.w2", "model.layers.13.block_sparse_moe.experts.111.w2", "model.layers.13.block_sparse_moe.experts.112.w2", "model.layers.13.block_sparse_moe.experts.113.w2", "model.layers.13.block_sparse_moe.experts.114.w2", "model.layers.13.block_sparse_moe.experts.115.w2", "model.layers.13.block_sparse_moe.experts.116.w2", "model.layers.13.block_sparse_moe.experts.117.w2", "model.layers.13.block_sparse_moe.experts.118.w2", "model.layers.13.block_sparse_moe.experts.119.w2", "model.layers.13.block_sparse_moe.experts.120.w2", "model.layers.13.block_sparse_moe.experts.121.w2", "model.layers.13.block_sparse_moe.experts.122.w2", "model.layers.13.block_sparse_moe.experts.123.w2", "model.layers.13.block_sparse_moe.experts.124.w2", "model.layers.13.block_sparse_moe.experts.125.w2", "model.layers.13.block_sparse_moe.experts.126.w2", "model.layers.13.block_sparse_moe.experts.127.w2", "model.layers.13.block_sparse_moe.experts.128.w2", "model.layers.13.block_sparse_moe.experts.129.w2", "model.layers.13.block_sparse_moe.experts.130.w2", "model.layers.13.block_sparse_moe.experts.131.w2", "model.layers.13.block_sparse_moe.experts.132.w2", "model.layers.13.block_sparse_moe.experts.133.w2", "model.layers.13.block_sparse_moe.experts.134.w2", "model.layers.13.block_sparse_moe.experts.135.w2", "model.layers.13.block_sparse_moe.experts.136.w2", "model.layers.13.block_sparse_moe.experts.137.w2", "model.layers.13.block_sparse_moe.experts.138.w2", "model.layers.13.block_sparse_moe.experts.139.w2", "model.layers.13.block_sparse_moe.experts.140.w2", "model.layers.13.block_sparse_moe.experts.141.w2", "model.layers.13.block_sparse_moe.experts.142.w2", "model.layers.13.block_sparse_moe.experts.143.w2", "model.layers.13.block_sparse_moe.experts.144.w2", "model.layers.13.block_sparse_moe.experts.145.w2", "model.layers.13.block_sparse_moe.experts.146.w2", "model.layers.13.block_sparse_moe.experts.147.w2", "model.layers.13.block_sparse_moe.experts.148.w2", "model.layers.13.block_sparse_moe.experts.149.w2", "model.layers.13.block_sparse_moe.experts.150.w2", "model.layers.13.block_sparse_moe.experts.151.w2", "model.layers.13.block_sparse_moe.experts.152.w2", "model.layers.13.block_sparse_moe.experts.153.w2", "model.layers.13.block_sparse_moe.experts.154.w2", "model.layers.13.block_sparse_moe.experts.155.w2", "model.layers.13.block_sparse_moe.experts.156.w2", "model.layers.13.block_sparse_moe.experts.157.w2", "model.layers.13.block_sparse_moe.experts.158.w2", "model.layers.13.block_sparse_moe.experts.159.w2", "model.layers.13.block_sparse_moe.experts.160.w2", "model.layers.13.block_sparse_moe.experts.161.w2", "model.layers.13.block_sparse_moe.experts.162.w2", "model.layers.13.block_sparse_moe.experts.163.w2", "model.layers.13.block_sparse_moe.experts.164.w2", "model.layers.13.block_sparse_moe.experts.165.w2", "model.layers.13.block_sparse_moe.experts.166.w2", "model.layers.13.block_sparse_moe.experts.167.w2", "model.layers.13.block_sparse_moe.experts.168.w2", "model.layers.13.block_sparse_moe.experts.169.w2", "model.layers.13.block_sparse_moe.experts.170.w2", "model.layers.13.block_sparse_moe.experts.171.w2", "model.layers.13.block_sparse_moe.experts.172.w2", "model.layers.13.block_sparse_moe.experts.173.w2", "model.layers.13.block_sparse_moe.experts.174.w2", "model.layers.13.block_sparse_moe.experts.175.w2", "model.layers.13.block_sparse_moe.experts.176.w2", "model.layers.13.block_sparse_moe.experts.177.w2", "model.layers.13.block_sparse_moe.experts.178.w2", "model.layers.13.block_sparse_moe.experts.179.w2", "model.layers.13.block_sparse_moe.experts.180.w2", "model.layers.13.block_sparse_moe.experts.181.w2", "model.layers.13.block_sparse_moe.experts.182.w2", "model.layers.13.block_sparse_moe.experts.183.w2", "model.layers.13.block_sparse_moe.experts.184.w2", "model.layers.13.block_sparse_moe.experts.185.w2", "model.layers.13.block_sparse_moe.experts.186.w2", "model.layers.13.block_sparse_moe.experts.187.w2", "model.layers.13.block_sparse_moe.experts.188.w2", "model.layers.13.block_sparse_moe.experts.189.w2", "model.layers.13.block_sparse_moe.experts.190.w2", "model.layers.13.block_sparse_moe.experts.191.w2", "model.layers.13.block_sparse_moe.experts.192.w2", "model.layers.13.block_sparse_moe.experts.193.w2", "model.layers.13.block_sparse_moe.experts.194.w2", "model.layers.13.block_sparse_moe.experts.195.w2", "model.layers.13.block_sparse_moe.experts.196.w2", "model.layers.13.block_sparse_moe.experts.197.w2", "model.layers.13.block_sparse_moe.experts.198.w2", "model.layers.13.block_sparse_moe.experts.199.w2", "model.layers.13.block_sparse_moe.experts.200.w2", "model.layers.13.block_sparse_moe.experts.201.w2", "model.layers.13.block_sparse_moe.experts.202.w2", "model.layers.13.block_sparse_moe.experts.203.w2", "model.layers.13.block_sparse_moe.experts.204.w2", "model.layers.13.block_sparse_moe.experts.205.w2", "model.layers.13.block_sparse_moe.experts.206.w2", "model.layers.13.block_sparse_moe.experts.207.w2", "model.layers.13.block_sparse_moe.experts.208.w2", "model.layers.13.block_sparse_moe.experts.209.w2", "model.layers.13.block_sparse_moe.experts.210.w2", "model.layers.13.block_sparse_moe.experts.211.w2", "model.layers.13.block_sparse_moe.experts.212.w2", "model.layers.13.block_sparse_moe.experts.213.w2", "model.layers.13.block_sparse_moe.experts.214.w2", "model.layers.13.block_sparse_moe.experts.215.w2", "model.layers.13.block_sparse_moe.experts.216.w2", "model.layers.13.block_sparse_moe.experts.217.w2", "model.layers.13.block_sparse_moe.experts.218.w2", "model.layers.13.block_sparse_moe.experts.219.w2", "model.layers.13.block_sparse_moe.experts.220.w2", "model.layers.13.block_sparse_moe.experts.221.w2", "model.layers.13.block_sparse_moe.experts.222.w2", "model.layers.13.block_sparse_moe.experts.223.w2", "model.layers.13.block_sparse_moe.experts.224.w2", "model.layers.13.block_sparse_moe.experts.225.w2", "model.layers.13.block_sparse_moe.experts.226.w2", "model.layers.13.block_sparse_moe.experts.227.w2", "model.layers.13.block_sparse_moe.experts.228.w2", "model.layers.13.block_sparse_moe.experts.229.w2", "model.layers.13.block_sparse_moe.experts.230.w2", "model.layers.13.block_sparse_moe.experts.231.w2", "model.layers.13.block_sparse_moe.experts.232.w2", "model.layers.13.block_sparse_moe.experts.233.w2", "model.layers.13.block_sparse_moe.experts.234.w2", "model.layers.13.block_sparse_moe.experts.235.w2", "model.layers.13.block_sparse_moe.experts.236.w2", "model.layers.13.block_sparse_moe.experts.237.w2", "model.layers.13.block_sparse_moe.experts.238.w2", "model.layers.13.block_sparse_moe.experts.239.w2", "model.layers.13.block_sparse_moe.experts.240.w2", "model.layers.13.block_sparse_moe.experts.241.w2", "model.layers.13.block_sparse_moe.experts.242.w2", "model.layers.13.block_sparse_moe.experts.243.w2", "model.layers.13.block_sparse_moe.experts.244.w2", "model.layers.13.block_sparse_moe.experts.245.w2", "model.layers.13.block_sparse_moe.experts.246.w2", "model.layers.13.block_sparse_moe.experts.247.w2", "model.layers.13.block_sparse_moe.experts.248.w2", "model.layers.13.block_sparse_moe.experts.249.w2", "model.layers.13.block_sparse_moe.experts.250.w2", "model.layers.13.block_sparse_moe.experts.251.w2", "model.layers.13.block_sparse_moe.experts.252.w2", "model.layers.13.block_sparse_moe.experts.253.w2", "model.layers.13.block_sparse_moe.experts.254.w2", "model.layers.13.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0008436517789959797, "dbits": 3623878656 } ] }, { "idx": 28, "layers": [ "model.layers.14.self_attn.q_proj", "model.layers.14.self_attn.k_proj", "model.layers.14.self_attn.v_proj", "model.layers.14.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0007451109588146321, "dbits": 44040192 } ] }, { "idx": 29, "layers": [ "model.layers.14.block_sparse_moe.experts.0.w1", "model.layers.14.block_sparse_moe.experts.1.w1", "model.layers.14.block_sparse_moe.experts.2.w1", "model.layers.14.block_sparse_moe.experts.3.w1", "model.layers.14.block_sparse_moe.experts.4.w1", "model.layers.14.block_sparse_moe.experts.5.w1", "model.layers.14.block_sparse_moe.experts.6.w1", "model.layers.14.block_sparse_moe.experts.7.w1", "model.layers.14.block_sparse_moe.experts.8.w1", "model.layers.14.block_sparse_moe.experts.9.w1", "model.layers.14.block_sparse_moe.experts.10.w1", "model.layers.14.block_sparse_moe.experts.11.w1", "model.layers.14.block_sparse_moe.experts.12.w1", "model.layers.14.block_sparse_moe.experts.13.w1", "model.layers.14.block_sparse_moe.experts.14.w1", "model.layers.14.block_sparse_moe.experts.15.w1", "model.layers.14.block_sparse_moe.experts.16.w1", "model.layers.14.block_sparse_moe.experts.17.w1", "model.layers.14.block_sparse_moe.experts.18.w1", "model.layers.14.block_sparse_moe.experts.19.w1", "model.layers.14.block_sparse_moe.experts.20.w1", "model.layers.14.block_sparse_moe.experts.21.w1", "model.layers.14.block_sparse_moe.experts.22.w1", "model.layers.14.block_sparse_moe.experts.23.w1", "model.layers.14.block_sparse_moe.experts.24.w1", "model.layers.14.block_sparse_moe.experts.25.w1", "model.layers.14.block_sparse_moe.experts.26.w1", "model.layers.14.block_sparse_moe.experts.27.w1", "model.layers.14.block_sparse_moe.experts.28.w1", "model.layers.14.block_sparse_moe.experts.29.w1", "model.layers.14.block_sparse_moe.experts.30.w1", "model.layers.14.block_sparse_moe.experts.31.w1", "model.layers.14.block_sparse_moe.experts.32.w1", "model.layers.14.block_sparse_moe.experts.33.w1", "model.layers.14.block_sparse_moe.experts.34.w1", "model.layers.14.block_sparse_moe.experts.35.w1", "model.layers.14.block_sparse_moe.experts.36.w1", "model.layers.14.block_sparse_moe.experts.37.w1", "model.layers.14.block_sparse_moe.experts.38.w1", "model.layers.14.block_sparse_moe.experts.39.w1", "model.layers.14.block_sparse_moe.experts.40.w1", "model.layers.14.block_sparse_moe.experts.41.w1", "model.layers.14.block_sparse_moe.experts.42.w1", "model.layers.14.block_sparse_moe.experts.43.w1", "model.layers.14.block_sparse_moe.experts.44.w1", "model.layers.14.block_sparse_moe.experts.45.w1", "model.layers.14.block_sparse_moe.experts.46.w1", "model.layers.14.block_sparse_moe.experts.47.w1", "model.layers.14.block_sparse_moe.experts.48.w1", "model.layers.14.block_sparse_moe.experts.49.w1", "model.layers.14.block_sparse_moe.experts.50.w1", "model.layers.14.block_sparse_moe.experts.51.w1", "model.layers.14.block_sparse_moe.experts.52.w1", "model.layers.14.block_sparse_moe.experts.53.w1", "model.layers.14.block_sparse_moe.experts.54.w1", "model.layers.14.block_sparse_moe.experts.55.w1", "model.layers.14.block_sparse_moe.experts.56.w1", "model.layers.14.block_sparse_moe.experts.57.w1", "model.layers.14.block_sparse_moe.experts.58.w1", "model.layers.14.block_sparse_moe.experts.59.w1", "model.layers.14.block_sparse_moe.experts.60.w1", "model.layers.14.block_sparse_moe.experts.61.w1", "model.layers.14.block_sparse_moe.experts.62.w1", "model.layers.14.block_sparse_moe.experts.63.w1", "model.layers.14.block_sparse_moe.experts.64.w1", "model.layers.14.block_sparse_moe.experts.65.w1", "model.layers.14.block_sparse_moe.experts.66.w1", "model.layers.14.block_sparse_moe.experts.67.w1", "model.layers.14.block_sparse_moe.experts.68.w1", "model.layers.14.block_sparse_moe.experts.69.w1", "model.layers.14.block_sparse_moe.experts.70.w1", "model.layers.14.block_sparse_moe.experts.71.w1", "model.layers.14.block_sparse_moe.experts.72.w1", "model.layers.14.block_sparse_moe.experts.73.w1", "model.layers.14.block_sparse_moe.experts.74.w1", "model.layers.14.block_sparse_moe.experts.75.w1", "model.layers.14.block_sparse_moe.experts.76.w1", "model.layers.14.block_sparse_moe.experts.77.w1", "model.layers.14.block_sparse_moe.experts.78.w1", "model.layers.14.block_sparse_moe.experts.79.w1", "model.layers.14.block_sparse_moe.experts.80.w1", "model.layers.14.block_sparse_moe.experts.81.w1", "model.layers.14.block_sparse_moe.experts.82.w1", "model.layers.14.block_sparse_moe.experts.83.w1", "model.layers.14.block_sparse_moe.experts.84.w1", "model.layers.14.block_sparse_moe.experts.85.w1", "model.layers.14.block_sparse_moe.experts.86.w1", "model.layers.14.block_sparse_moe.experts.87.w1", "model.layers.14.block_sparse_moe.experts.88.w1", "model.layers.14.block_sparse_moe.experts.89.w1", "model.layers.14.block_sparse_moe.experts.90.w1", "model.layers.14.block_sparse_moe.experts.91.w1", "model.layers.14.block_sparse_moe.experts.92.w1", "model.layers.14.block_sparse_moe.experts.93.w1", "model.layers.14.block_sparse_moe.experts.94.w1", "model.layers.14.block_sparse_moe.experts.95.w1", "model.layers.14.block_sparse_moe.experts.96.w1", "model.layers.14.block_sparse_moe.experts.97.w1", "model.layers.14.block_sparse_moe.experts.98.w1", "model.layers.14.block_sparse_moe.experts.99.w1", "model.layers.14.block_sparse_moe.experts.100.w1", "model.layers.14.block_sparse_moe.experts.101.w1", "model.layers.14.block_sparse_moe.experts.102.w1", "model.layers.14.block_sparse_moe.experts.103.w1", "model.layers.14.block_sparse_moe.experts.104.w1", "model.layers.14.block_sparse_moe.experts.105.w1", "model.layers.14.block_sparse_moe.experts.106.w1", "model.layers.14.block_sparse_moe.experts.107.w1", "model.layers.14.block_sparse_moe.experts.108.w1", "model.layers.14.block_sparse_moe.experts.109.w1", "model.layers.14.block_sparse_moe.experts.110.w1", "model.layers.14.block_sparse_moe.experts.111.w1", "model.layers.14.block_sparse_moe.experts.112.w1", "model.layers.14.block_sparse_moe.experts.113.w1", "model.layers.14.block_sparse_moe.experts.114.w1", "model.layers.14.block_sparse_moe.experts.115.w1", "model.layers.14.block_sparse_moe.experts.116.w1", "model.layers.14.block_sparse_moe.experts.117.w1", "model.layers.14.block_sparse_moe.experts.118.w1", "model.layers.14.block_sparse_moe.experts.119.w1", "model.layers.14.block_sparse_moe.experts.120.w1", "model.layers.14.block_sparse_moe.experts.121.w1", "model.layers.14.block_sparse_moe.experts.122.w1", "model.layers.14.block_sparse_moe.experts.123.w1", "model.layers.14.block_sparse_moe.experts.124.w1", "model.layers.14.block_sparse_moe.experts.125.w1", "model.layers.14.block_sparse_moe.experts.126.w1", "model.layers.14.block_sparse_moe.experts.127.w1", "model.layers.14.block_sparse_moe.experts.128.w1", "model.layers.14.block_sparse_moe.experts.129.w1", "model.layers.14.block_sparse_moe.experts.130.w1", "model.layers.14.block_sparse_moe.experts.131.w1", "model.layers.14.block_sparse_moe.experts.132.w1", "model.layers.14.block_sparse_moe.experts.133.w1", "model.layers.14.block_sparse_moe.experts.134.w1", "model.layers.14.block_sparse_moe.experts.135.w1", "model.layers.14.block_sparse_moe.experts.136.w1", "model.layers.14.block_sparse_moe.experts.137.w1", "model.layers.14.block_sparse_moe.experts.138.w1", "model.layers.14.block_sparse_moe.experts.139.w1", "model.layers.14.block_sparse_moe.experts.140.w1", "model.layers.14.block_sparse_moe.experts.141.w1", "model.layers.14.block_sparse_moe.experts.142.w1", "model.layers.14.block_sparse_moe.experts.143.w1", "model.layers.14.block_sparse_moe.experts.144.w1", "model.layers.14.block_sparse_moe.experts.145.w1", "model.layers.14.block_sparse_moe.experts.146.w1", "model.layers.14.block_sparse_moe.experts.147.w1", "model.layers.14.block_sparse_moe.experts.148.w1", "model.layers.14.block_sparse_moe.experts.149.w1", "model.layers.14.block_sparse_moe.experts.150.w1", "model.layers.14.block_sparse_moe.experts.151.w1", "model.layers.14.block_sparse_moe.experts.152.w1", "model.layers.14.block_sparse_moe.experts.153.w1", "model.layers.14.block_sparse_moe.experts.154.w1", "model.layers.14.block_sparse_moe.experts.155.w1", "model.layers.14.block_sparse_moe.experts.156.w1", "model.layers.14.block_sparse_moe.experts.157.w1", "model.layers.14.block_sparse_moe.experts.158.w1", "model.layers.14.block_sparse_moe.experts.159.w1", "model.layers.14.block_sparse_moe.experts.160.w1", "model.layers.14.block_sparse_moe.experts.161.w1", "model.layers.14.block_sparse_moe.experts.162.w1", "model.layers.14.block_sparse_moe.experts.163.w1", "model.layers.14.block_sparse_moe.experts.164.w1", "model.layers.14.block_sparse_moe.experts.165.w1", "model.layers.14.block_sparse_moe.experts.166.w1", "model.layers.14.block_sparse_moe.experts.167.w1", "model.layers.14.block_sparse_moe.experts.168.w1", "model.layers.14.block_sparse_moe.experts.169.w1", "model.layers.14.block_sparse_moe.experts.170.w1", "model.layers.14.block_sparse_moe.experts.171.w1", "model.layers.14.block_sparse_moe.experts.172.w1", "model.layers.14.block_sparse_moe.experts.173.w1", "model.layers.14.block_sparse_moe.experts.174.w1", "model.layers.14.block_sparse_moe.experts.175.w1", "model.layers.14.block_sparse_moe.experts.176.w1", "model.layers.14.block_sparse_moe.experts.177.w1", "model.layers.14.block_sparse_moe.experts.178.w1", "model.layers.14.block_sparse_moe.experts.179.w1", "model.layers.14.block_sparse_moe.experts.180.w1", "model.layers.14.block_sparse_moe.experts.181.w1", "model.layers.14.block_sparse_moe.experts.182.w1", "model.layers.14.block_sparse_moe.experts.183.w1", "model.layers.14.block_sparse_moe.experts.184.w1", "model.layers.14.block_sparse_moe.experts.185.w1", "model.layers.14.block_sparse_moe.experts.186.w1", "model.layers.14.block_sparse_moe.experts.187.w1", "model.layers.14.block_sparse_moe.experts.188.w1", "model.layers.14.block_sparse_moe.experts.189.w1", "model.layers.14.block_sparse_moe.experts.190.w1", "model.layers.14.block_sparse_moe.experts.191.w1", "model.layers.14.block_sparse_moe.experts.192.w1", "model.layers.14.block_sparse_moe.experts.193.w1", "model.layers.14.block_sparse_moe.experts.194.w1", "model.layers.14.block_sparse_moe.experts.195.w1", "model.layers.14.block_sparse_moe.experts.196.w1", "model.layers.14.block_sparse_moe.experts.197.w1", "model.layers.14.block_sparse_moe.experts.198.w1", "model.layers.14.block_sparse_moe.experts.199.w1", "model.layers.14.block_sparse_moe.experts.200.w1", "model.layers.14.block_sparse_moe.experts.201.w1", "model.layers.14.block_sparse_moe.experts.202.w1", "model.layers.14.block_sparse_moe.experts.203.w1", "model.layers.14.block_sparse_moe.experts.204.w1", "model.layers.14.block_sparse_moe.experts.205.w1", "model.layers.14.block_sparse_moe.experts.206.w1", "model.layers.14.block_sparse_moe.experts.207.w1", "model.layers.14.block_sparse_moe.experts.208.w1", "model.layers.14.block_sparse_moe.experts.209.w1", "model.layers.14.block_sparse_moe.experts.210.w1", "model.layers.14.block_sparse_moe.experts.211.w1", "model.layers.14.block_sparse_moe.experts.212.w1", "model.layers.14.block_sparse_moe.experts.213.w1", "model.layers.14.block_sparse_moe.experts.214.w1", "model.layers.14.block_sparse_moe.experts.215.w1", "model.layers.14.block_sparse_moe.experts.216.w1", "model.layers.14.block_sparse_moe.experts.217.w1", "model.layers.14.block_sparse_moe.experts.218.w1", "model.layers.14.block_sparse_moe.experts.219.w1", "model.layers.14.block_sparse_moe.experts.220.w1", "model.layers.14.block_sparse_moe.experts.221.w1", "model.layers.14.block_sparse_moe.experts.222.w1", "model.layers.14.block_sparse_moe.experts.223.w1", "model.layers.14.block_sparse_moe.experts.224.w1", "model.layers.14.block_sparse_moe.experts.225.w1", "model.layers.14.block_sparse_moe.experts.226.w1", "model.layers.14.block_sparse_moe.experts.227.w1", "model.layers.14.block_sparse_moe.experts.228.w1", "model.layers.14.block_sparse_moe.experts.229.w1", "model.layers.14.block_sparse_moe.experts.230.w1", "model.layers.14.block_sparse_moe.experts.231.w1", "model.layers.14.block_sparse_moe.experts.232.w1", "model.layers.14.block_sparse_moe.experts.233.w1", "model.layers.14.block_sparse_moe.experts.234.w1", "model.layers.14.block_sparse_moe.experts.235.w1", "model.layers.14.block_sparse_moe.experts.236.w1", "model.layers.14.block_sparse_moe.experts.237.w1", "model.layers.14.block_sparse_moe.experts.238.w1", "model.layers.14.block_sparse_moe.experts.239.w1", "model.layers.14.block_sparse_moe.experts.240.w1", "model.layers.14.block_sparse_moe.experts.241.w1", "model.layers.14.block_sparse_moe.experts.242.w1", "model.layers.14.block_sparse_moe.experts.243.w1", "model.layers.14.block_sparse_moe.experts.244.w1", "model.layers.14.block_sparse_moe.experts.245.w1", "model.layers.14.block_sparse_moe.experts.246.w1", "model.layers.14.block_sparse_moe.experts.247.w1", "model.layers.14.block_sparse_moe.experts.248.w1", "model.layers.14.block_sparse_moe.experts.249.w1", "model.layers.14.block_sparse_moe.experts.250.w1", "model.layers.14.block_sparse_moe.experts.251.w1", "model.layers.14.block_sparse_moe.experts.252.w1", "model.layers.14.block_sparse_moe.experts.253.w1", "model.layers.14.block_sparse_moe.experts.254.w1", "model.layers.14.block_sparse_moe.experts.255.w1", "model.layers.14.block_sparse_moe.experts.0.w3", "model.layers.14.block_sparse_moe.experts.1.w3", "model.layers.14.block_sparse_moe.experts.2.w3", "model.layers.14.block_sparse_moe.experts.3.w3", "model.layers.14.block_sparse_moe.experts.4.w3", "model.layers.14.block_sparse_moe.experts.5.w3", "model.layers.14.block_sparse_moe.experts.6.w3", "model.layers.14.block_sparse_moe.experts.7.w3", "model.layers.14.block_sparse_moe.experts.8.w3", "model.layers.14.block_sparse_moe.experts.9.w3", "model.layers.14.block_sparse_moe.experts.10.w3", "model.layers.14.block_sparse_moe.experts.11.w3", "model.layers.14.block_sparse_moe.experts.12.w3", "model.layers.14.block_sparse_moe.experts.13.w3", "model.layers.14.block_sparse_moe.experts.14.w3", "model.layers.14.block_sparse_moe.experts.15.w3", "model.layers.14.block_sparse_moe.experts.16.w3", "model.layers.14.block_sparse_moe.experts.17.w3", "model.layers.14.block_sparse_moe.experts.18.w3", "model.layers.14.block_sparse_moe.experts.19.w3", "model.layers.14.block_sparse_moe.experts.20.w3", "model.layers.14.block_sparse_moe.experts.21.w3", "model.layers.14.block_sparse_moe.experts.22.w3", "model.layers.14.block_sparse_moe.experts.23.w3", "model.layers.14.block_sparse_moe.experts.24.w3", "model.layers.14.block_sparse_moe.experts.25.w3", "model.layers.14.block_sparse_moe.experts.26.w3", "model.layers.14.block_sparse_moe.experts.27.w3", "model.layers.14.block_sparse_moe.experts.28.w3", "model.layers.14.block_sparse_moe.experts.29.w3", "model.layers.14.block_sparse_moe.experts.30.w3", "model.layers.14.block_sparse_moe.experts.31.w3", "model.layers.14.block_sparse_moe.experts.32.w3", "model.layers.14.block_sparse_moe.experts.33.w3", "model.layers.14.block_sparse_moe.experts.34.w3", "model.layers.14.block_sparse_moe.experts.35.w3", "model.layers.14.block_sparse_moe.experts.36.w3", "model.layers.14.block_sparse_moe.experts.37.w3", "model.layers.14.block_sparse_moe.experts.38.w3", "model.layers.14.block_sparse_moe.experts.39.w3", "model.layers.14.block_sparse_moe.experts.40.w3", "model.layers.14.block_sparse_moe.experts.41.w3", "model.layers.14.block_sparse_moe.experts.42.w3", "model.layers.14.block_sparse_moe.experts.43.w3", "model.layers.14.block_sparse_moe.experts.44.w3", "model.layers.14.block_sparse_moe.experts.45.w3", "model.layers.14.block_sparse_moe.experts.46.w3", "model.layers.14.block_sparse_moe.experts.47.w3", "model.layers.14.block_sparse_moe.experts.48.w3", "model.layers.14.block_sparse_moe.experts.49.w3", "model.layers.14.block_sparse_moe.experts.50.w3", "model.layers.14.block_sparse_moe.experts.51.w3", "model.layers.14.block_sparse_moe.experts.52.w3", "model.layers.14.block_sparse_moe.experts.53.w3", "model.layers.14.block_sparse_moe.experts.54.w3", "model.layers.14.block_sparse_moe.experts.55.w3", "model.layers.14.block_sparse_moe.experts.56.w3", "model.layers.14.block_sparse_moe.experts.57.w3", "model.layers.14.block_sparse_moe.experts.58.w3", "model.layers.14.block_sparse_moe.experts.59.w3", "model.layers.14.block_sparse_moe.experts.60.w3", "model.layers.14.block_sparse_moe.experts.61.w3", "model.layers.14.block_sparse_moe.experts.62.w3", "model.layers.14.block_sparse_moe.experts.63.w3", "model.layers.14.block_sparse_moe.experts.64.w3", "model.layers.14.block_sparse_moe.experts.65.w3", "model.layers.14.block_sparse_moe.experts.66.w3", "model.layers.14.block_sparse_moe.experts.67.w3", "model.layers.14.block_sparse_moe.experts.68.w3", "model.layers.14.block_sparse_moe.experts.69.w3", "model.layers.14.block_sparse_moe.experts.70.w3", "model.layers.14.block_sparse_moe.experts.71.w3", "model.layers.14.block_sparse_moe.experts.72.w3", "model.layers.14.block_sparse_moe.experts.73.w3", "model.layers.14.block_sparse_moe.experts.74.w3", "model.layers.14.block_sparse_moe.experts.75.w3", "model.layers.14.block_sparse_moe.experts.76.w3", "model.layers.14.block_sparse_moe.experts.77.w3", "model.layers.14.block_sparse_moe.experts.78.w3", "model.layers.14.block_sparse_moe.experts.79.w3", "model.layers.14.block_sparse_moe.experts.80.w3", "model.layers.14.block_sparse_moe.experts.81.w3", "model.layers.14.block_sparse_moe.experts.82.w3", "model.layers.14.block_sparse_moe.experts.83.w3", "model.layers.14.block_sparse_moe.experts.84.w3", "model.layers.14.block_sparse_moe.experts.85.w3", "model.layers.14.block_sparse_moe.experts.86.w3", "model.layers.14.block_sparse_moe.experts.87.w3", "model.layers.14.block_sparse_moe.experts.88.w3", "model.layers.14.block_sparse_moe.experts.89.w3", "model.layers.14.block_sparse_moe.experts.90.w3", "model.layers.14.block_sparse_moe.experts.91.w3", "model.layers.14.block_sparse_moe.experts.92.w3", "model.layers.14.block_sparse_moe.experts.93.w3", "model.layers.14.block_sparse_moe.experts.94.w3", "model.layers.14.block_sparse_moe.experts.95.w3", "model.layers.14.block_sparse_moe.experts.96.w3", "model.layers.14.block_sparse_moe.experts.97.w3", "model.layers.14.block_sparse_moe.experts.98.w3", "model.layers.14.block_sparse_moe.experts.99.w3", "model.layers.14.block_sparse_moe.experts.100.w3", "model.layers.14.block_sparse_moe.experts.101.w3", "model.layers.14.block_sparse_moe.experts.102.w3", "model.layers.14.block_sparse_moe.experts.103.w3", "model.layers.14.block_sparse_moe.experts.104.w3", "model.layers.14.block_sparse_moe.experts.105.w3", "model.layers.14.block_sparse_moe.experts.106.w3", "model.layers.14.block_sparse_moe.experts.107.w3", "model.layers.14.block_sparse_moe.experts.108.w3", "model.layers.14.block_sparse_moe.experts.109.w3", "model.layers.14.block_sparse_moe.experts.110.w3", "model.layers.14.block_sparse_moe.experts.111.w3", "model.layers.14.block_sparse_moe.experts.112.w3", "model.layers.14.block_sparse_moe.experts.113.w3", "model.layers.14.block_sparse_moe.experts.114.w3", "model.layers.14.block_sparse_moe.experts.115.w3", "model.layers.14.block_sparse_moe.experts.116.w3", "model.layers.14.block_sparse_moe.experts.117.w3", "model.layers.14.block_sparse_moe.experts.118.w3", "model.layers.14.block_sparse_moe.experts.119.w3", "model.layers.14.block_sparse_moe.experts.120.w3", "model.layers.14.block_sparse_moe.experts.121.w3", "model.layers.14.block_sparse_moe.experts.122.w3", "model.layers.14.block_sparse_moe.experts.123.w3", "model.layers.14.block_sparse_moe.experts.124.w3", "model.layers.14.block_sparse_moe.experts.125.w3", "model.layers.14.block_sparse_moe.experts.126.w3", "model.layers.14.block_sparse_moe.experts.127.w3", "model.layers.14.block_sparse_moe.experts.128.w3", "model.layers.14.block_sparse_moe.experts.129.w3", "model.layers.14.block_sparse_moe.experts.130.w3", "model.layers.14.block_sparse_moe.experts.131.w3", "model.layers.14.block_sparse_moe.experts.132.w3", "model.layers.14.block_sparse_moe.experts.133.w3", "model.layers.14.block_sparse_moe.experts.134.w3", "model.layers.14.block_sparse_moe.experts.135.w3", "model.layers.14.block_sparse_moe.experts.136.w3", "model.layers.14.block_sparse_moe.experts.137.w3", "model.layers.14.block_sparse_moe.experts.138.w3", "model.layers.14.block_sparse_moe.experts.139.w3", "model.layers.14.block_sparse_moe.experts.140.w3", "model.layers.14.block_sparse_moe.experts.141.w3", "model.layers.14.block_sparse_moe.experts.142.w3", "model.layers.14.block_sparse_moe.experts.143.w3", "model.layers.14.block_sparse_moe.experts.144.w3", "model.layers.14.block_sparse_moe.experts.145.w3", "model.layers.14.block_sparse_moe.experts.146.w3", "model.layers.14.block_sparse_moe.experts.147.w3", "model.layers.14.block_sparse_moe.experts.148.w3", "model.layers.14.block_sparse_moe.experts.149.w3", "model.layers.14.block_sparse_moe.experts.150.w3", "model.layers.14.block_sparse_moe.experts.151.w3", "model.layers.14.block_sparse_moe.experts.152.w3", "model.layers.14.block_sparse_moe.experts.153.w3", "model.layers.14.block_sparse_moe.experts.154.w3", "model.layers.14.block_sparse_moe.experts.155.w3", "model.layers.14.block_sparse_moe.experts.156.w3", "model.layers.14.block_sparse_moe.experts.157.w3", "model.layers.14.block_sparse_moe.experts.158.w3", "model.layers.14.block_sparse_moe.experts.159.w3", "model.layers.14.block_sparse_moe.experts.160.w3", "model.layers.14.block_sparse_moe.experts.161.w3", "model.layers.14.block_sparse_moe.experts.162.w3", "model.layers.14.block_sparse_moe.experts.163.w3", "model.layers.14.block_sparse_moe.experts.164.w3", "model.layers.14.block_sparse_moe.experts.165.w3", "model.layers.14.block_sparse_moe.experts.166.w3", "model.layers.14.block_sparse_moe.experts.167.w3", "model.layers.14.block_sparse_moe.experts.168.w3", "model.layers.14.block_sparse_moe.experts.169.w3", "model.layers.14.block_sparse_moe.experts.170.w3", "model.layers.14.block_sparse_moe.experts.171.w3", "model.layers.14.block_sparse_moe.experts.172.w3", "model.layers.14.block_sparse_moe.experts.173.w3", "model.layers.14.block_sparse_moe.experts.174.w3", "model.layers.14.block_sparse_moe.experts.175.w3", "model.layers.14.block_sparse_moe.experts.176.w3", "model.layers.14.block_sparse_moe.experts.177.w3", "model.layers.14.block_sparse_moe.experts.178.w3", "model.layers.14.block_sparse_moe.experts.179.w3", "model.layers.14.block_sparse_moe.experts.180.w3", "model.layers.14.block_sparse_moe.experts.181.w3", "model.layers.14.block_sparse_moe.experts.182.w3", "model.layers.14.block_sparse_moe.experts.183.w3", "model.layers.14.block_sparse_moe.experts.184.w3", "model.layers.14.block_sparse_moe.experts.185.w3", "model.layers.14.block_sparse_moe.experts.186.w3", "model.layers.14.block_sparse_moe.experts.187.w3", "model.layers.14.block_sparse_moe.experts.188.w3", "model.layers.14.block_sparse_moe.experts.189.w3", "model.layers.14.block_sparse_moe.experts.190.w3", "model.layers.14.block_sparse_moe.experts.191.w3", "model.layers.14.block_sparse_moe.experts.192.w3", "model.layers.14.block_sparse_moe.experts.193.w3", "model.layers.14.block_sparse_moe.experts.194.w3", "model.layers.14.block_sparse_moe.experts.195.w3", "model.layers.14.block_sparse_moe.experts.196.w3", "model.layers.14.block_sparse_moe.experts.197.w3", "model.layers.14.block_sparse_moe.experts.198.w3", "model.layers.14.block_sparse_moe.experts.199.w3", "model.layers.14.block_sparse_moe.experts.200.w3", "model.layers.14.block_sparse_moe.experts.201.w3", "model.layers.14.block_sparse_moe.experts.202.w3", "model.layers.14.block_sparse_moe.experts.203.w3", "model.layers.14.block_sparse_moe.experts.204.w3", "model.layers.14.block_sparse_moe.experts.205.w3", "model.layers.14.block_sparse_moe.experts.206.w3", "model.layers.14.block_sparse_moe.experts.207.w3", "model.layers.14.block_sparse_moe.experts.208.w3", "model.layers.14.block_sparse_moe.experts.209.w3", "model.layers.14.block_sparse_moe.experts.210.w3", "model.layers.14.block_sparse_moe.experts.211.w3", "model.layers.14.block_sparse_moe.experts.212.w3", "model.layers.14.block_sparse_moe.experts.213.w3", "model.layers.14.block_sparse_moe.experts.214.w3", "model.layers.14.block_sparse_moe.experts.215.w3", "model.layers.14.block_sparse_moe.experts.216.w3", "model.layers.14.block_sparse_moe.experts.217.w3", "model.layers.14.block_sparse_moe.experts.218.w3", "model.layers.14.block_sparse_moe.experts.219.w3", "model.layers.14.block_sparse_moe.experts.220.w3", "model.layers.14.block_sparse_moe.experts.221.w3", "model.layers.14.block_sparse_moe.experts.222.w3", "model.layers.14.block_sparse_moe.experts.223.w3", "model.layers.14.block_sparse_moe.experts.224.w3", "model.layers.14.block_sparse_moe.experts.225.w3", "model.layers.14.block_sparse_moe.experts.226.w3", "model.layers.14.block_sparse_moe.experts.227.w3", "model.layers.14.block_sparse_moe.experts.228.w3", "model.layers.14.block_sparse_moe.experts.229.w3", "model.layers.14.block_sparse_moe.experts.230.w3", "model.layers.14.block_sparse_moe.experts.231.w3", "model.layers.14.block_sparse_moe.experts.232.w3", "model.layers.14.block_sparse_moe.experts.233.w3", "model.layers.14.block_sparse_moe.experts.234.w3", "model.layers.14.block_sparse_moe.experts.235.w3", "model.layers.14.block_sparse_moe.experts.236.w3", "model.layers.14.block_sparse_moe.experts.237.w3", "model.layers.14.block_sparse_moe.experts.238.w3", "model.layers.14.block_sparse_moe.experts.239.w3", "model.layers.14.block_sparse_moe.experts.240.w3", "model.layers.14.block_sparse_moe.experts.241.w3", "model.layers.14.block_sparse_moe.experts.242.w3", "model.layers.14.block_sparse_moe.experts.243.w3", "model.layers.14.block_sparse_moe.experts.244.w3", "model.layers.14.block_sparse_moe.experts.245.w3", "model.layers.14.block_sparse_moe.experts.246.w3", "model.layers.14.block_sparse_moe.experts.247.w3", "model.layers.14.block_sparse_moe.experts.248.w3", "model.layers.14.block_sparse_moe.experts.249.w3", "model.layers.14.block_sparse_moe.experts.250.w3", "model.layers.14.block_sparse_moe.experts.251.w3", "model.layers.14.block_sparse_moe.experts.252.w3", "model.layers.14.block_sparse_moe.experts.253.w3", "model.layers.14.block_sparse_moe.experts.254.w3", "model.layers.14.block_sparse_moe.experts.255.w3", "model.layers.14.block_sparse_moe.experts.0.w2", "model.layers.14.block_sparse_moe.experts.1.w2", "model.layers.14.block_sparse_moe.experts.2.w2", "model.layers.14.block_sparse_moe.experts.3.w2", "model.layers.14.block_sparse_moe.experts.4.w2", "model.layers.14.block_sparse_moe.experts.5.w2", "model.layers.14.block_sparse_moe.experts.6.w2", "model.layers.14.block_sparse_moe.experts.7.w2", "model.layers.14.block_sparse_moe.experts.8.w2", "model.layers.14.block_sparse_moe.experts.9.w2", "model.layers.14.block_sparse_moe.experts.10.w2", "model.layers.14.block_sparse_moe.experts.11.w2", "model.layers.14.block_sparse_moe.experts.12.w2", "model.layers.14.block_sparse_moe.experts.13.w2", "model.layers.14.block_sparse_moe.experts.14.w2", "model.layers.14.block_sparse_moe.experts.15.w2", "model.layers.14.block_sparse_moe.experts.16.w2", "model.layers.14.block_sparse_moe.experts.17.w2", "model.layers.14.block_sparse_moe.experts.18.w2", "model.layers.14.block_sparse_moe.experts.19.w2", "model.layers.14.block_sparse_moe.experts.20.w2", "model.layers.14.block_sparse_moe.experts.21.w2", "model.layers.14.block_sparse_moe.experts.22.w2", "model.layers.14.block_sparse_moe.experts.23.w2", "model.layers.14.block_sparse_moe.experts.24.w2", "model.layers.14.block_sparse_moe.experts.25.w2", "model.layers.14.block_sparse_moe.experts.26.w2", "model.layers.14.block_sparse_moe.experts.27.w2", "model.layers.14.block_sparse_moe.experts.28.w2", "model.layers.14.block_sparse_moe.experts.29.w2", "model.layers.14.block_sparse_moe.experts.30.w2", "model.layers.14.block_sparse_moe.experts.31.w2", "model.layers.14.block_sparse_moe.experts.32.w2", "model.layers.14.block_sparse_moe.experts.33.w2", "model.layers.14.block_sparse_moe.experts.34.w2", "model.layers.14.block_sparse_moe.experts.35.w2", "model.layers.14.block_sparse_moe.experts.36.w2", "model.layers.14.block_sparse_moe.experts.37.w2", "model.layers.14.block_sparse_moe.experts.38.w2", "model.layers.14.block_sparse_moe.experts.39.w2", "model.layers.14.block_sparse_moe.experts.40.w2", "model.layers.14.block_sparse_moe.experts.41.w2", "model.layers.14.block_sparse_moe.experts.42.w2", "model.layers.14.block_sparse_moe.experts.43.w2", "model.layers.14.block_sparse_moe.experts.44.w2", "model.layers.14.block_sparse_moe.experts.45.w2", "model.layers.14.block_sparse_moe.experts.46.w2", "model.layers.14.block_sparse_moe.experts.47.w2", "model.layers.14.block_sparse_moe.experts.48.w2", "model.layers.14.block_sparse_moe.experts.49.w2", "model.layers.14.block_sparse_moe.experts.50.w2", "model.layers.14.block_sparse_moe.experts.51.w2", "model.layers.14.block_sparse_moe.experts.52.w2", "model.layers.14.block_sparse_moe.experts.53.w2", "model.layers.14.block_sparse_moe.experts.54.w2", "model.layers.14.block_sparse_moe.experts.55.w2", "model.layers.14.block_sparse_moe.experts.56.w2", "model.layers.14.block_sparse_moe.experts.57.w2", "model.layers.14.block_sparse_moe.experts.58.w2", "model.layers.14.block_sparse_moe.experts.59.w2", "model.layers.14.block_sparse_moe.experts.60.w2", "model.layers.14.block_sparse_moe.experts.61.w2", "model.layers.14.block_sparse_moe.experts.62.w2", "model.layers.14.block_sparse_moe.experts.63.w2", "model.layers.14.block_sparse_moe.experts.64.w2", "model.layers.14.block_sparse_moe.experts.65.w2", "model.layers.14.block_sparse_moe.experts.66.w2", "model.layers.14.block_sparse_moe.experts.67.w2", "model.layers.14.block_sparse_moe.experts.68.w2", "model.layers.14.block_sparse_moe.experts.69.w2", "model.layers.14.block_sparse_moe.experts.70.w2", "model.layers.14.block_sparse_moe.experts.71.w2", "model.layers.14.block_sparse_moe.experts.72.w2", "model.layers.14.block_sparse_moe.experts.73.w2", "model.layers.14.block_sparse_moe.experts.74.w2", "model.layers.14.block_sparse_moe.experts.75.w2", "model.layers.14.block_sparse_moe.experts.76.w2", "model.layers.14.block_sparse_moe.experts.77.w2", "model.layers.14.block_sparse_moe.experts.78.w2", "model.layers.14.block_sparse_moe.experts.79.w2", "model.layers.14.block_sparse_moe.experts.80.w2", "model.layers.14.block_sparse_moe.experts.81.w2", "model.layers.14.block_sparse_moe.experts.82.w2", "model.layers.14.block_sparse_moe.experts.83.w2", "model.layers.14.block_sparse_moe.experts.84.w2", "model.layers.14.block_sparse_moe.experts.85.w2", "model.layers.14.block_sparse_moe.experts.86.w2", "model.layers.14.block_sparse_moe.experts.87.w2", "model.layers.14.block_sparse_moe.experts.88.w2", "model.layers.14.block_sparse_moe.experts.89.w2", "model.layers.14.block_sparse_moe.experts.90.w2", "model.layers.14.block_sparse_moe.experts.91.w2", "model.layers.14.block_sparse_moe.experts.92.w2", "model.layers.14.block_sparse_moe.experts.93.w2", "model.layers.14.block_sparse_moe.experts.94.w2", "model.layers.14.block_sparse_moe.experts.95.w2", "model.layers.14.block_sparse_moe.experts.96.w2", "model.layers.14.block_sparse_moe.experts.97.w2", "model.layers.14.block_sparse_moe.experts.98.w2", "model.layers.14.block_sparse_moe.experts.99.w2", "model.layers.14.block_sparse_moe.experts.100.w2", "model.layers.14.block_sparse_moe.experts.101.w2", "model.layers.14.block_sparse_moe.experts.102.w2", "model.layers.14.block_sparse_moe.experts.103.w2", "model.layers.14.block_sparse_moe.experts.104.w2", "model.layers.14.block_sparse_moe.experts.105.w2", "model.layers.14.block_sparse_moe.experts.106.w2", "model.layers.14.block_sparse_moe.experts.107.w2", "model.layers.14.block_sparse_moe.experts.108.w2", "model.layers.14.block_sparse_moe.experts.109.w2", "model.layers.14.block_sparse_moe.experts.110.w2", "model.layers.14.block_sparse_moe.experts.111.w2", "model.layers.14.block_sparse_moe.experts.112.w2", "model.layers.14.block_sparse_moe.experts.113.w2", "model.layers.14.block_sparse_moe.experts.114.w2", "model.layers.14.block_sparse_moe.experts.115.w2", "model.layers.14.block_sparse_moe.experts.116.w2", "model.layers.14.block_sparse_moe.experts.117.w2", "model.layers.14.block_sparse_moe.experts.118.w2", "model.layers.14.block_sparse_moe.experts.119.w2", "model.layers.14.block_sparse_moe.experts.120.w2", "model.layers.14.block_sparse_moe.experts.121.w2", "model.layers.14.block_sparse_moe.experts.122.w2", "model.layers.14.block_sparse_moe.experts.123.w2", "model.layers.14.block_sparse_moe.experts.124.w2", "model.layers.14.block_sparse_moe.experts.125.w2", "model.layers.14.block_sparse_moe.experts.126.w2", "model.layers.14.block_sparse_moe.experts.127.w2", "model.layers.14.block_sparse_moe.experts.128.w2", "model.layers.14.block_sparse_moe.experts.129.w2", "model.layers.14.block_sparse_moe.experts.130.w2", "model.layers.14.block_sparse_moe.experts.131.w2", "model.layers.14.block_sparse_moe.experts.132.w2", "model.layers.14.block_sparse_moe.experts.133.w2", "model.layers.14.block_sparse_moe.experts.134.w2", "model.layers.14.block_sparse_moe.experts.135.w2", "model.layers.14.block_sparse_moe.experts.136.w2", "model.layers.14.block_sparse_moe.experts.137.w2", "model.layers.14.block_sparse_moe.experts.138.w2", "model.layers.14.block_sparse_moe.experts.139.w2", "model.layers.14.block_sparse_moe.experts.140.w2", "model.layers.14.block_sparse_moe.experts.141.w2", "model.layers.14.block_sparse_moe.experts.142.w2", "model.layers.14.block_sparse_moe.experts.143.w2", "model.layers.14.block_sparse_moe.experts.144.w2", "model.layers.14.block_sparse_moe.experts.145.w2", "model.layers.14.block_sparse_moe.experts.146.w2", "model.layers.14.block_sparse_moe.experts.147.w2", "model.layers.14.block_sparse_moe.experts.148.w2", "model.layers.14.block_sparse_moe.experts.149.w2", "model.layers.14.block_sparse_moe.experts.150.w2", "model.layers.14.block_sparse_moe.experts.151.w2", "model.layers.14.block_sparse_moe.experts.152.w2", "model.layers.14.block_sparse_moe.experts.153.w2", "model.layers.14.block_sparse_moe.experts.154.w2", "model.layers.14.block_sparse_moe.experts.155.w2", "model.layers.14.block_sparse_moe.experts.156.w2", "model.layers.14.block_sparse_moe.experts.157.w2", "model.layers.14.block_sparse_moe.experts.158.w2", "model.layers.14.block_sparse_moe.experts.159.w2", "model.layers.14.block_sparse_moe.experts.160.w2", "model.layers.14.block_sparse_moe.experts.161.w2", "model.layers.14.block_sparse_moe.experts.162.w2", "model.layers.14.block_sparse_moe.experts.163.w2", "model.layers.14.block_sparse_moe.experts.164.w2", "model.layers.14.block_sparse_moe.experts.165.w2", "model.layers.14.block_sparse_moe.experts.166.w2", "model.layers.14.block_sparse_moe.experts.167.w2", "model.layers.14.block_sparse_moe.experts.168.w2", "model.layers.14.block_sparse_moe.experts.169.w2", "model.layers.14.block_sparse_moe.experts.170.w2", "model.layers.14.block_sparse_moe.experts.171.w2", "model.layers.14.block_sparse_moe.experts.172.w2", "model.layers.14.block_sparse_moe.experts.173.w2", "model.layers.14.block_sparse_moe.experts.174.w2", "model.layers.14.block_sparse_moe.experts.175.w2", "model.layers.14.block_sparse_moe.experts.176.w2", "model.layers.14.block_sparse_moe.experts.177.w2", "model.layers.14.block_sparse_moe.experts.178.w2", "model.layers.14.block_sparse_moe.experts.179.w2", "model.layers.14.block_sparse_moe.experts.180.w2", "model.layers.14.block_sparse_moe.experts.181.w2", "model.layers.14.block_sparse_moe.experts.182.w2", "model.layers.14.block_sparse_moe.experts.183.w2", "model.layers.14.block_sparse_moe.experts.184.w2", "model.layers.14.block_sparse_moe.experts.185.w2", "model.layers.14.block_sparse_moe.experts.186.w2", "model.layers.14.block_sparse_moe.experts.187.w2", "model.layers.14.block_sparse_moe.experts.188.w2", "model.layers.14.block_sparse_moe.experts.189.w2", "model.layers.14.block_sparse_moe.experts.190.w2", "model.layers.14.block_sparse_moe.experts.191.w2", "model.layers.14.block_sparse_moe.experts.192.w2", "model.layers.14.block_sparse_moe.experts.193.w2", "model.layers.14.block_sparse_moe.experts.194.w2", "model.layers.14.block_sparse_moe.experts.195.w2", "model.layers.14.block_sparse_moe.experts.196.w2", "model.layers.14.block_sparse_moe.experts.197.w2", "model.layers.14.block_sparse_moe.experts.198.w2", "model.layers.14.block_sparse_moe.experts.199.w2", "model.layers.14.block_sparse_moe.experts.200.w2", "model.layers.14.block_sparse_moe.experts.201.w2", "model.layers.14.block_sparse_moe.experts.202.w2", "model.layers.14.block_sparse_moe.experts.203.w2", "model.layers.14.block_sparse_moe.experts.204.w2", "model.layers.14.block_sparse_moe.experts.205.w2", "model.layers.14.block_sparse_moe.experts.206.w2", "model.layers.14.block_sparse_moe.experts.207.w2", "model.layers.14.block_sparse_moe.experts.208.w2", "model.layers.14.block_sparse_moe.experts.209.w2", "model.layers.14.block_sparse_moe.experts.210.w2", "model.layers.14.block_sparse_moe.experts.211.w2", "model.layers.14.block_sparse_moe.experts.212.w2", "model.layers.14.block_sparse_moe.experts.213.w2", "model.layers.14.block_sparse_moe.experts.214.w2", "model.layers.14.block_sparse_moe.experts.215.w2", "model.layers.14.block_sparse_moe.experts.216.w2", "model.layers.14.block_sparse_moe.experts.217.w2", "model.layers.14.block_sparse_moe.experts.218.w2", "model.layers.14.block_sparse_moe.experts.219.w2", "model.layers.14.block_sparse_moe.experts.220.w2", "model.layers.14.block_sparse_moe.experts.221.w2", "model.layers.14.block_sparse_moe.experts.222.w2", "model.layers.14.block_sparse_moe.experts.223.w2", "model.layers.14.block_sparse_moe.experts.224.w2", "model.layers.14.block_sparse_moe.experts.225.w2", "model.layers.14.block_sparse_moe.experts.226.w2", "model.layers.14.block_sparse_moe.experts.227.w2", "model.layers.14.block_sparse_moe.experts.228.w2", "model.layers.14.block_sparse_moe.experts.229.w2", "model.layers.14.block_sparse_moe.experts.230.w2", "model.layers.14.block_sparse_moe.experts.231.w2", "model.layers.14.block_sparse_moe.experts.232.w2", "model.layers.14.block_sparse_moe.experts.233.w2", "model.layers.14.block_sparse_moe.experts.234.w2", "model.layers.14.block_sparse_moe.experts.235.w2", "model.layers.14.block_sparse_moe.experts.236.w2", "model.layers.14.block_sparse_moe.experts.237.w2", "model.layers.14.block_sparse_moe.experts.238.w2", "model.layers.14.block_sparse_moe.experts.239.w2", "model.layers.14.block_sparse_moe.experts.240.w2", "model.layers.14.block_sparse_moe.experts.241.w2", "model.layers.14.block_sparse_moe.experts.242.w2", "model.layers.14.block_sparse_moe.experts.243.w2", "model.layers.14.block_sparse_moe.experts.244.w2", "model.layers.14.block_sparse_moe.experts.245.w2", "model.layers.14.block_sparse_moe.experts.246.w2", "model.layers.14.block_sparse_moe.experts.247.w2", "model.layers.14.block_sparse_moe.experts.248.w2", "model.layers.14.block_sparse_moe.experts.249.w2", "model.layers.14.block_sparse_moe.experts.250.w2", "model.layers.14.block_sparse_moe.experts.251.w2", "model.layers.14.block_sparse_moe.experts.252.w2", "model.layers.14.block_sparse_moe.experts.253.w2", "model.layers.14.block_sparse_moe.experts.254.w2", "model.layers.14.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006784830242395068, "dbits": 3623878656 } ] }, { "idx": 30, "layers": [ "model.layers.15.self_attn.q_proj", "model.layers.15.self_attn.k_proj", "model.layers.15.self_attn.v_proj", "model.layers.15.self_attn.o_proj" ], "candidates": [ { "dkld": 0.001362588070333015, "dbits": 44040192 } ] }, { "idx": 31, "layers": [ "model.layers.15.block_sparse_moe.experts.0.w1", "model.layers.15.block_sparse_moe.experts.1.w1", "model.layers.15.block_sparse_moe.experts.2.w1", "model.layers.15.block_sparse_moe.experts.3.w1", "model.layers.15.block_sparse_moe.experts.4.w1", "model.layers.15.block_sparse_moe.experts.5.w1", "model.layers.15.block_sparse_moe.experts.6.w1", "model.layers.15.block_sparse_moe.experts.7.w1", "model.layers.15.block_sparse_moe.experts.8.w1", "model.layers.15.block_sparse_moe.experts.9.w1", "model.layers.15.block_sparse_moe.experts.10.w1", "model.layers.15.block_sparse_moe.experts.11.w1", "model.layers.15.block_sparse_moe.experts.12.w1", "model.layers.15.block_sparse_moe.experts.13.w1", "model.layers.15.block_sparse_moe.experts.14.w1", "model.layers.15.block_sparse_moe.experts.15.w1", "model.layers.15.block_sparse_moe.experts.16.w1", "model.layers.15.block_sparse_moe.experts.17.w1", "model.layers.15.block_sparse_moe.experts.18.w1", "model.layers.15.block_sparse_moe.experts.19.w1", "model.layers.15.block_sparse_moe.experts.20.w1", "model.layers.15.block_sparse_moe.experts.21.w1", "model.layers.15.block_sparse_moe.experts.22.w1", "model.layers.15.block_sparse_moe.experts.23.w1", "model.layers.15.block_sparse_moe.experts.24.w1", "model.layers.15.block_sparse_moe.experts.25.w1", "model.layers.15.block_sparse_moe.experts.26.w1", "model.layers.15.block_sparse_moe.experts.27.w1", "model.layers.15.block_sparse_moe.experts.28.w1", "model.layers.15.block_sparse_moe.experts.29.w1", "model.layers.15.block_sparse_moe.experts.30.w1", "model.layers.15.block_sparse_moe.experts.31.w1", "model.layers.15.block_sparse_moe.experts.32.w1", "model.layers.15.block_sparse_moe.experts.33.w1", "model.layers.15.block_sparse_moe.experts.34.w1", "model.layers.15.block_sparse_moe.experts.35.w1", "model.layers.15.block_sparse_moe.experts.36.w1", "model.layers.15.block_sparse_moe.experts.37.w1", "model.layers.15.block_sparse_moe.experts.38.w1", "model.layers.15.block_sparse_moe.experts.39.w1", "model.layers.15.block_sparse_moe.experts.40.w1", "model.layers.15.block_sparse_moe.experts.41.w1", "model.layers.15.block_sparse_moe.experts.42.w1", "model.layers.15.block_sparse_moe.experts.43.w1", "model.layers.15.block_sparse_moe.experts.44.w1", "model.layers.15.block_sparse_moe.experts.45.w1", "model.layers.15.block_sparse_moe.experts.46.w1", "model.layers.15.block_sparse_moe.experts.47.w1", "model.layers.15.block_sparse_moe.experts.48.w1", "model.layers.15.block_sparse_moe.experts.49.w1", "model.layers.15.block_sparse_moe.experts.50.w1", "model.layers.15.block_sparse_moe.experts.51.w1", "model.layers.15.block_sparse_moe.experts.52.w1", "model.layers.15.block_sparse_moe.experts.53.w1", "model.layers.15.block_sparse_moe.experts.54.w1", "model.layers.15.block_sparse_moe.experts.55.w1", "model.layers.15.block_sparse_moe.experts.56.w1", "model.layers.15.block_sparse_moe.experts.57.w1", "model.layers.15.block_sparse_moe.experts.58.w1", "model.layers.15.block_sparse_moe.experts.59.w1", "model.layers.15.block_sparse_moe.experts.60.w1", "model.layers.15.block_sparse_moe.experts.61.w1", "model.layers.15.block_sparse_moe.experts.62.w1", "model.layers.15.block_sparse_moe.experts.63.w1", "model.layers.15.block_sparse_moe.experts.64.w1", "model.layers.15.block_sparse_moe.experts.65.w1", "model.layers.15.block_sparse_moe.experts.66.w1", "model.layers.15.block_sparse_moe.experts.67.w1", "model.layers.15.block_sparse_moe.experts.68.w1", "model.layers.15.block_sparse_moe.experts.69.w1", "model.layers.15.block_sparse_moe.experts.70.w1", "model.layers.15.block_sparse_moe.experts.71.w1", "model.layers.15.block_sparse_moe.experts.72.w1", "model.layers.15.block_sparse_moe.experts.73.w1", "model.layers.15.block_sparse_moe.experts.74.w1", "model.layers.15.block_sparse_moe.experts.75.w1", "model.layers.15.block_sparse_moe.experts.76.w1", "model.layers.15.block_sparse_moe.experts.77.w1", "model.layers.15.block_sparse_moe.experts.78.w1", "model.layers.15.block_sparse_moe.experts.79.w1", "model.layers.15.block_sparse_moe.experts.80.w1", "model.layers.15.block_sparse_moe.experts.81.w1", "model.layers.15.block_sparse_moe.experts.82.w1", "model.layers.15.block_sparse_moe.experts.83.w1", "model.layers.15.block_sparse_moe.experts.84.w1", "model.layers.15.block_sparse_moe.experts.85.w1", "model.layers.15.block_sparse_moe.experts.86.w1", "model.layers.15.block_sparse_moe.experts.87.w1", "model.layers.15.block_sparse_moe.experts.88.w1", "model.layers.15.block_sparse_moe.experts.89.w1", "model.layers.15.block_sparse_moe.experts.90.w1", "model.layers.15.block_sparse_moe.experts.91.w1", "model.layers.15.block_sparse_moe.experts.92.w1", "model.layers.15.block_sparse_moe.experts.93.w1", "model.layers.15.block_sparse_moe.experts.94.w1", "model.layers.15.block_sparse_moe.experts.95.w1", "model.layers.15.block_sparse_moe.experts.96.w1", "model.layers.15.block_sparse_moe.experts.97.w1", "model.layers.15.block_sparse_moe.experts.98.w1", "model.layers.15.block_sparse_moe.experts.99.w1", "model.layers.15.block_sparse_moe.experts.100.w1", "model.layers.15.block_sparse_moe.experts.101.w1", "model.layers.15.block_sparse_moe.experts.102.w1", "model.layers.15.block_sparse_moe.experts.103.w1", "model.layers.15.block_sparse_moe.experts.104.w1", "model.layers.15.block_sparse_moe.experts.105.w1", "model.layers.15.block_sparse_moe.experts.106.w1", "model.layers.15.block_sparse_moe.experts.107.w1", "model.layers.15.block_sparse_moe.experts.108.w1", "model.layers.15.block_sparse_moe.experts.109.w1", "model.layers.15.block_sparse_moe.experts.110.w1", "model.layers.15.block_sparse_moe.experts.111.w1", "model.layers.15.block_sparse_moe.experts.112.w1", "model.layers.15.block_sparse_moe.experts.113.w1", "model.layers.15.block_sparse_moe.experts.114.w1", "model.layers.15.block_sparse_moe.experts.115.w1", "model.layers.15.block_sparse_moe.experts.116.w1", "model.layers.15.block_sparse_moe.experts.117.w1", "model.layers.15.block_sparse_moe.experts.118.w1", "model.layers.15.block_sparse_moe.experts.119.w1", "model.layers.15.block_sparse_moe.experts.120.w1", "model.layers.15.block_sparse_moe.experts.121.w1", "model.layers.15.block_sparse_moe.experts.122.w1", "model.layers.15.block_sparse_moe.experts.123.w1", "model.layers.15.block_sparse_moe.experts.124.w1", "model.layers.15.block_sparse_moe.experts.125.w1", "model.layers.15.block_sparse_moe.experts.126.w1", "model.layers.15.block_sparse_moe.experts.127.w1", "model.layers.15.block_sparse_moe.experts.128.w1", "model.layers.15.block_sparse_moe.experts.129.w1", "model.layers.15.block_sparse_moe.experts.130.w1", "model.layers.15.block_sparse_moe.experts.131.w1", "model.layers.15.block_sparse_moe.experts.132.w1", "model.layers.15.block_sparse_moe.experts.133.w1", "model.layers.15.block_sparse_moe.experts.134.w1", "model.layers.15.block_sparse_moe.experts.135.w1", "model.layers.15.block_sparse_moe.experts.136.w1", "model.layers.15.block_sparse_moe.experts.137.w1", "model.layers.15.block_sparse_moe.experts.138.w1", "model.layers.15.block_sparse_moe.experts.139.w1", "model.layers.15.block_sparse_moe.experts.140.w1", "model.layers.15.block_sparse_moe.experts.141.w1", "model.layers.15.block_sparse_moe.experts.142.w1", "model.layers.15.block_sparse_moe.experts.143.w1", "model.layers.15.block_sparse_moe.experts.144.w1", "model.layers.15.block_sparse_moe.experts.145.w1", "model.layers.15.block_sparse_moe.experts.146.w1", "model.layers.15.block_sparse_moe.experts.147.w1", "model.layers.15.block_sparse_moe.experts.148.w1", "model.layers.15.block_sparse_moe.experts.149.w1", "model.layers.15.block_sparse_moe.experts.150.w1", "model.layers.15.block_sparse_moe.experts.151.w1", "model.layers.15.block_sparse_moe.experts.152.w1", "model.layers.15.block_sparse_moe.experts.153.w1", "model.layers.15.block_sparse_moe.experts.154.w1", "model.layers.15.block_sparse_moe.experts.155.w1", "model.layers.15.block_sparse_moe.experts.156.w1", "model.layers.15.block_sparse_moe.experts.157.w1", "model.layers.15.block_sparse_moe.experts.158.w1", "model.layers.15.block_sparse_moe.experts.159.w1", "model.layers.15.block_sparse_moe.experts.160.w1", "model.layers.15.block_sparse_moe.experts.161.w1", "model.layers.15.block_sparse_moe.experts.162.w1", "model.layers.15.block_sparse_moe.experts.163.w1", "model.layers.15.block_sparse_moe.experts.164.w1", "model.layers.15.block_sparse_moe.experts.165.w1", "model.layers.15.block_sparse_moe.experts.166.w1", "model.layers.15.block_sparse_moe.experts.167.w1", "model.layers.15.block_sparse_moe.experts.168.w1", "model.layers.15.block_sparse_moe.experts.169.w1", "model.layers.15.block_sparse_moe.experts.170.w1", "model.layers.15.block_sparse_moe.experts.171.w1", "model.layers.15.block_sparse_moe.experts.172.w1", "model.layers.15.block_sparse_moe.experts.173.w1", "model.layers.15.block_sparse_moe.experts.174.w1", "model.layers.15.block_sparse_moe.experts.175.w1", "model.layers.15.block_sparse_moe.experts.176.w1", "model.layers.15.block_sparse_moe.experts.177.w1", "model.layers.15.block_sparse_moe.experts.178.w1", "model.layers.15.block_sparse_moe.experts.179.w1", "model.layers.15.block_sparse_moe.experts.180.w1", "model.layers.15.block_sparse_moe.experts.181.w1", "model.layers.15.block_sparse_moe.experts.182.w1", "model.layers.15.block_sparse_moe.experts.183.w1", "model.layers.15.block_sparse_moe.experts.184.w1", "model.layers.15.block_sparse_moe.experts.185.w1", "model.layers.15.block_sparse_moe.experts.186.w1", "model.layers.15.block_sparse_moe.experts.187.w1", "model.layers.15.block_sparse_moe.experts.188.w1", "model.layers.15.block_sparse_moe.experts.189.w1", "model.layers.15.block_sparse_moe.experts.190.w1", "model.layers.15.block_sparse_moe.experts.191.w1", "model.layers.15.block_sparse_moe.experts.192.w1", "model.layers.15.block_sparse_moe.experts.193.w1", "model.layers.15.block_sparse_moe.experts.194.w1", "model.layers.15.block_sparse_moe.experts.195.w1", "model.layers.15.block_sparse_moe.experts.196.w1", "model.layers.15.block_sparse_moe.experts.197.w1", "model.layers.15.block_sparse_moe.experts.198.w1", "model.layers.15.block_sparse_moe.experts.199.w1", "model.layers.15.block_sparse_moe.experts.200.w1", "model.layers.15.block_sparse_moe.experts.201.w1", "model.layers.15.block_sparse_moe.experts.202.w1", "model.layers.15.block_sparse_moe.experts.203.w1", "model.layers.15.block_sparse_moe.experts.204.w1", "model.layers.15.block_sparse_moe.experts.205.w1", "model.layers.15.block_sparse_moe.experts.206.w1", "model.layers.15.block_sparse_moe.experts.207.w1", "model.layers.15.block_sparse_moe.experts.208.w1", "model.layers.15.block_sparse_moe.experts.209.w1", "model.layers.15.block_sparse_moe.experts.210.w1", "model.layers.15.block_sparse_moe.experts.211.w1", "model.layers.15.block_sparse_moe.experts.212.w1", "model.layers.15.block_sparse_moe.experts.213.w1", "model.layers.15.block_sparse_moe.experts.214.w1", "model.layers.15.block_sparse_moe.experts.215.w1", "model.layers.15.block_sparse_moe.experts.216.w1", "model.layers.15.block_sparse_moe.experts.217.w1", "model.layers.15.block_sparse_moe.experts.218.w1", "model.layers.15.block_sparse_moe.experts.219.w1", "model.layers.15.block_sparse_moe.experts.220.w1", "model.layers.15.block_sparse_moe.experts.221.w1", "model.layers.15.block_sparse_moe.experts.222.w1", "model.layers.15.block_sparse_moe.experts.223.w1", "model.layers.15.block_sparse_moe.experts.224.w1", "model.layers.15.block_sparse_moe.experts.225.w1", "model.layers.15.block_sparse_moe.experts.226.w1", "model.layers.15.block_sparse_moe.experts.227.w1", "model.layers.15.block_sparse_moe.experts.228.w1", "model.layers.15.block_sparse_moe.experts.229.w1", "model.layers.15.block_sparse_moe.experts.230.w1", "model.layers.15.block_sparse_moe.experts.231.w1", "model.layers.15.block_sparse_moe.experts.232.w1", "model.layers.15.block_sparse_moe.experts.233.w1", "model.layers.15.block_sparse_moe.experts.234.w1", "model.layers.15.block_sparse_moe.experts.235.w1", "model.layers.15.block_sparse_moe.experts.236.w1", "model.layers.15.block_sparse_moe.experts.237.w1", "model.layers.15.block_sparse_moe.experts.238.w1", "model.layers.15.block_sparse_moe.experts.239.w1", "model.layers.15.block_sparse_moe.experts.240.w1", "model.layers.15.block_sparse_moe.experts.241.w1", "model.layers.15.block_sparse_moe.experts.242.w1", "model.layers.15.block_sparse_moe.experts.243.w1", "model.layers.15.block_sparse_moe.experts.244.w1", "model.layers.15.block_sparse_moe.experts.245.w1", "model.layers.15.block_sparse_moe.experts.246.w1", "model.layers.15.block_sparse_moe.experts.247.w1", "model.layers.15.block_sparse_moe.experts.248.w1", "model.layers.15.block_sparse_moe.experts.249.w1", "model.layers.15.block_sparse_moe.experts.250.w1", "model.layers.15.block_sparse_moe.experts.251.w1", "model.layers.15.block_sparse_moe.experts.252.w1", "model.layers.15.block_sparse_moe.experts.253.w1", "model.layers.15.block_sparse_moe.experts.254.w1", "model.layers.15.block_sparse_moe.experts.255.w1", "model.layers.15.block_sparse_moe.experts.0.w3", "model.layers.15.block_sparse_moe.experts.1.w3", "model.layers.15.block_sparse_moe.experts.2.w3", "model.layers.15.block_sparse_moe.experts.3.w3", "model.layers.15.block_sparse_moe.experts.4.w3", "model.layers.15.block_sparse_moe.experts.5.w3", "model.layers.15.block_sparse_moe.experts.6.w3", "model.layers.15.block_sparse_moe.experts.7.w3", "model.layers.15.block_sparse_moe.experts.8.w3", "model.layers.15.block_sparse_moe.experts.9.w3", "model.layers.15.block_sparse_moe.experts.10.w3", "model.layers.15.block_sparse_moe.experts.11.w3", "model.layers.15.block_sparse_moe.experts.12.w3", "model.layers.15.block_sparse_moe.experts.13.w3", "model.layers.15.block_sparse_moe.experts.14.w3", "model.layers.15.block_sparse_moe.experts.15.w3", "model.layers.15.block_sparse_moe.experts.16.w3", "model.layers.15.block_sparse_moe.experts.17.w3", "model.layers.15.block_sparse_moe.experts.18.w3", "model.layers.15.block_sparse_moe.experts.19.w3", "model.layers.15.block_sparse_moe.experts.20.w3", "model.layers.15.block_sparse_moe.experts.21.w3", "model.layers.15.block_sparse_moe.experts.22.w3", "model.layers.15.block_sparse_moe.experts.23.w3", "model.layers.15.block_sparse_moe.experts.24.w3", "model.layers.15.block_sparse_moe.experts.25.w3", "model.layers.15.block_sparse_moe.experts.26.w3", "model.layers.15.block_sparse_moe.experts.27.w3", "model.layers.15.block_sparse_moe.experts.28.w3", "model.layers.15.block_sparse_moe.experts.29.w3", "model.layers.15.block_sparse_moe.experts.30.w3", "model.layers.15.block_sparse_moe.experts.31.w3", "model.layers.15.block_sparse_moe.experts.32.w3", "model.layers.15.block_sparse_moe.experts.33.w3", "model.layers.15.block_sparse_moe.experts.34.w3", "model.layers.15.block_sparse_moe.experts.35.w3", "model.layers.15.block_sparse_moe.experts.36.w3", "model.layers.15.block_sparse_moe.experts.37.w3", "model.layers.15.block_sparse_moe.experts.38.w3", "model.layers.15.block_sparse_moe.experts.39.w3", "model.layers.15.block_sparse_moe.experts.40.w3", "model.layers.15.block_sparse_moe.experts.41.w3", "model.layers.15.block_sparse_moe.experts.42.w3", "model.layers.15.block_sparse_moe.experts.43.w3", "model.layers.15.block_sparse_moe.experts.44.w3", "model.layers.15.block_sparse_moe.experts.45.w3", "model.layers.15.block_sparse_moe.experts.46.w3", "model.layers.15.block_sparse_moe.experts.47.w3", "model.layers.15.block_sparse_moe.experts.48.w3", "model.layers.15.block_sparse_moe.experts.49.w3", "model.layers.15.block_sparse_moe.experts.50.w3", "model.layers.15.block_sparse_moe.experts.51.w3", "model.layers.15.block_sparse_moe.experts.52.w3", "model.layers.15.block_sparse_moe.experts.53.w3", "model.layers.15.block_sparse_moe.experts.54.w3", "model.layers.15.block_sparse_moe.experts.55.w3", "model.layers.15.block_sparse_moe.experts.56.w3", "model.layers.15.block_sparse_moe.experts.57.w3", "model.layers.15.block_sparse_moe.experts.58.w3", "model.layers.15.block_sparse_moe.experts.59.w3", "model.layers.15.block_sparse_moe.experts.60.w3", "model.layers.15.block_sparse_moe.experts.61.w3", "model.layers.15.block_sparse_moe.experts.62.w3", "model.layers.15.block_sparse_moe.experts.63.w3", "model.layers.15.block_sparse_moe.experts.64.w3", "model.layers.15.block_sparse_moe.experts.65.w3", "model.layers.15.block_sparse_moe.experts.66.w3", "model.layers.15.block_sparse_moe.experts.67.w3", "model.layers.15.block_sparse_moe.experts.68.w3", "model.layers.15.block_sparse_moe.experts.69.w3", "model.layers.15.block_sparse_moe.experts.70.w3", "model.layers.15.block_sparse_moe.experts.71.w3", "model.layers.15.block_sparse_moe.experts.72.w3", "model.layers.15.block_sparse_moe.experts.73.w3", "model.layers.15.block_sparse_moe.experts.74.w3", "model.layers.15.block_sparse_moe.experts.75.w3", "model.layers.15.block_sparse_moe.experts.76.w3", "model.layers.15.block_sparse_moe.experts.77.w3", "model.layers.15.block_sparse_moe.experts.78.w3", "model.layers.15.block_sparse_moe.experts.79.w3", "model.layers.15.block_sparse_moe.experts.80.w3", "model.layers.15.block_sparse_moe.experts.81.w3", "model.layers.15.block_sparse_moe.experts.82.w3", "model.layers.15.block_sparse_moe.experts.83.w3", "model.layers.15.block_sparse_moe.experts.84.w3", "model.layers.15.block_sparse_moe.experts.85.w3", "model.layers.15.block_sparse_moe.experts.86.w3", "model.layers.15.block_sparse_moe.experts.87.w3", "model.layers.15.block_sparse_moe.experts.88.w3", "model.layers.15.block_sparse_moe.experts.89.w3", "model.layers.15.block_sparse_moe.experts.90.w3", "model.layers.15.block_sparse_moe.experts.91.w3", "model.layers.15.block_sparse_moe.experts.92.w3", "model.layers.15.block_sparse_moe.experts.93.w3", "model.layers.15.block_sparse_moe.experts.94.w3", "model.layers.15.block_sparse_moe.experts.95.w3", "model.layers.15.block_sparse_moe.experts.96.w3", "model.layers.15.block_sparse_moe.experts.97.w3", "model.layers.15.block_sparse_moe.experts.98.w3", "model.layers.15.block_sparse_moe.experts.99.w3", "model.layers.15.block_sparse_moe.experts.100.w3", "model.layers.15.block_sparse_moe.experts.101.w3", "model.layers.15.block_sparse_moe.experts.102.w3", "model.layers.15.block_sparse_moe.experts.103.w3", "model.layers.15.block_sparse_moe.experts.104.w3", "model.layers.15.block_sparse_moe.experts.105.w3", "model.layers.15.block_sparse_moe.experts.106.w3", "model.layers.15.block_sparse_moe.experts.107.w3", "model.layers.15.block_sparse_moe.experts.108.w3", "model.layers.15.block_sparse_moe.experts.109.w3", "model.layers.15.block_sparse_moe.experts.110.w3", "model.layers.15.block_sparse_moe.experts.111.w3", "model.layers.15.block_sparse_moe.experts.112.w3", "model.layers.15.block_sparse_moe.experts.113.w3", "model.layers.15.block_sparse_moe.experts.114.w3", "model.layers.15.block_sparse_moe.experts.115.w3", "model.layers.15.block_sparse_moe.experts.116.w3", "model.layers.15.block_sparse_moe.experts.117.w3", "model.layers.15.block_sparse_moe.experts.118.w3", "model.layers.15.block_sparse_moe.experts.119.w3", "model.layers.15.block_sparse_moe.experts.120.w3", "model.layers.15.block_sparse_moe.experts.121.w3", "model.layers.15.block_sparse_moe.experts.122.w3", "model.layers.15.block_sparse_moe.experts.123.w3", "model.layers.15.block_sparse_moe.experts.124.w3", "model.layers.15.block_sparse_moe.experts.125.w3", "model.layers.15.block_sparse_moe.experts.126.w3", "model.layers.15.block_sparse_moe.experts.127.w3", "model.layers.15.block_sparse_moe.experts.128.w3", "model.layers.15.block_sparse_moe.experts.129.w3", "model.layers.15.block_sparse_moe.experts.130.w3", "model.layers.15.block_sparse_moe.experts.131.w3", "model.layers.15.block_sparse_moe.experts.132.w3", "model.layers.15.block_sparse_moe.experts.133.w3", "model.layers.15.block_sparse_moe.experts.134.w3", "model.layers.15.block_sparse_moe.experts.135.w3", "model.layers.15.block_sparse_moe.experts.136.w3", "model.layers.15.block_sparse_moe.experts.137.w3", "model.layers.15.block_sparse_moe.experts.138.w3", "model.layers.15.block_sparse_moe.experts.139.w3", "model.layers.15.block_sparse_moe.experts.140.w3", "model.layers.15.block_sparse_moe.experts.141.w3", "model.layers.15.block_sparse_moe.experts.142.w3", "model.layers.15.block_sparse_moe.experts.143.w3", "model.layers.15.block_sparse_moe.experts.144.w3", "model.layers.15.block_sparse_moe.experts.145.w3", "model.layers.15.block_sparse_moe.experts.146.w3", "model.layers.15.block_sparse_moe.experts.147.w3", "model.layers.15.block_sparse_moe.experts.148.w3", "model.layers.15.block_sparse_moe.experts.149.w3", "model.layers.15.block_sparse_moe.experts.150.w3", "model.layers.15.block_sparse_moe.experts.151.w3", "model.layers.15.block_sparse_moe.experts.152.w3", "model.layers.15.block_sparse_moe.experts.153.w3", "model.layers.15.block_sparse_moe.experts.154.w3", "model.layers.15.block_sparse_moe.experts.155.w3", "model.layers.15.block_sparse_moe.experts.156.w3", "model.layers.15.block_sparse_moe.experts.157.w3", "model.layers.15.block_sparse_moe.experts.158.w3", "model.layers.15.block_sparse_moe.experts.159.w3", "model.layers.15.block_sparse_moe.experts.160.w3", "model.layers.15.block_sparse_moe.experts.161.w3", "model.layers.15.block_sparse_moe.experts.162.w3", "model.layers.15.block_sparse_moe.experts.163.w3", "model.layers.15.block_sparse_moe.experts.164.w3", "model.layers.15.block_sparse_moe.experts.165.w3", "model.layers.15.block_sparse_moe.experts.166.w3", "model.layers.15.block_sparse_moe.experts.167.w3", "model.layers.15.block_sparse_moe.experts.168.w3", "model.layers.15.block_sparse_moe.experts.169.w3", "model.layers.15.block_sparse_moe.experts.170.w3", "model.layers.15.block_sparse_moe.experts.171.w3", "model.layers.15.block_sparse_moe.experts.172.w3", "model.layers.15.block_sparse_moe.experts.173.w3", "model.layers.15.block_sparse_moe.experts.174.w3", "model.layers.15.block_sparse_moe.experts.175.w3", "model.layers.15.block_sparse_moe.experts.176.w3", "model.layers.15.block_sparse_moe.experts.177.w3", "model.layers.15.block_sparse_moe.experts.178.w3", "model.layers.15.block_sparse_moe.experts.179.w3", "model.layers.15.block_sparse_moe.experts.180.w3", "model.layers.15.block_sparse_moe.experts.181.w3", "model.layers.15.block_sparse_moe.experts.182.w3", "model.layers.15.block_sparse_moe.experts.183.w3", "model.layers.15.block_sparse_moe.experts.184.w3", "model.layers.15.block_sparse_moe.experts.185.w3", "model.layers.15.block_sparse_moe.experts.186.w3", "model.layers.15.block_sparse_moe.experts.187.w3", "model.layers.15.block_sparse_moe.experts.188.w3", "model.layers.15.block_sparse_moe.experts.189.w3", "model.layers.15.block_sparse_moe.experts.190.w3", "model.layers.15.block_sparse_moe.experts.191.w3", "model.layers.15.block_sparse_moe.experts.192.w3", "model.layers.15.block_sparse_moe.experts.193.w3", "model.layers.15.block_sparse_moe.experts.194.w3", "model.layers.15.block_sparse_moe.experts.195.w3", "model.layers.15.block_sparse_moe.experts.196.w3", "model.layers.15.block_sparse_moe.experts.197.w3", "model.layers.15.block_sparse_moe.experts.198.w3", "model.layers.15.block_sparse_moe.experts.199.w3", "model.layers.15.block_sparse_moe.experts.200.w3", "model.layers.15.block_sparse_moe.experts.201.w3", "model.layers.15.block_sparse_moe.experts.202.w3", "model.layers.15.block_sparse_moe.experts.203.w3", "model.layers.15.block_sparse_moe.experts.204.w3", "model.layers.15.block_sparse_moe.experts.205.w3", "model.layers.15.block_sparse_moe.experts.206.w3", "model.layers.15.block_sparse_moe.experts.207.w3", "model.layers.15.block_sparse_moe.experts.208.w3", "model.layers.15.block_sparse_moe.experts.209.w3", "model.layers.15.block_sparse_moe.experts.210.w3", "model.layers.15.block_sparse_moe.experts.211.w3", "model.layers.15.block_sparse_moe.experts.212.w3", "model.layers.15.block_sparse_moe.experts.213.w3", "model.layers.15.block_sparse_moe.experts.214.w3", "model.layers.15.block_sparse_moe.experts.215.w3", "model.layers.15.block_sparse_moe.experts.216.w3", "model.layers.15.block_sparse_moe.experts.217.w3", "model.layers.15.block_sparse_moe.experts.218.w3", "model.layers.15.block_sparse_moe.experts.219.w3", "model.layers.15.block_sparse_moe.experts.220.w3", "model.layers.15.block_sparse_moe.experts.221.w3", "model.layers.15.block_sparse_moe.experts.222.w3", "model.layers.15.block_sparse_moe.experts.223.w3", "model.layers.15.block_sparse_moe.experts.224.w3", "model.layers.15.block_sparse_moe.experts.225.w3", "model.layers.15.block_sparse_moe.experts.226.w3", "model.layers.15.block_sparse_moe.experts.227.w3", "model.layers.15.block_sparse_moe.experts.228.w3", "model.layers.15.block_sparse_moe.experts.229.w3", "model.layers.15.block_sparse_moe.experts.230.w3", "model.layers.15.block_sparse_moe.experts.231.w3", "model.layers.15.block_sparse_moe.experts.232.w3", "model.layers.15.block_sparse_moe.experts.233.w3", "model.layers.15.block_sparse_moe.experts.234.w3", "model.layers.15.block_sparse_moe.experts.235.w3", "model.layers.15.block_sparse_moe.experts.236.w3", "model.layers.15.block_sparse_moe.experts.237.w3", "model.layers.15.block_sparse_moe.experts.238.w3", "model.layers.15.block_sparse_moe.experts.239.w3", "model.layers.15.block_sparse_moe.experts.240.w3", "model.layers.15.block_sparse_moe.experts.241.w3", "model.layers.15.block_sparse_moe.experts.242.w3", "model.layers.15.block_sparse_moe.experts.243.w3", "model.layers.15.block_sparse_moe.experts.244.w3", "model.layers.15.block_sparse_moe.experts.245.w3", "model.layers.15.block_sparse_moe.experts.246.w3", "model.layers.15.block_sparse_moe.experts.247.w3", "model.layers.15.block_sparse_moe.experts.248.w3", "model.layers.15.block_sparse_moe.experts.249.w3", "model.layers.15.block_sparse_moe.experts.250.w3", "model.layers.15.block_sparse_moe.experts.251.w3", "model.layers.15.block_sparse_moe.experts.252.w3", "model.layers.15.block_sparse_moe.experts.253.w3", "model.layers.15.block_sparse_moe.experts.254.w3", "model.layers.15.block_sparse_moe.experts.255.w3", "model.layers.15.block_sparse_moe.experts.0.w2", "model.layers.15.block_sparse_moe.experts.1.w2", "model.layers.15.block_sparse_moe.experts.2.w2", "model.layers.15.block_sparse_moe.experts.3.w2", "model.layers.15.block_sparse_moe.experts.4.w2", "model.layers.15.block_sparse_moe.experts.5.w2", "model.layers.15.block_sparse_moe.experts.6.w2", "model.layers.15.block_sparse_moe.experts.7.w2", "model.layers.15.block_sparse_moe.experts.8.w2", "model.layers.15.block_sparse_moe.experts.9.w2", "model.layers.15.block_sparse_moe.experts.10.w2", "model.layers.15.block_sparse_moe.experts.11.w2", "model.layers.15.block_sparse_moe.experts.12.w2", "model.layers.15.block_sparse_moe.experts.13.w2", "model.layers.15.block_sparse_moe.experts.14.w2", "model.layers.15.block_sparse_moe.experts.15.w2", "model.layers.15.block_sparse_moe.experts.16.w2", "model.layers.15.block_sparse_moe.experts.17.w2", "model.layers.15.block_sparse_moe.experts.18.w2", "model.layers.15.block_sparse_moe.experts.19.w2", "model.layers.15.block_sparse_moe.experts.20.w2", "model.layers.15.block_sparse_moe.experts.21.w2", "model.layers.15.block_sparse_moe.experts.22.w2", "model.layers.15.block_sparse_moe.experts.23.w2", "model.layers.15.block_sparse_moe.experts.24.w2", "model.layers.15.block_sparse_moe.experts.25.w2", "model.layers.15.block_sparse_moe.experts.26.w2", "model.layers.15.block_sparse_moe.experts.27.w2", "model.layers.15.block_sparse_moe.experts.28.w2", "model.layers.15.block_sparse_moe.experts.29.w2", "model.layers.15.block_sparse_moe.experts.30.w2", "model.layers.15.block_sparse_moe.experts.31.w2", "model.layers.15.block_sparse_moe.experts.32.w2", "model.layers.15.block_sparse_moe.experts.33.w2", "model.layers.15.block_sparse_moe.experts.34.w2", "model.layers.15.block_sparse_moe.experts.35.w2", "model.layers.15.block_sparse_moe.experts.36.w2", "model.layers.15.block_sparse_moe.experts.37.w2", "model.layers.15.block_sparse_moe.experts.38.w2", "model.layers.15.block_sparse_moe.experts.39.w2", "model.layers.15.block_sparse_moe.experts.40.w2", "model.layers.15.block_sparse_moe.experts.41.w2", "model.layers.15.block_sparse_moe.experts.42.w2", "model.layers.15.block_sparse_moe.experts.43.w2", "model.layers.15.block_sparse_moe.experts.44.w2", "model.layers.15.block_sparse_moe.experts.45.w2", "model.layers.15.block_sparse_moe.experts.46.w2", "model.layers.15.block_sparse_moe.experts.47.w2", "model.layers.15.block_sparse_moe.experts.48.w2", "model.layers.15.block_sparse_moe.experts.49.w2", "model.layers.15.block_sparse_moe.experts.50.w2", "model.layers.15.block_sparse_moe.experts.51.w2", "model.layers.15.block_sparse_moe.experts.52.w2", "model.layers.15.block_sparse_moe.experts.53.w2", "model.layers.15.block_sparse_moe.experts.54.w2", "model.layers.15.block_sparse_moe.experts.55.w2", "model.layers.15.block_sparse_moe.experts.56.w2", "model.layers.15.block_sparse_moe.experts.57.w2", "model.layers.15.block_sparse_moe.experts.58.w2", "model.layers.15.block_sparse_moe.experts.59.w2", "model.layers.15.block_sparse_moe.experts.60.w2", "model.layers.15.block_sparse_moe.experts.61.w2", "model.layers.15.block_sparse_moe.experts.62.w2", "model.layers.15.block_sparse_moe.experts.63.w2", "model.layers.15.block_sparse_moe.experts.64.w2", "model.layers.15.block_sparse_moe.experts.65.w2", "model.layers.15.block_sparse_moe.experts.66.w2", "model.layers.15.block_sparse_moe.experts.67.w2", "model.layers.15.block_sparse_moe.experts.68.w2", "model.layers.15.block_sparse_moe.experts.69.w2", "model.layers.15.block_sparse_moe.experts.70.w2", "model.layers.15.block_sparse_moe.experts.71.w2", "model.layers.15.block_sparse_moe.experts.72.w2", "model.layers.15.block_sparse_moe.experts.73.w2", "model.layers.15.block_sparse_moe.experts.74.w2", "model.layers.15.block_sparse_moe.experts.75.w2", "model.layers.15.block_sparse_moe.experts.76.w2", "model.layers.15.block_sparse_moe.experts.77.w2", "model.layers.15.block_sparse_moe.experts.78.w2", "model.layers.15.block_sparse_moe.experts.79.w2", "model.layers.15.block_sparse_moe.experts.80.w2", "model.layers.15.block_sparse_moe.experts.81.w2", "model.layers.15.block_sparse_moe.experts.82.w2", "model.layers.15.block_sparse_moe.experts.83.w2", "model.layers.15.block_sparse_moe.experts.84.w2", "model.layers.15.block_sparse_moe.experts.85.w2", "model.layers.15.block_sparse_moe.experts.86.w2", "model.layers.15.block_sparse_moe.experts.87.w2", "model.layers.15.block_sparse_moe.experts.88.w2", "model.layers.15.block_sparse_moe.experts.89.w2", "model.layers.15.block_sparse_moe.experts.90.w2", "model.layers.15.block_sparse_moe.experts.91.w2", "model.layers.15.block_sparse_moe.experts.92.w2", "model.layers.15.block_sparse_moe.experts.93.w2", "model.layers.15.block_sparse_moe.experts.94.w2", "model.layers.15.block_sparse_moe.experts.95.w2", "model.layers.15.block_sparse_moe.experts.96.w2", "model.layers.15.block_sparse_moe.experts.97.w2", "model.layers.15.block_sparse_moe.experts.98.w2", "model.layers.15.block_sparse_moe.experts.99.w2", "model.layers.15.block_sparse_moe.experts.100.w2", "model.layers.15.block_sparse_moe.experts.101.w2", "model.layers.15.block_sparse_moe.experts.102.w2", "model.layers.15.block_sparse_moe.experts.103.w2", "model.layers.15.block_sparse_moe.experts.104.w2", "model.layers.15.block_sparse_moe.experts.105.w2", "model.layers.15.block_sparse_moe.experts.106.w2", "model.layers.15.block_sparse_moe.experts.107.w2", "model.layers.15.block_sparse_moe.experts.108.w2", "model.layers.15.block_sparse_moe.experts.109.w2", "model.layers.15.block_sparse_moe.experts.110.w2", "model.layers.15.block_sparse_moe.experts.111.w2", "model.layers.15.block_sparse_moe.experts.112.w2", "model.layers.15.block_sparse_moe.experts.113.w2", "model.layers.15.block_sparse_moe.experts.114.w2", "model.layers.15.block_sparse_moe.experts.115.w2", "model.layers.15.block_sparse_moe.experts.116.w2", "model.layers.15.block_sparse_moe.experts.117.w2", "model.layers.15.block_sparse_moe.experts.118.w2", "model.layers.15.block_sparse_moe.experts.119.w2", "model.layers.15.block_sparse_moe.experts.120.w2", "model.layers.15.block_sparse_moe.experts.121.w2", "model.layers.15.block_sparse_moe.experts.122.w2", "model.layers.15.block_sparse_moe.experts.123.w2", "model.layers.15.block_sparse_moe.experts.124.w2", "model.layers.15.block_sparse_moe.experts.125.w2", "model.layers.15.block_sparse_moe.experts.126.w2", "model.layers.15.block_sparse_moe.experts.127.w2", "model.layers.15.block_sparse_moe.experts.128.w2", "model.layers.15.block_sparse_moe.experts.129.w2", "model.layers.15.block_sparse_moe.experts.130.w2", "model.layers.15.block_sparse_moe.experts.131.w2", "model.layers.15.block_sparse_moe.experts.132.w2", "model.layers.15.block_sparse_moe.experts.133.w2", "model.layers.15.block_sparse_moe.experts.134.w2", "model.layers.15.block_sparse_moe.experts.135.w2", "model.layers.15.block_sparse_moe.experts.136.w2", "model.layers.15.block_sparse_moe.experts.137.w2", "model.layers.15.block_sparse_moe.experts.138.w2", "model.layers.15.block_sparse_moe.experts.139.w2", "model.layers.15.block_sparse_moe.experts.140.w2", "model.layers.15.block_sparse_moe.experts.141.w2", "model.layers.15.block_sparse_moe.experts.142.w2", "model.layers.15.block_sparse_moe.experts.143.w2", "model.layers.15.block_sparse_moe.experts.144.w2", "model.layers.15.block_sparse_moe.experts.145.w2", "model.layers.15.block_sparse_moe.experts.146.w2", "model.layers.15.block_sparse_moe.experts.147.w2", "model.layers.15.block_sparse_moe.experts.148.w2", "model.layers.15.block_sparse_moe.experts.149.w2", "model.layers.15.block_sparse_moe.experts.150.w2", "model.layers.15.block_sparse_moe.experts.151.w2", "model.layers.15.block_sparse_moe.experts.152.w2", "model.layers.15.block_sparse_moe.experts.153.w2", "model.layers.15.block_sparse_moe.experts.154.w2", "model.layers.15.block_sparse_moe.experts.155.w2", "model.layers.15.block_sparse_moe.experts.156.w2", "model.layers.15.block_sparse_moe.experts.157.w2", "model.layers.15.block_sparse_moe.experts.158.w2", "model.layers.15.block_sparse_moe.experts.159.w2", "model.layers.15.block_sparse_moe.experts.160.w2", "model.layers.15.block_sparse_moe.experts.161.w2", "model.layers.15.block_sparse_moe.experts.162.w2", "model.layers.15.block_sparse_moe.experts.163.w2", "model.layers.15.block_sparse_moe.experts.164.w2", "model.layers.15.block_sparse_moe.experts.165.w2", "model.layers.15.block_sparse_moe.experts.166.w2", "model.layers.15.block_sparse_moe.experts.167.w2", "model.layers.15.block_sparse_moe.experts.168.w2", "model.layers.15.block_sparse_moe.experts.169.w2", "model.layers.15.block_sparse_moe.experts.170.w2", "model.layers.15.block_sparse_moe.experts.171.w2", "model.layers.15.block_sparse_moe.experts.172.w2", "model.layers.15.block_sparse_moe.experts.173.w2", "model.layers.15.block_sparse_moe.experts.174.w2", "model.layers.15.block_sparse_moe.experts.175.w2", "model.layers.15.block_sparse_moe.experts.176.w2", "model.layers.15.block_sparse_moe.experts.177.w2", "model.layers.15.block_sparse_moe.experts.178.w2", "model.layers.15.block_sparse_moe.experts.179.w2", "model.layers.15.block_sparse_moe.experts.180.w2", "model.layers.15.block_sparse_moe.experts.181.w2", "model.layers.15.block_sparse_moe.experts.182.w2", "model.layers.15.block_sparse_moe.experts.183.w2", "model.layers.15.block_sparse_moe.experts.184.w2", "model.layers.15.block_sparse_moe.experts.185.w2", "model.layers.15.block_sparse_moe.experts.186.w2", "model.layers.15.block_sparse_moe.experts.187.w2", "model.layers.15.block_sparse_moe.experts.188.w2", "model.layers.15.block_sparse_moe.experts.189.w2", "model.layers.15.block_sparse_moe.experts.190.w2", "model.layers.15.block_sparse_moe.experts.191.w2", "model.layers.15.block_sparse_moe.experts.192.w2", "model.layers.15.block_sparse_moe.experts.193.w2", "model.layers.15.block_sparse_moe.experts.194.w2", "model.layers.15.block_sparse_moe.experts.195.w2", "model.layers.15.block_sparse_moe.experts.196.w2", "model.layers.15.block_sparse_moe.experts.197.w2", "model.layers.15.block_sparse_moe.experts.198.w2", "model.layers.15.block_sparse_moe.experts.199.w2", "model.layers.15.block_sparse_moe.experts.200.w2", "model.layers.15.block_sparse_moe.experts.201.w2", "model.layers.15.block_sparse_moe.experts.202.w2", "model.layers.15.block_sparse_moe.experts.203.w2", "model.layers.15.block_sparse_moe.experts.204.w2", "model.layers.15.block_sparse_moe.experts.205.w2", "model.layers.15.block_sparse_moe.experts.206.w2", "model.layers.15.block_sparse_moe.experts.207.w2", "model.layers.15.block_sparse_moe.experts.208.w2", "model.layers.15.block_sparse_moe.experts.209.w2", "model.layers.15.block_sparse_moe.experts.210.w2", "model.layers.15.block_sparse_moe.experts.211.w2", "model.layers.15.block_sparse_moe.experts.212.w2", "model.layers.15.block_sparse_moe.experts.213.w2", "model.layers.15.block_sparse_moe.experts.214.w2", "model.layers.15.block_sparse_moe.experts.215.w2", "model.layers.15.block_sparse_moe.experts.216.w2", "model.layers.15.block_sparse_moe.experts.217.w2", "model.layers.15.block_sparse_moe.experts.218.w2", "model.layers.15.block_sparse_moe.experts.219.w2", "model.layers.15.block_sparse_moe.experts.220.w2", "model.layers.15.block_sparse_moe.experts.221.w2", "model.layers.15.block_sparse_moe.experts.222.w2", "model.layers.15.block_sparse_moe.experts.223.w2", "model.layers.15.block_sparse_moe.experts.224.w2", "model.layers.15.block_sparse_moe.experts.225.w2", "model.layers.15.block_sparse_moe.experts.226.w2", "model.layers.15.block_sparse_moe.experts.227.w2", "model.layers.15.block_sparse_moe.experts.228.w2", "model.layers.15.block_sparse_moe.experts.229.w2", "model.layers.15.block_sparse_moe.experts.230.w2", "model.layers.15.block_sparse_moe.experts.231.w2", "model.layers.15.block_sparse_moe.experts.232.w2", "model.layers.15.block_sparse_moe.experts.233.w2", "model.layers.15.block_sparse_moe.experts.234.w2", "model.layers.15.block_sparse_moe.experts.235.w2", "model.layers.15.block_sparse_moe.experts.236.w2", "model.layers.15.block_sparse_moe.experts.237.w2", "model.layers.15.block_sparse_moe.experts.238.w2", "model.layers.15.block_sparse_moe.experts.239.w2", "model.layers.15.block_sparse_moe.experts.240.w2", "model.layers.15.block_sparse_moe.experts.241.w2", "model.layers.15.block_sparse_moe.experts.242.w2", "model.layers.15.block_sparse_moe.experts.243.w2", "model.layers.15.block_sparse_moe.experts.244.w2", "model.layers.15.block_sparse_moe.experts.245.w2", "model.layers.15.block_sparse_moe.experts.246.w2", "model.layers.15.block_sparse_moe.experts.247.w2", "model.layers.15.block_sparse_moe.experts.248.w2", "model.layers.15.block_sparse_moe.experts.249.w2", "model.layers.15.block_sparse_moe.experts.250.w2", "model.layers.15.block_sparse_moe.experts.251.w2", "model.layers.15.block_sparse_moe.experts.252.w2", "model.layers.15.block_sparse_moe.experts.253.w2", "model.layers.15.block_sparse_moe.experts.254.w2", "model.layers.15.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0020456749945879094, "dbits": 3623878656 } ] }, { "idx": 32, "layers": [ "model.layers.16.self_attn.q_proj", "model.layers.16.self_attn.k_proj", "model.layers.16.self_attn.v_proj", "model.layers.16.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0040488094091415405, "dbits": 44040192 } ] }, { "idx": 33, "layers": [ "model.layers.16.block_sparse_moe.experts.0.w1", "model.layers.16.block_sparse_moe.experts.1.w1", "model.layers.16.block_sparse_moe.experts.2.w1", "model.layers.16.block_sparse_moe.experts.3.w1", "model.layers.16.block_sparse_moe.experts.4.w1", "model.layers.16.block_sparse_moe.experts.5.w1", "model.layers.16.block_sparse_moe.experts.6.w1", "model.layers.16.block_sparse_moe.experts.7.w1", "model.layers.16.block_sparse_moe.experts.8.w1", "model.layers.16.block_sparse_moe.experts.9.w1", "model.layers.16.block_sparse_moe.experts.10.w1", "model.layers.16.block_sparse_moe.experts.11.w1", "model.layers.16.block_sparse_moe.experts.12.w1", "model.layers.16.block_sparse_moe.experts.13.w1", "model.layers.16.block_sparse_moe.experts.14.w1", "model.layers.16.block_sparse_moe.experts.15.w1", "model.layers.16.block_sparse_moe.experts.16.w1", "model.layers.16.block_sparse_moe.experts.17.w1", "model.layers.16.block_sparse_moe.experts.18.w1", "model.layers.16.block_sparse_moe.experts.19.w1", "model.layers.16.block_sparse_moe.experts.20.w1", "model.layers.16.block_sparse_moe.experts.21.w1", "model.layers.16.block_sparse_moe.experts.22.w1", "model.layers.16.block_sparse_moe.experts.23.w1", "model.layers.16.block_sparse_moe.experts.24.w1", "model.layers.16.block_sparse_moe.experts.25.w1", "model.layers.16.block_sparse_moe.experts.26.w1", "model.layers.16.block_sparse_moe.experts.27.w1", "model.layers.16.block_sparse_moe.experts.28.w1", "model.layers.16.block_sparse_moe.experts.29.w1", "model.layers.16.block_sparse_moe.experts.30.w1", "model.layers.16.block_sparse_moe.experts.31.w1", "model.layers.16.block_sparse_moe.experts.32.w1", "model.layers.16.block_sparse_moe.experts.33.w1", "model.layers.16.block_sparse_moe.experts.34.w1", "model.layers.16.block_sparse_moe.experts.35.w1", "model.layers.16.block_sparse_moe.experts.36.w1", "model.layers.16.block_sparse_moe.experts.37.w1", "model.layers.16.block_sparse_moe.experts.38.w1", "model.layers.16.block_sparse_moe.experts.39.w1", "model.layers.16.block_sparse_moe.experts.40.w1", "model.layers.16.block_sparse_moe.experts.41.w1", "model.layers.16.block_sparse_moe.experts.42.w1", "model.layers.16.block_sparse_moe.experts.43.w1", "model.layers.16.block_sparse_moe.experts.44.w1", "model.layers.16.block_sparse_moe.experts.45.w1", "model.layers.16.block_sparse_moe.experts.46.w1", "model.layers.16.block_sparse_moe.experts.47.w1", "model.layers.16.block_sparse_moe.experts.48.w1", "model.layers.16.block_sparse_moe.experts.49.w1", "model.layers.16.block_sparse_moe.experts.50.w1", "model.layers.16.block_sparse_moe.experts.51.w1", "model.layers.16.block_sparse_moe.experts.52.w1", "model.layers.16.block_sparse_moe.experts.53.w1", "model.layers.16.block_sparse_moe.experts.54.w1", "model.layers.16.block_sparse_moe.experts.55.w1", "model.layers.16.block_sparse_moe.experts.56.w1", "model.layers.16.block_sparse_moe.experts.57.w1", "model.layers.16.block_sparse_moe.experts.58.w1", "model.layers.16.block_sparse_moe.experts.59.w1", "model.layers.16.block_sparse_moe.experts.60.w1", "model.layers.16.block_sparse_moe.experts.61.w1", "model.layers.16.block_sparse_moe.experts.62.w1", "model.layers.16.block_sparse_moe.experts.63.w1", "model.layers.16.block_sparse_moe.experts.64.w1", "model.layers.16.block_sparse_moe.experts.65.w1", "model.layers.16.block_sparse_moe.experts.66.w1", "model.layers.16.block_sparse_moe.experts.67.w1", "model.layers.16.block_sparse_moe.experts.68.w1", "model.layers.16.block_sparse_moe.experts.69.w1", "model.layers.16.block_sparse_moe.experts.70.w1", "model.layers.16.block_sparse_moe.experts.71.w1", "model.layers.16.block_sparse_moe.experts.72.w1", "model.layers.16.block_sparse_moe.experts.73.w1", "model.layers.16.block_sparse_moe.experts.74.w1", "model.layers.16.block_sparse_moe.experts.75.w1", "model.layers.16.block_sparse_moe.experts.76.w1", "model.layers.16.block_sparse_moe.experts.77.w1", "model.layers.16.block_sparse_moe.experts.78.w1", "model.layers.16.block_sparse_moe.experts.79.w1", "model.layers.16.block_sparse_moe.experts.80.w1", "model.layers.16.block_sparse_moe.experts.81.w1", "model.layers.16.block_sparse_moe.experts.82.w1", "model.layers.16.block_sparse_moe.experts.83.w1", "model.layers.16.block_sparse_moe.experts.84.w1", "model.layers.16.block_sparse_moe.experts.85.w1", "model.layers.16.block_sparse_moe.experts.86.w1", "model.layers.16.block_sparse_moe.experts.87.w1", "model.layers.16.block_sparse_moe.experts.88.w1", "model.layers.16.block_sparse_moe.experts.89.w1", "model.layers.16.block_sparse_moe.experts.90.w1", "model.layers.16.block_sparse_moe.experts.91.w1", "model.layers.16.block_sparse_moe.experts.92.w1", "model.layers.16.block_sparse_moe.experts.93.w1", "model.layers.16.block_sparse_moe.experts.94.w1", "model.layers.16.block_sparse_moe.experts.95.w1", "model.layers.16.block_sparse_moe.experts.96.w1", "model.layers.16.block_sparse_moe.experts.97.w1", "model.layers.16.block_sparse_moe.experts.98.w1", "model.layers.16.block_sparse_moe.experts.99.w1", "model.layers.16.block_sparse_moe.experts.100.w1", "model.layers.16.block_sparse_moe.experts.101.w1", "model.layers.16.block_sparse_moe.experts.102.w1", "model.layers.16.block_sparse_moe.experts.103.w1", "model.layers.16.block_sparse_moe.experts.104.w1", "model.layers.16.block_sparse_moe.experts.105.w1", "model.layers.16.block_sparse_moe.experts.106.w1", "model.layers.16.block_sparse_moe.experts.107.w1", "model.layers.16.block_sparse_moe.experts.108.w1", "model.layers.16.block_sparse_moe.experts.109.w1", "model.layers.16.block_sparse_moe.experts.110.w1", "model.layers.16.block_sparse_moe.experts.111.w1", "model.layers.16.block_sparse_moe.experts.112.w1", "model.layers.16.block_sparse_moe.experts.113.w1", "model.layers.16.block_sparse_moe.experts.114.w1", "model.layers.16.block_sparse_moe.experts.115.w1", "model.layers.16.block_sparse_moe.experts.116.w1", "model.layers.16.block_sparse_moe.experts.117.w1", "model.layers.16.block_sparse_moe.experts.118.w1", "model.layers.16.block_sparse_moe.experts.119.w1", "model.layers.16.block_sparse_moe.experts.120.w1", "model.layers.16.block_sparse_moe.experts.121.w1", "model.layers.16.block_sparse_moe.experts.122.w1", "model.layers.16.block_sparse_moe.experts.123.w1", "model.layers.16.block_sparse_moe.experts.124.w1", "model.layers.16.block_sparse_moe.experts.125.w1", "model.layers.16.block_sparse_moe.experts.126.w1", "model.layers.16.block_sparse_moe.experts.127.w1", "model.layers.16.block_sparse_moe.experts.128.w1", "model.layers.16.block_sparse_moe.experts.129.w1", "model.layers.16.block_sparse_moe.experts.130.w1", "model.layers.16.block_sparse_moe.experts.131.w1", "model.layers.16.block_sparse_moe.experts.132.w1", "model.layers.16.block_sparse_moe.experts.133.w1", "model.layers.16.block_sparse_moe.experts.134.w1", "model.layers.16.block_sparse_moe.experts.135.w1", "model.layers.16.block_sparse_moe.experts.136.w1", "model.layers.16.block_sparse_moe.experts.137.w1", "model.layers.16.block_sparse_moe.experts.138.w1", "model.layers.16.block_sparse_moe.experts.139.w1", "model.layers.16.block_sparse_moe.experts.140.w1", "model.layers.16.block_sparse_moe.experts.141.w1", "model.layers.16.block_sparse_moe.experts.142.w1", "model.layers.16.block_sparse_moe.experts.143.w1", "model.layers.16.block_sparse_moe.experts.144.w1", "model.layers.16.block_sparse_moe.experts.145.w1", "model.layers.16.block_sparse_moe.experts.146.w1", "model.layers.16.block_sparse_moe.experts.147.w1", "model.layers.16.block_sparse_moe.experts.148.w1", "model.layers.16.block_sparse_moe.experts.149.w1", "model.layers.16.block_sparse_moe.experts.150.w1", "model.layers.16.block_sparse_moe.experts.151.w1", "model.layers.16.block_sparse_moe.experts.152.w1", "model.layers.16.block_sparse_moe.experts.153.w1", "model.layers.16.block_sparse_moe.experts.154.w1", "model.layers.16.block_sparse_moe.experts.155.w1", "model.layers.16.block_sparse_moe.experts.156.w1", "model.layers.16.block_sparse_moe.experts.157.w1", "model.layers.16.block_sparse_moe.experts.158.w1", "model.layers.16.block_sparse_moe.experts.159.w1", "model.layers.16.block_sparse_moe.experts.160.w1", "model.layers.16.block_sparse_moe.experts.161.w1", "model.layers.16.block_sparse_moe.experts.162.w1", "model.layers.16.block_sparse_moe.experts.163.w1", "model.layers.16.block_sparse_moe.experts.164.w1", "model.layers.16.block_sparse_moe.experts.165.w1", "model.layers.16.block_sparse_moe.experts.166.w1", "model.layers.16.block_sparse_moe.experts.167.w1", "model.layers.16.block_sparse_moe.experts.168.w1", "model.layers.16.block_sparse_moe.experts.169.w1", "model.layers.16.block_sparse_moe.experts.170.w1", "model.layers.16.block_sparse_moe.experts.171.w1", "model.layers.16.block_sparse_moe.experts.172.w1", "model.layers.16.block_sparse_moe.experts.173.w1", "model.layers.16.block_sparse_moe.experts.174.w1", "model.layers.16.block_sparse_moe.experts.175.w1", "model.layers.16.block_sparse_moe.experts.176.w1", "model.layers.16.block_sparse_moe.experts.177.w1", "model.layers.16.block_sparse_moe.experts.178.w1", "model.layers.16.block_sparse_moe.experts.179.w1", "model.layers.16.block_sparse_moe.experts.180.w1", "model.layers.16.block_sparse_moe.experts.181.w1", "model.layers.16.block_sparse_moe.experts.182.w1", "model.layers.16.block_sparse_moe.experts.183.w1", "model.layers.16.block_sparse_moe.experts.184.w1", "model.layers.16.block_sparse_moe.experts.185.w1", "model.layers.16.block_sparse_moe.experts.186.w1", "model.layers.16.block_sparse_moe.experts.187.w1", "model.layers.16.block_sparse_moe.experts.188.w1", "model.layers.16.block_sparse_moe.experts.189.w1", "model.layers.16.block_sparse_moe.experts.190.w1", "model.layers.16.block_sparse_moe.experts.191.w1", "model.layers.16.block_sparse_moe.experts.192.w1", "model.layers.16.block_sparse_moe.experts.193.w1", "model.layers.16.block_sparse_moe.experts.194.w1", "model.layers.16.block_sparse_moe.experts.195.w1", "model.layers.16.block_sparse_moe.experts.196.w1", "model.layers.16.block_sparse_moe.experts.197.w1", "model.layers.16.block_sparse_moe.experts.198.w1", "model.layers.16.block_sparse_moe.experts.199.w1", "model.layers.16.block_sparse_moe.experts.200.w1", "model.layers.16.block_sparse_moe.experts.201.w1", "model.layers.16.block_sparse_moe.experts.202.w1", "model.layers.16.block_sparse_moe.experts.203.w1", "model.layers.16.block_sparse_moe.experts.204.w1", "model.layers.16.block_sparse_moe.experts.205.w1", "model.layers.16.block_sparse_moe.experts.206.w1", "model.layers.16.block_sparse_moe.experts.207.w1", "model.layers.16.block_sparse_moe.experts.208.w1", "model.layers.16.block_sparse_moe.experts.209.w1", "model.layers.16.block_sparse_moe.experts.210.w1", "model.layers.16.block_sparse_moe.experts.211.w1", "model.layers.16.block_sparse_moe.experts.212.w1", "model.layers.16.block_sparse_moe.experts.213.w1", "model.layers.16.block_sparse_moe.experts.214.w1", "model.layers.16.block_sparse_moe.experts.215.w1", "model.layers.16.block_sparse_moe.experts.216.w1", "model.layers.16.block_sparse_moe.experts.217.w1", "model.layers.16.block_sparse_moe.experts.218.w1", "model.layers.16.block_sparse_moe.experts.219.w1", "model.layers.16.block_sparse_moe.experts.220.w1", "model.layers.16.block_sparse_moe.experts.221.w1", "model.layers.16.block_sparse_moe.experts.222.w1", "model.layers.16.block_sparse_moe.experts.223.w1", "model.layers.16.block_sparse_moe.experts.224.w1", "model.layers.16.block_sparse_moe.experts.225.w1", "model.layers.16.block_sparse_moe.experts.226.w1", "model.layers.16.block_sparse_moe.experts.227.w1", "model.layers.16.block_sparse_moe.experts.228.w1", "model.layers.16.block_sparse_moe.experts.229.w1", "model.layers.16.block_sparse_moe.experts.230.w1", "model.layers.16.block_sparse_moe.experts.231.w1", "model.layers.16.block_sparse_moe.experts.232.w1", "model.layers.16.block_sparse_moe.experts.233.w1", "model.layers.16.block_sparse_moe.experts.234.w1", "model.layers.16.block_sparse_moe.experts.235.w1", "model.layers.16.block_sparse_moe.experts.236.w1", "model.layers.16.block_sparse_moe.experts.237.w1", "model.layers.16.block_sparse_moe.experts.238.w1", "model.layers.16.block_sparse_moe.experts.239.w1", "model.layers.16.block_sparse_moe.experts.240.w1", "model.layers.16.block_sparse_moe.experts.241.w1", "model.layers.16.block_sparse_moe.experts.242.w1", "model.layers.16.block_sparse_moe.experts.243.w1", "model.layers.16.block_sparse_moe.experts.244.w1", "model.layers.16.block_sparse_moe.experts.245.w1", "model.layers.16.block_sparse_moe.experts.246.w1", "model.layers.16.block_sparse_moe.experts.247.w1", "model.layers.16.block_sparse_moe.experts.248.w1", "model.layers.16.block_sparse_moe.experts.249.w1", "model.layers.16.block_sparse_moe.experts.250.w1", "model.layers.16.block_sparse_moe.experts.251.w1", "model.layers.16.block_sparse_moe.experts.252.w1", "model.layers.16.block_sparse_moe.experts.253.w1", "model.layers.16.block_sparse_moe.experts.254.w1", "model.layers.16.block_sparse_moe.experts.255.w1", "model.layers.16.block_sparse_moe.experts.0.w3", "model.layers.16.block_sparse_moe.experts.1.w3", "model.layers.16.block_sparse_moe.experts.2.w3", "model.layers.16.block_sparse_moe.experts.3.w3", "model.layers.16.block_sparse_moe.experts.4.w3", "model.layers.16.block_sparse_moe.experts.5.w3", "model.layers.16.block_sparse_moe.experts.6.w3", "model.layers.16.block_sparse_moe.experts.7.w3", "model.layers.16.block_sparse_moe.experts.8.w3", "model.layers.16.block_sparse_moe.experts.9.w3", "model.layers.16.block_sparse_moe.experts.10.w3", "model.layers.16.block_sparse_moe.experts.11.w3", "model.layers.16.block_sparse_moe.experts.12.w3", "model.layers.16.block_sparse_moe.experts.13.w3", "model.layers.16.block_sparse_moe.experts.14.w3", "model.layers.16.block_sparse_moe.experts.15.w3", "model.layers.16.block_sparse_moe.experts.16.w3", "model.layers.16.block_sparse_moe.experts.17.w3", "model.layers.16.block_sparse_moe.experts.18.w3", "model.layers.16.block_sparse_moe.experts.19.w3", "model.layers.16.block_sparse_moe.experts.20.w3", "model.layers.16.block_sparse_moe.experts.21.w3", "model.layers.16.block_sparse_moe.experts.22.w3", "model.layers.16.block_sparse_moe.experts.23.w3", "model.layers.16.block_sparse_moe.experts.24.w3", "model.layers.16.block_sparse_moe.experts.25.w3", "model.layers.16.block_sparse_moe.experts.26.w3", "model.layers.16.block_sparse_moe.experts.27.w3", "model.layers.16.block_sparse_moe.experts.28.w3", "model.layers.16.block_sparse_moe.experts.29.w3", "model.layers.16.block_sparse_moe.experts.30.w3", "model.layers.16.block_sparse_moe.experts.31.w3", "model.layers.16.block_sparse_moe.experts.32.w3", "model.layers.16.block_sparse_moe.experts.33.w3", "model.layers.16.block_sparse_moe.experts.34.w3", "model.layers.16.block_sparse_moe.experts.35.w3", "model.layers.16.block_sparse_moe.experts.36.w3", "model.layers.16.block_sparse_moe.experts.37.w3", "model.layers.16.block_sparse_moe.experts.38.w3", "model.layers.16.block_sparse_moe.experts.39.w3", "model.layers.16.block_sparse_moe.experts.40.w3", "model.layers.16.block_sparse_moe.experts.41.w3", "model.layers.16.block_sparse_moe.experts.42.w3", "model.layers.16.block_sparse_moe.experts.43.w3", "model.layers.16.block_sparse_moe.experts.44.w3", "model.layers.16.block_sparse_moe.experts.45.w3", "model.layers.16.block_sparse_moe.experts.46.w3", "model.layers.16.block_sparse_moe.experts.47.w3", "model.layers.16.block_sparse_moe.experts.48.w3", "model.layers.16.block_sparse_moe.experts.49.w3", "model.layers.16.block_sparse_moe.experts.50.w3", "model.layers.16.block_sparse_moe.experts.51.w3", "model.layers.16.block_sparse_moe.experts.52.w3", "model.layers.16.block_sparse_moe.experts.53.w3", "model.layers.16.block_sparse_moe.experts.54.w3", "model.layers.16.block_sparse_moe.experts.55.w3", "model.layers.16.block_sparse_moe.experts.56.w3", "model.layers.16.block_sparse_moe.experts.57.w3", "model.layers.16.block_sparse_moe.experts.58.w3", "model.layers.16.block_sparse_moe.experts.59.w3", "model.layers.16.block_sparse_moe.experts.60.w3", "model.layers.16.block_sparse_moe.experts.61.w3", "model.layers.16.block_sparse_moe.experts.62.w3", "model.layers.16.block_sparse_moe.experts.63.w3", "model.layers.16.block_sparse_moe.experts.64.w3", "model.layers.16.block_sparse_moe.experts.65.w3", "model.layers.16.block_sparse_moe.experts.66.w3", "model.layers.16.block_sparse_moe.experts.67.w3", "model.layers.16.block_sparse_moe.experts.68.w3", "model.layers.16.block_sparse_moe.experts.69.w3", "model.layers.16.block_sparse_moe.experts.70.w3", "model.layers.16.block_sparse_moe.experts.71.w3", "model.layers.16.block_sparse_moe.experts.72.w3", "model.layers.16.block_sparse_moe.experts.73.w3", "model.layers.16.block_sparse_moe.experts.74.w3", "model.layers.16.block_sparse_moe.experts.75.w3", "model.layers.16.block_sparse_moe.experts.76.w3", "model.layers.16.block_sparse_moe.experts.77.w3", "model.layers.16.block_sparse_moe.experts.78.w3", "model.layers.16.block_sparse_moe.experts.79.w3", "model.layers.16.block_sparse_moe.experts.80.w3", "model.layers.16.block_sparse_moe.experts.81.w3", "model.layers.16.block_sparse_moe.experts.82.w3", "model.layers.16.block_sparse_moe.experts.83.w3", "model.layers.16.block_sparse_moe.experts.84.w3", "model.layers.16.block_sparse_moe.experts.85.w3", "model.layers.16.block_sparse_moe.experts.86.w3", "model.layers.16.block_sparse_moe.experts.87.w3", "model.layers.16.block_sparse_moe.experts.88.w3", "model.layers.16.block_sparse_moe.experts.89.w3", "model.layers.16.block_sparse_moe.experts.90.w3", "model.layers.16.block_sparse_moe.experts.91.w3", "model.layers.16.block_sparse_moe.experts.92.w3", "model.layers.16.block_sparse_moe.experts.93.w3", "model.layers.16.block_sparse_moe.experts.94.w3", "model.layers.16.block_sparse_moe.experts.95.w3", "model.layers.16.block_sparse_moe.experts.96.w3", "model.layers.16.block_sparse_moe.experts.97.w3", "model.layers.16.block_sparse_moe.experts.98.w3", "model.layers.16.block_sparse_moe.experts.99.w3", "model.layers.16.block_sparse_moe.experts.100.w3", "model.layers.16.block_sparse_moe.experts.101.w3", "model.layers.16.block_sparse_moe.experts.102.w3", "model.layers.16.block_sparse_moe.experts.103.w3", "model.layers.16.block_sparse_moe.experts.104.w3", "model.layers.16.block_sparse_moe.experts.105.w3", "model.layers.16.block_sparse_moe.experts.106.w3", "model.layers.16.block_sparse_moe.experts.107.w3", "model.layers.16.block_sparse_moe.experts.108.w3", "model.layers.16.block_sparse_moe.experts.109.w3", "model.layers.16.block_sparse_moe.experts.110.w3", "model.layers.16.block_sparse_moe.experts.111.w3", "model.layers.16.block_sparse_moe.experts.112.w3", "model.layers.16.block_sparse_moe.experts.113.w3", "model.layers.16.block_sparse_moe.experts.114.w3", "model.layers.16.block_sparse_moe.experts.115.w3", "model.layers.16.block_sparse_moe.experts.116.w3", "model.layers.16.block_sparse_moe.experts.117.w3", "model.layers.16.block_sparse_moe.experts.118.w3", "model.layers.16.block_sparse_moe.experts.119.w3", "model.layers.16.block_sparse_moe.experts.120.w3", "model.layers.16.block_sparse_moe.experts.121.w3", "model.layers.16.block_sparse_moe.experts.122.w3", "model.layers.16.block_sparse_moe.experts.123.w3", "model.layers.16.block_sparse_moe.experts.124.w3", "model.layers.16.block_sparse_moe.experts.125.w3", "model.layers.16.block_sparse_moe.experts.126.w3", "model.layers.16.block_sparse_moe.experts.127.w3", "model.layers.16.block_sparse_moe.experts.128.w3", "model.layers.16.block_sparse_moe.experts.129.w3", "model.layers.16.block_sparse_moe.experts.130.w3", "model.layers.16.block_sparse_moe.experts.131.w3", "model.layers.16.block_sparse_moe.experts.132.w3", "model.layers.16.block_sparse_moe.experts.133.w3", "model.layers.16.block_sparse_moe.experts.134.w3", "model.layers.16.block_sparse_moe.experts.135.w3", "model.layers.16.block_sparse_moe.experts.136.w3", "model.layers.16.block_sparse_moe.experts.137.w3", "model.layers.16.block_sparse_moe.experts.138.w3", "model.layers.16.block_sparse_moe.experts.139.w3", "model.layers.16.block_sparse_moe.experts.140.w3", "model.layers.16.block_sparse_moe.experts.141.w3", "model.layers.16.block_sparse_moe.experts.142.w3", "model.layers.16.block_sparse_moe.experts.143.w3", "model.layers.16.block_sparse_moe.experts.144.w3", "model.layers.16.block_sparse_moe.experts.145.w3", "model.layers.16.block_sparse_moe.experts.146.w3", "model.layers.16.block_sparse_moe.experts.147.w3", "model.layers.16.block_sparse_moe.experts.148.w3", "model.layers.16.block_sparse_moe.experts.149.w3", "model.layers.16.block_sparse_moe.experts.150.w3", "model.layers.16.block_sparse_moe.experts.151.w3", "model.layers.16.block_sparse_moe.experts.152.w3", "model.layers.16.block_sparse_moe.experts.153.w3", "model.layers.16.block_sparse_moe.experts.154.w3", "model.layers.16.block_sparse_moe.experts.155.w3", "model.layers.16.block_sparse_moe.experts.156.w3", "model.layers.16.block_sparse_moe.experts.157.w3", "model.layers.16.block_sparse_moe.experts.158.w3", "model.layers.16.block_sparse_moe.experts.159.w3", "model.layers.16.block_sparse_moe.experts.160.w3", "model.layers.16.block_sparse_moe.experts.161.w3", "model.layers.16.block_sparse_moe.experts.162.w3", "model.layers.16.block_sparse_moe.experts.163.w3", "model.layers.16.block_sparse_moe.experts.164.w3", "model.layers.16.block_sparse_moe.experts.165.w3", "model.layers.16.block_sparse_moe.experts.166.w3", "model.layers.16.block_sparse_moe.experts.167.w3", "model.layers.16.block_sparse_moe.experts.168.w3", "model.layers.16.block_sparse_moe.experts.169.w3", "model.layers.16.block_sparse_moe.experts.170.w3", "model.layers.16.block_sparse_moe.experts.171.w3", "model.layers.16.block_sparse_moe.experts.172.w3", "model.layers.16.block_sparse_moe.experts.173.w3", "model.layers.16.block_sparse_moe.experts.174.w3", "model.layers.16.block_sparse_moe.experts.175.w3", "model.layers.16.block_sparse_moe.experts.176.w3", "model.layers.16.block_sparse_moe.experts.177.w3", "model.layers.16.block_sparse_moe.experts.178.w3", "model.layers.16.block_sparse_moe.experts.179.w3", "model.layers.16.block_sparse_moe.experts.180.w3", "model.layers.16.block_sparse_moe.experts.181.w3", "model.layers.16.block_sparse_moe.experts.182.w3", "model.layers.16.block_sparse_moe.experts.183.w3", "model.layers.16.block_sparse_moe.experts.184.w3", "model.layers.16.block_sparse_moe.experts.185.w3", "model.layers.16.block_sparse_moe.experts.186.w3", "model.layers.16.block_sparse_moe.experts.187.w3", "model.layers.16.block_sparse_moe.experts.188.w3", "model.layers.16.block_sparse_moe.experts.189.w3", "model.layers.16.block_sparse_moe.experts.190.w3", "model.layers.16.block_sparse_moe.experts.191.w3", "model.layers.16.block_sparse_moe.experts.192.w3", "model.layers.16.block_sparse_moe.experts.193.w3", "model.layers.16.block_sparse_moe.experts.194.w3", "model.layers.16.block_sparse_moe.experts.195.w3", "model.layers.16.block_sparse_moe.experts.196.w3", "model.layers.16.block_sparse_moe.experts.197.w3", "model.layers.16.block_sparse_moe.experts.198.w3", "model.layers.16.block_sparse_moe.experts.199.w3", "model.layers.16.block_sparse_moe.experts.200.w3", "model.layers.16.block_sparse_moe.experts.201.w3", "model.layers.16.block_sparse_moe.experts.202.w3", "model.layers.16.block_sparse_moe.experts.203.w3", "model.layers.16.block_sparse_moe.experts.204.w3", "model.layers.16.block_sparse_moe.experts.205.w3", "model.layers.16.block_sparse_moe.experts.206.w3", "model.layers.16.block_sparse_moe.experts.207.w3", "model.layers.16.block_sparse_moe.experts.208.w3", "model.layers.16.block_sparse_moe.experts.209.w3", "model.layers.16.block_sparse_moe.experts.210.w3", "model.layers.16.block_sparse_moe.experts.211.w3", "model.layers.16.block_sparse_moe.experts.212.w3", "model.layers.16.block_sparse_moe.experts.213.w3", "model.layers.16.block_sparse_moe.experts.214.w3", "model.layers.16.block_sparse_moe.experts.215.w3", "model.layers.16.block_sparse_moe.experts.216.w3", "model.layers.16.block_sparse_moe.experts.217.w3", "model.layers.16.block_sparse_moe.experts.218.w3", "model.layers.16.block_sparse_moe.experts.219.w3", "model.layers.16.block_sparse_moe.experts.220.w3", "model.layers.16.block_sparse_moe.experts.221.w3", "model.layers.16.block_sparse_moe.experts.222.w3", "model.layers.16.block_sparse_moe.experts.223.w3", "model.layers.16.block_sparse_moe.experts.224.w3", "model.layers.16.block_sparse_moe.experts.225.w3", "model.layers.16.block_sparse_moe.experts.226.w3", "model.layers.16.block_sparse_moe.experts.227.w3", "model.layers.16.block_sparse_moe.experts.228.w3", "model.layers.16.block_sparse_moe.experts.229.w3", "model.layers.16.block_sparse_moe.experts.230.w3", "model.layers.16.block_sparse_moe.experts.231.w3", "model.layers.16.block_sparse_moe.experts.232.w3", "model.layers.16.block_sparse_moe.experts.233.w3", "model.layers.16.block_sparse_moe.experts.234.w3", "model.layers.16.block_sparse_moe.experts.235.w3", "model.layers.16.block_sparse_moe.experts.236.w3", "model.layers.16.block_sparse_moe.experts.237.w3", "model.layers.16.block_sparse_moe.experts.238.w3", "model.layers.16.block_sparse_moe.experts.239.w3", "model.layers.16.block_sparse_moe.experts.240.w3", "model.layers.16.block_sparse_moe.experts.241.w3", "model.layers.16.block_sparse_moe.experts.242.w3", "model.layers.16.block_sparse_moe.experts.243.w3", "model.layers.16.block_sparse_moe.experts.244.w3", "model.layers.16.block_sparse_moe.experts.245.w3", "model.layers.16.block_sparse_moe.experts.246.w3", "model.layers.16.block_sparse_moe.experts.247.w3", "model.layers.16.block_sparse_moe.experts.248.w3", "model.layers.16.block_sparse_moe.experts.249.w3", "model.layers.16.block_sparse_moe.experts.250.w3", "model.layers.16.block_sparse_moe.experts.251.w3", "model.layers.16.block_sparse_moe.experts.252.w3", "model.layers.16.block_sparse_moe.experts.253.w3", "model.layers.16.block_sparse_moe.experts.254.w3", "model.layers.16.block_sparse_moe.experts.255.w3", "model.layers.16.block_sparse_moe.experts.0.w2", "model.layers.16.block_sparse_moe.experts.1.w2", "model.layers.16.block_sparse_moe.experts.2.w2", "model.layers.16.block_sparse_moe.experts.3.w2", "model.layers.16.block_sparse_moe.experts.4.w2", "model.layers.16.block_sparse_moe.experts.5.w2", "model.layers.16.block_sparse_moe.experts.6.w2", "model.layers.16.block_sparse_moe.experts.7.w2", "model.layers.16.block_sparse_moe.experts.8.w2", "model.layers.16.block_sparse_moe.experts.9.w2", "model.layers.16.block_sparse_moe.experts.10.w2", "model.layers.16.block_sparse_moe.experts.11.w2", "model.layers.16.block_sparse_moe.experts.12.w2", "model.layers.16.block_sparse_moe.experts.13.w2", "model.layers.16.block_sparse_moe.experts.14.w2", "model.layers.16.block_sparse_moe.experts.15.w2", "model.layers.16.block_sparse_moe.experts.16.w2", "model.layers.16.block_sparse_moe.experts.17.w2", "model.layers.16.block_sparse_moe.experts.18.w2", "model.layers.16.block_sparse_moe.experts.19.w2", "model.layers.16.block_sparse_moe.experts.20.w2", "model.layers.16.block_sparse_moe.experts.21.w2", "model.layers.16.block_sparse_moe.experts.22.w2", "model.layers.16.block_sparse_moe.experts.23.w2", "model.layers.16.block_sparse_moe.experts.24.w2", "model.layers.16.block_sparse_moe.experts.25.w2", "model.layers.16.block_sparse_moe.experts.26.w2", "model.layers.16.block_sparse_moe.experts.27.w2", "model.layers.16.block_sparse_moe.experts.28.w2", "model.layers.16.block_sparse_moe.experts.29.w2", "model.layers.16.block_sparse_moe.experts.30.w2", "model.layers.16.block_sparse_moe.experts.31.w2", "model.layers.16.block_sparse_moe.experts.32.w2", "model.layers.16.block_sparse_moe.experts.33.w2", "model.layers.16.block_sparse_moe.experts.34.w2", "model.layers.16.block_sparse_moe.experts.35.w2", "model.layers.16.block_sparse_moe.experts.36.w2", "model.layers.16.block_sparse_moe.experts.37.w2", "model.layers.16.block_sparse_moe.experts.38.w2", "model.layers.16.block_sparse_moe.experts.39.w2", "model.layers.16.block_sparse_moe.experts.40.w2", "model.layers.16.block_sparse_moe.experts.41.w2", "model.layers.16.block_sparse_moe.experts.42.w2", "model.layers.16.block_sparse_moe.experts.43.w2", "model.layers.16.block_sparse_moe.experts.44.w2", "model.layers.16.block_sparse_moe.experts.45.w2", "model.layers.16.block_sparse_moe.experts.46.w2", "model.layers.16.block_sparse_moe.experts.47.w2", "model.layers.16.block_sparse_moe.experts.48.w2", "model.layers.16.block_sparse_moe.experts.49.w2", "model.layers.16.block_sparse_moe.experts.50.w2", "model.layers.16.block_sparse_moe.experts.51.w2", "model.layers.16.block_sparse_moe.experts.52.w2", "model.layers.16.block_sparse_moe.experts.53.w2", "model.layers.16.block_sparse_moe.experts.54.w2", "model.layers.16.block_sparse_moe.experts.55.w2", "model.layers.16.block_sparse_moe.experts.56.w2", "model.layers.16.block_sparse_moe.experts.57.w2", "model.layers.16.block_sparse_moe.experts.58.w2", "model.layers.16.block_sparse_moe.experts.59.w2", "model.layers.16.block_sparse_moe.experts.60.w2", "model.layers.16.block_sparse_moe.experts.61.w2", "model.layers.16.block_sparse_moe.experts.62.w2", "model.layers.16.block_sparse_moe.experts.63.w2", "model.layers.16.block_sparse_moe.experts.64.w2", "model.layers.16.block_sparse_moe.experts.65.w2", "model.layers.16.block_sparse_moe.experts.66.w2", "model.layers.16.block_sparse_moe.experts.67.w2", "model.layers.16.block_sparse_moe.experts.68.w2", "model.layers.16.block_sparse_moe.experts.69.w2", "model.layers.16.block_sparse_moe.experts.70.w2", "model.layers.16.block_sparse_moe.experts.71.w2", "model.layers.16.block_sparse_moe.experts.72.w2", "model.layers.16.block_sparse_moe.experts.73.w2", "model.layers.16.block_sparse_moe.experts.74.w2", "model.layers.16.block_sparse_moe.experts.75.w2", "model.layers.16.block_sparse_moe.experts.76.w2", "model.layers.16.block_sparse_moe.experts.77.w2", "model.layers.16.block_sparse_moe.experts.78.w2", "model.layers.16.block_sparse_moe.experts.79.w2", "model.layers.16.block_sparse_moe.experts.80.w2", "model.layers.16.block_sparse_moe.experts.81.w2", "model.layers.16.block_sparse_moe.experts.82.w2", "model.layers.16.block_sparse_moe.experts.83.w2", "model.layers.16.block_sparse_moe.experts.84.w2", "model.layers.16.block_sparse_moe.experts.85.w2", "model.layers.16.block_sparse_moe.experts.86.w2", "model.layers.16.block_sparse_moe.experts.87.w2", "model.layers.16.block_sparse_moe.experts.88.w2", "model.layers.16.block_sparse_moe.experts.89.w2", "model.layers.16.block_sparse_moe.experts.90.w2", "model.layers.16.block_sparse_moe.experts.91.w2", "model.layers.16.block_sparse_moe.experts.92.w2", "model.layers.16.block_sparse_moe.experts.93.w2", "model.layers.16.block_sparse_moe.experts.94.w2", "model.layers.16.block_sparse_moe.experts.95.w2", "model.layers.16.block_sparse_moe.experts.96.w2", "model.layers.16.block_sparse_moe.experts.97.w2", "model.layers.16.block_sparse_moe.experts.98.w2", "model.layers.16.block_sparse_moe.experts.99.w2", "model.layers.16.block_sparse_moe.experts.100.w2", "model.layers.16.block_sparse_moe.experts.101.w2", "model.layers.16.block_sparse_moe.experts.102.w2", "model.layers.16.block_sparse_moe.experts.103.w2", "model.layers.16.block_sparse_moe.experts.104.w2", "model.layers.16.block_sparse_moe.experts.105.w2", "model.layers.16.block_sparse_moe.experts.106.w2", "model.layers.16.block_sparse_moe.experts.107.w2", "model.layers.16.block_sparse_moe.experts.108.w2", "model.layers.16.block_sparse_moe.experts.109.w2", "model.layers.16.block_sparse_moe.experts.110.w2", "model.layers.16.block_sparse_moe.experts.111.w2", "model.layers.16.block_sparse_moe.experts.112.w2", "model.layers.16.block_sparse_moe.experts.113.w2", "model.layers.16.block_sparse_moe.experts.114.w2", "model.layers.16.block_sparse_moe.experts.115.w2", "model.layers.16.block_sparse_moe.experts.116.w2", "model.layers.16.block_sparse_moe.experts.117.w2", "model.layers.16.block_sparse_moe.experts.118.w2", "model.layers.16.block_sparse_moe.experts.119.w2", "model.layers.16.block_sparse_moe.experts.120.w2", "model.layers.16.block_sparse_moe.experts.121.w2", "model.layers.16.block_sparse_moe.experts.122.w2", "model.layers.16.block_sparse_moe.experts.123.w2", "model.layers.16.block_sparse_moe.experts.124.w2", "model.layers.16.block_sparse_moe.experts.125.w2", "model.layers.16.block_sparse_moe.experts.126.w2", "model.layers.16.block_sparse_moe.experts.127.w2", "model.layers.16.block_sparse_moe.experts.128.w2", "model.layers.16.block_sparse_moe.experts.129.w2", "model.layers.16.block_sparse_moe.experts.130.w2", "model.layers.16.block_sparse_moe.experts.131.w2", "model.layers.16.block_sparse_moe.experts.132.w2", "model.layers.16.block_sparse_moe.experts.133.w2", "model.layers.16.block_sparse_moe.experts.134.w2", "model.layers.16.block_sparse_moe.experts.135.w2", "model.layers.16.block_sparse_moe.experts.136.w2", "model.layers.16.block_sparse_moe.experts.137.w2", "model.layers.16.block_sparse_moe.experts.138.w2", "model.layers.16.block_sparse_moe.experts.139.w2", "model.layers.16.block_sparse_moe.experts.140.w2", "model.layers.16.block_sparse_moe.experts.141.w2", "model.layers.16.block_sparse_moe.experts.142.w2", "model.layers.16.block_sparse_moe.experts.143.w2", "model.layers.16.block_sparse_moe.experts.144.w2", "model.layers.16.block_sparse_moe.experts.145.w2", "model.layers.16.block_sparse_moe.experts.146.w2", "model.layers.16.block_sparse_moe.experts.147.w2", "model.layers.16.block_sparse_moe.experts.148.w2", "model.layers.16.block_sparse_moe.experts.149.w2", "model.layers.16.block_sparse_moe.experts.150.w2", "model.layers.16.block_sparse_moe.experts.151.w2", "model.layers.16.block_sparse_moe.experts.152.w2", "model.layers.16.block_sparse_moe.experts.153.w2", "model.layers.16.block_sparse_moe.experts.154.w2", "model.layers.16.block_sparse_moe.experts.155.w2", "model.layers.16.block_sparse_moe.experts.156.w2", "model.layers.16.block_sparse_moe.experts.157.w2", "model.layers.16.block_sparse_moe.experts.158.w2", "model.layers.16.block_sparse_moe.experts.159.w2", "model.layers.16.block_sparse_moe.experts.160.w2", "model.layers.16.block_sparse_moe.experts.161.w2", "model.layers.16.block_sparse_moe.experts.162.w2", "model.layers.16.block_sparse_moe.experts.163.w2", "model.layers.16.block_sparse_moe.experts.164.w2", "model.layers.16.block_sparse_moe.experts.165.w2", "model.layers.16.block_sparse_moe.experts.166.w2", "model.layers.16.block_sparse_moe.experts.167.w2", "model.layers.16.block_sparse_moe.experts.168.w2", "model.layers.16.block_sparse_moe.experts.169.w2", "model.layers.16.block_sparse_moe.experts.170.w2", "model.layers.16.block_sparse_moe.experts.171.w2", "model.layers.16.block_sparse_moe.experts.172.w2", "model.layers.16.block_sparse_moe.experts.173.w2", "model.layers.16.block_sparse_moe.experts.174.w2", "model.layers.16.block_sparse_moe.experts.175.w2", "model.layers.16.block_sparse_moe.experts.176.w2", "model.layers.16.block_sparse_moe.experts.177.w2", "model.layers.16.block_sparse_moe.experts.178.w2", "model.layers.16.block_sparse_moe.experts.179.w2", "model.layers.16.block_sparse_moe.experts.180.w2", "model.layers.16.block_sparse_moe.experts.181.w2", "model.layers.16.block_sparse_moe.experts.182.w2", "model.layers.16.block_sparse_moe.experts.183.w2", "model.layers.16.block_sparse_moe.experts.184.w2", "model.layers.16.block_sparse_moe.experts.185.w2", "model.layers.16.block_sparse_moe.experts.186.w2", "model.layers.16.block_sparse_moe.experts.187.w2", "model.layers.16.block_sparse_moe.experts.188.w2", "model.layers.16.block_sparse_moe.experts.189.w2", "model.layers.16.block_sparse_moe.experts.190.w2", "model.layers.16.block_sparse_moe.experts.191.w2", "model.layers.16.block_sparse_moe.experts.192.w2", "model.layers.16.block_sparse_moe.experts.193.w2", "model.layers.16.block_sparse_moe.experts.194.w2", "model.layers.16.block_sparse_moe.experts.195.w2", "model.layers.16.block_sparse_moe.experts.196.w2", "model.layers.16.block_sparse_moe.experts.197.w2", "model.layers.16.block_sparse_moe.experts.198.w2", "model.layers.16.block_sparse_moe.experts.199.w2", "model.layers.16.block_sparse_moe.experts.200.w2", "model.layers.16.block_sparse_moe.experts.201.w2", "model.layers.16.block_sparse_moe.experts.202.w2", "model.layers.16.block_sparse_moe.experts.203.w2", "model.layers.16.block_sparse_moe.experts.204.w2", "model.layers.16.block_sparse_moe.experts.205.w2", "model.layers.16.block_sparse_moe.experts.206.w2", "model.layers.16.block_sparse_moe.experts.207.w2", "model.layers.16.block_sparse_moe.experts.208.w2", "model.layers.16.block_sparse_moe.experts.209.w2", "model.layers.16.block_sparse_moe.experts.210.w2", "model.layers.16.block_sparse_moe.experts.211.w2", "model.layers.16.block_sparse_moe.experts.212.w2", "model.layers.16.block_sparse_moe.experts.213.w2", "model.layers.16.block_sparse_moe.experts.214.w2", "model.layers.16.block_sparse_moe.experts.215.w2", "model.layers.16.block_sparse_moe.experts.216.w2", "model.layers.16.block_sparse_moe.experts.217.w2", "model.layers.16.block_sparse_moe.experts.218.w2", "model.layers.16.block_sparse_moe.experts.219.w2", "model.layers.16.block_sparse_moe.experts.220.w2", "model.layers.16.block_sparse_moe.experts.221.w2", "model.layers.16.block_sparse_moe.experts.222.w2", "model.layers.16.block_sparse_moe.experts.223.w2", "model.layers.16.block_sparse_moe.experts.224.w2", "model.layers.16.block_sparse_moe.experts.225.w2", "model.layers.16.block_sparse_moe.experts.226.w2", "model.layers.16.block_sparse_moe.experts.227.w2", "model.layers.16.block_sparse_moe.experts.228.w2", "model.layers.16.block_sparse_moe.experts.229.w2", "model.layers.16.block_sparse_moe.experts.230.w2", "model.layers.16.block_sparse_moe.experts.231.w2", "model.layers.16.block_sparse_moe.experts.232.w2", "model.layers.16.block_sparse_moe.experts.233.w2", "model.layers.16.block_sparse_moe.experts.234.w2", "model.layers.16.block_sparse_moe.experts.235.w2", "model.layers.16.block_sparse_moe.experts.236.w2", "model.layers.16.block_sparse_moe.experts.237.w2", "model.layers.16.block_sparse_moe.experts.238.w2", "model.layers.16.block_sparse_moe.experts.239.w2", "model.layers.16.block_sparse_moe.experts.240.w2", "model.layers.16.block_sparse_moe.experts.241.w2", "model.layers.16.block_sparse_moe.experts.242.w2", "model.layers.16.block_sparse_moe.experts.243.w2", "model.layers.16.block_sparse_moe.experts.244.w2", "model.layers.16.block_sparse_moe.experts.245.w2", "model.layers.16.block_sparse_moe.experts.246.w2", "model.layers.16.block_sparse_moe.experts.247.w2", "model.layers.16.block_sparse_moe.experts.248.w2", "model.layers.16.block_sparse_moe.experts.249.w2", "model.layers.16.block_sparse_moe.experts.250.w2", "model.layers.16.block_sparse_moe.experts.251.w2", "model.layers.16.block_sparse_moe.experts.252.w2", "model.layers.16.block_sparse_moe.experts.253.w2", "model.layers.16.block_sparse_moe.experts.254.w2", "model.layers.16.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0004408322274684906, "dbits": 3623878656 } ] }, { "idx": 34, "layers": [ "model.layers.17.self_attn.q_proj", "model.layers.17.self_attn.k_proj", "model.layers.17.self_attn.v_proj", "model.layers.17.self_attn.o_proj" ], "candidates": [ { "dkld": -0.003545604087412335, "dbits": 44040192 } ] }, { "idx": 35, "layers": [ "model.layers.17.block_sparse_moe.experts.0.w1", "model.layers.17.block_sparse_moe.experts.1.w1", "model.layers.17.block_sparse_moe.experts.2.w1", "model.layers.17.block_sparse_moe.experts.3.w1", "model.layers.17.block_sparse_moe.experts.4.w1", "model.layers.17.block_sparse_moe.experts.5.w1", "model.layers.17.block_sparse_moe.experts.6.w1", "model.layers.17.block_sparse_moe.experts.7.w1", "model.layers.17.block_sparse_moe.experts.8.w1", "model.layers.17.block_sparse_moe.experts.9.w1", "model.layers.17.block_sparse_moe.experts.10.w1", "model.layers.17.block_sparse_moe.experts.11.w1", "model.layers.17.block_sparse_moe.experts.12.w1", "model.layers.17.block_sparse_moe.experts.13.w1", "model.layers.17.block_sparse_moe.experts.14.w1", "model.layers.17.block_sparse_moe.experts.15.w1", "model.layers.17.block_sparse_moe.experts.16.w1", "model.layers.17.block_sparse_moe.experts.17.w1", "model.layers.17.block_sparse_moe.experts.18.w1", "model.layers.17.block_sparse_moe.experts.19.w1", "model.layers.17.block_sparse_moe.experts.20.w1", "model.layers.17.block_sparse_moe.experts.21.w1", "model.layers.17.block_sparse_moe.experts.22.w1", "model.layers.17.block_sparse_moe.experts.23.w1", "model.layers.17.block_sparse_moe.experts.24.w1", "model.layers.17.block_sparse_moe.experts.25.w1", "model.layers.17.block_sparse_moe.experts.26.w1", "model.layers.17.block_sparse_moe.experts.27.w1", "model.layers.17.block_sparse_moe.experts.28.w1", "model.layers.17.block_sparse_moe.experts.29.w1", "model.layers.17.block_sparse_moe.experts.30.w1", "model.layers.17.block_sparse_moe.experts.31.w1", "model.layers.17.block_sparse_moe.experts.32.w1", "model.layers.17.block_sparse_moe.experts.33.w1", "model.layers.17.block_sparse_moe.experts.34.w1", "model.layers.17.block_sparse_moe.experts.35.w1", "model.layers.17.block_sparse_moe.experts.36.w1", "model.layers.17.block_sparse_moe.experts.37.w1", "model.layers.17.block_sparse_moe.experts.38.w1", "model.layers.17.block_sparse_moe.experts.39.w1", "model.layers.17.block_sparse_moe.experts.40.w1", "model.layers.17.block_sparse_moe.experts.41.w1", "model.layers.17.block_sparse_moe.experts.42.w1", "model.layers.17.block_sparse_moe.experts.43.w1", "model.layers.17.block_sparse_moe.experts.44.w1", "model.layers.17.block_sparse_moe.experts.45.w1", "model.layers.17.block_sparse_moe.experts.46.w1", "model.layers.17.block_sparse_moe.experts.47.w1", "model.layers.17.block_sparse_moe.experts.48.w1", "model.layers.17.block_sparse_moe.experts.49.w1", "model.layers.17.block_sparse_moe.experts.50.w1", "model.layers.17.block_sparse_moe.experts.51.w1", "model.layers.17.block_sparse_moe.experts.52.w1", "model.layers.17.block_sparse_moe.experts.53.w1", "model.layers.17.block_sparse_moe.experts.54.w1", "model.layers.17.block_sparse_moe.experts.55.w1", "model.layers.17.block_sparse_moe.experts.56.w1", "model.layers.17.block_sparse_moe.experts.57.w1", "model.layers.17.block_sparse_moe.experts.58.w1", "model.layers.17.block_sparse_moe.experts.59.w1", "model.layers.17.block_sparse_moe.experts.60.w1", "model.layers.17.block_sparse_moe.experts.61.w1", "model.layers.17.block_sparse_moe.experts.62.w1", "model.layers.17.block_sparse_moe.experts.63.w1", "model.layers.17.block_sparse_moe.experts.64.w1", "model.layers.17.block_sparse_moe.experts.65.w1", "model.layers.17.block_sparse_moe.experts.66.w1", "model.layers.17.block_sparse_moe.experts.67.w1", "model.layers.17.block_sparse_moe.experts.68.w1", "model.layers.17.block_sparse_moe.experts.69.w1", "model.layers.17.block_sparse_moe.experts.70.w1", "model.layers.17.block_sparse_moe.experts.71.w1", "model.layers.17.block_sparse_moe.experts.72.w1", "model.layers.17.block_sparse_moe.experts.73.w1", "model.layers.17.block_sparse_moe.experts.74.w1", "model.layers.17.block_sparse_moe.experts.75.w1", "model.layers.17.block_sparse_moe.experts.76.w1", "model.layers.17.block_sparse_moe.experts.77.w1", "model.layers.17.block_sparse_moe.experts.78.w1", "model.layers.17.block_sparse_moe.experts.79.w1", "model.layers.17.block_sparse_moe.experts.80.w1", "model.layers.17.block_sparse_moe.experts.81.w1", "model.layers.17.block_sparse_moe.experts.82.w1", "model.layers.17.block_sparse_moe.experts.83.w1", "model.layers.17.block_sparse_moe.experts.84.w1", "model.layers.17.block_sparse_moe.experts.85.w1", "model.layers.17.block_sparse_moe.experts.86.w1", "model.layers.17.block_sparse_moe.experts.87.w1", "model.layers.17.block_sparse_moe.experts.88.w1", "model.layers.17.block_sparse_moe.experts.89.w1", "model.layers.17.block_sparse_moe.experts.90.w1", "model.layers.17.block_sparse_moe.experts.91.w1", "model.layers.17.block_sparse_moe.experts.92.w1", "model.layers.17.block_sparse_moe.experts.93.w1", "model.layers.17.block_sparse_moe.experts.94.w1", "model.layers.17.block_sparse_moe.experts.95.w1", "model.layers.17.block_sparse_moe.experts.96.w1", "model.layers.17.block_sparse_moe.experts.97.w1", "model.layers.17.block_sparse_moe.experts.98.w1", "model.layers.17.block_sparse_moe.experts.99.w1", "model.layers.17.block_sparse_moe.experts.100.w1", "model.layers.17.block_sparse_moe.experts.101.w1", "model.layers.17.block_sparse_moe.experts.102.w1", "model.layers.17.block_sparse_moe.experts.103.w1", "model.layers.17.block_sparse_moe.experts.104.w1", "model.layers.17.block_sparse_moe.experts.105.w1", "model.layers.17.block_sparse_moe.experts.106.w1", "model.layers.17.block_sparse_moe.experts.107.w1", "model.layers.17.block_sparse_moe.experts.108.w1", "model.layers.17.block_sparse_moe.experts.109.w1", "model.layers.17.block_sparse_moe.experts.110.w1", "model.layers.17.block_sparse_moe.experts.111.w1", "model.layers.17.block_sparse_moe.experts.112.w1", "model.layers.17.block_sparse_moe.experts.113.w1", "model.layers.17.block_sparse_moe.experts.114.w1", "model.layers.17.block_sparse_moe.experts.115.w1", "model.layers.17.block_sparse_moe.experts.116.w1", "model.layers.17.block_sparse_moe.experts.117.w1", "model.layers.17.block_sparse_moe.experts.118.w1", "model.layers.17.block_sparse_moe.experts.119.w1", "model.layers.17.block_sparse_moe.experts.120.w1", "model.layers.17.block_sparse_moe.experts.121.w1", "model.layers.17.block_sparse_moe.experts.122.w1", "model.layers.17.block_sparse_moe.experts.123.w1", "model.layers.17.block_sparse_moe.experts.124.w1", "model.layers.17.block_sparse_moe.experts.125.w1", "model.layers.17.block_sparse_moe.experts.126.w1", "model.layers.17.block_sparse_moe.experts.127.w1", "model.layers.17.block_sparse_moe.experts.128.w1", "model.layers.17.block_sparse_moe.experts.129.w1", "model.layers.17.block_sparse_moe.experts.130.w1", "model.layers.17.block_sparse_moe.experts.131.w1", "model.layers.17.block_sparse_moe.experts.132.w1", "model.layers.17.block_sparse_moe.experts.133.w1", "model.layers.17.block_sparse_moe.experts.134.w1", "model.layers.17.block_sparse_moe.experts.135.w1", "model.layers.17.block_sparse_moe.experts.136.w1", "model.layers.17.block_sparse_moe.experts.137.w1", "model.layers.17.block_sparse_moe.experts.138.w1", "model.layers.17.block_sparse_moe.experts.139.w1", "model.layers.17.block_sparse_moe.experts.140.w1", "model.layers.17.block_sparse_moe.experts.141.w1", "model.layers.17.block_sparse_moe.experts.142.w1", "model.layers.17.block_sparse_moe.experts.143.w1", "model.layers.17.block_sparse_moe.experts.144.w1", "model.layers.17.block_sparse_moe.experts.145.w1", "model.layers.17.block_sparse_moe.experts.146.w1", "model.layers.17.block_sparse_moe.experts.147.w1", "model.layers.17.block_sparse_moe.experts.148.w1", "model.layers.17.block_sparse_moe.experts.149.w1", "model.layers.17.block_sparse_moe.experts.150.w1", "model.layers.17.block_sparse_moe.experts.151.w1", "model.layers.17.block_sparse_moe.experts.152.w1", "model.layers.17.block_sparse_moe.experts.153.w1", "model.layers.17.block_sparse_moe.experts.154.w1", "model.layers.17.block_sparse_moe.experts.155.w1", "model.layers.17.block_sparse_moe.experts.156.w1", "model.layers.17.block_sparse_moe.experts.157.w1", "model.layers.17.block_sparse_moe.experts.158.w1", "model.layers.17.block_sparse_moe.experts.159.w1", "model.layers.17.block_sparse_moe.experts.160.w1", "model.layers.17.block_sparse_moe.experts.161.w1", "model.layers.17.block_sparse_moe.experts.162.w1", "model.layers.17.block_sparse_moe.experts.163.w1", "model.layers.17.block_sparse_moe.experts.164.w1", "model.layers.17.block_sparse_moe.experts.165.w1", "model.layers.17.block_sparse_moe.experts.166.w1", "model.layers.17.block_sparse_moe.experts.167.w1", "model.layers.17.block_sparse_moe.experts.168.w1", "model.layers.17.block_sparse_moe.experts.169.w1", "model.layers.17.block_sparse_moe.experts.170.w1", "model.layers.17.block_sparse_moe.experts.171.w1", "model.layers.17.block_sparse_moe.experts.172.w1", "model.layers.17.block_sparse_moe.experts.173.w1", "model.layers.17.block_sparse_moe.experts.174.w1", "model.layers.17.block_sparse_moe.experts.175.w1", "model.layers.17.block_sparse_moe.experts.176.w1", "model.layers.17.block_sparse_moe.experts.177.w1", "model.layers.17.block_sparse_moe.experts.178.w1", "model.layers.17.block_sparse_moe.experts.179.w1", "model.layers.17.block_sparse_moe.experts.180.w1", "model.layers.17.block_sparse_moe.experts.181.w1", "model.layers.17.block_sparse_moe.experts.182.w1", "model.layers.17.block_sparse_moe.experts.183.w1", "model.layers.17.block_sparse_moe.experts.184.w1", "model.layers.17.block_sparse_moe.experts.185.w1", "model.layers.17.block_sparse_moe.experts.186.w1", "model.layers.17.block_sparse_moe.experts.187.w1", "model.layers.17.block_sparse_moe.experts.188.w1", "model.layers.17.block_sparse_moe.experts.189.w1", "model.layers.17.block_sparse_moe.experts.190.w1", "model.layers.17.block_sparse_moe.experts.191.w1", "model.layers.17.block_sparse_moe.experts.192.w1", "model.layers.17.block_sparse_moe.experts.193.w1", "model.layers.17.block_sparse_moe.experts.194.w1", "model.layers.17.block_sparse_moe.experts.195.w1", "model.layers.17.block_sparse_moe.experts.196.w1", "model.layers.17.block_sparse_moe.experts.197.w1", "model.layers.17.block_sparse_moe.experts.198.w1", "model.layers.17.block_sparse_moe.experts.199.w1", "model.layers.17.block_sparse_moe.experts.200.w1", "model.layers.17.block_sparse_moe.experts.201.w1", "model.layers.17.block_sparse_moe.experts.202.w1", "model.layers.17.block_sparse_moe.experts.203.w1", "model.layers.17.block_sparse_moe.experts.204.w1", "model.layers.17.block_sparse_moe.experts.205.w1", "model.layers.17.block_sparse_moe.experts.206.w1", "model.layers.17.block_sparse_moe.experts.207.w1", "model.layers.17.block_sparse_moe.experts.208.w1", "model.layers.17.block_sparse_moe.experts.209.w1", "model.layers.17.block_sparse_moe.experts.210.w1", "model.layers.17.block_sparse_moe.experts.211.w1", "model.layers.17.block_sparse_moe.experts.212.w1", "model.layers.17.block_sparse_moe.experts.213.w1", "model.layers.17.block_sparse_moe.experts.214.w1", "model.layers.17.block_sparse_moe.experts.215.w1", "model.layers.17.block_sparse_moe.experts.216.w1", "model.layers.17.block_sparse_moe.experts.217.w1", "model.layers.17.block_sparse_moe.experts.218.w1", "model.layers.17.block_sparse_moe.experts.219.w1", "model.layers.17.block_sparse_moe.experts.220.w1", "model.layers.17.block_sparse_moe.experts.221.w1", "model.layers.17.block_sparse_moe.experts.222.w1", "model.layers.17.block_sparse_moe.experts.223.w1", "model.layers.17.block_sparse_moe.experts.224.w1", "model.layers.17.block_sparse_moe.experts.225.w1", "model.layers.17.block_sparse_moe.experts.226.w1", "model.layers.17.block_sparse_moe.experts.227.w1", "model.layers.17.block_sparse_moe.experts.228.w1", "model.layers.17.block_sparse_moe.experts.229.w1", "model.layers.17.block_sparse_moe.experts.230.w1", "model.layers.17.block_sparse_moe.experts.231.w1", "model.layers.17.block_sparse_moe.experts.232.w1", "model.layers.17.block_sparse_moe.experts.233.w1", "model.layers.17.block_sparse_moe.experts.234.w1", "model.layers.17.block_sparse_moe.experts.235.w1", "model.layers.17.block_sparse_moe.experts.236.w1", "model.layers.17.block_sparse_moe.experts.237.w1", "model.layers.17.block_sparse_moe.experts.238.w1", "model.layers.17.block_sparse_moe.experts.239.w1", "model.layers.17.block_sparse_moe.experts.240.w1", "model.layers.17.block_sparse_moe.experts.241.w1", "model.layers.17.block_sparse_moe.experts.242.w1", "model.layers.17.block_sparse_moe.experts.243.w1", "model.layers.17.block_sparse_moe.experts.244.w1", "model.layers.17.block_sparse_moe.experts.245.w1", "model.layers.17.block_sparse_moe.experts.246.w1", "model.layers.17.block_sparse_moe.experts.247.w1", "model.layers.17.block_sparse_moe.experts.248.w1", "model.layers.17.block_sparse_moe.experts.249.w1", "model.layers.17.block_sparse_moe.experts.250.w1", "model.layers.17.block_sparse_moe.experts.251.w1", "model.layers.17.block_sparse_moe.experts.252.w1", "model.layers.17.block_sparse_moe.experts.253.w1", "model.layers.17.block_sparse_moe.experts.254.w1", "model.layers.17.block_sparse_moe.experts.255.w1", "model.layers.17.block_sparse_moe.experts.0.w3", "model.layers.17.block_sparse_moe.experts.1.w3", "model.layers.17.block_sparse_moe.experts.2.w3", "model.layers.17.block_sparse_moe.experts.3.w3", "model.layers.17.block_sparse_moe.experts.4.w3", "model.layers.17.block_sparse_moe.experts.5.w3", "model.layers.17.block_sparse_moe.experts.6.w3", "model.layers.17.block_sparse_moe.experts.7.w3", "model.layers.17.block_sparse_moe.experts.8.w3", "model.layers.17.block_sparse_moe.experts.9.w3", "model.layers.17.block_sparse_moe.experts.10.w3", "model.layers.17.block_sparse_moe.experts.11.w3", "model.layers.17.block_sparse_moe.experts.12.w3", "model.layers.17.block_sparse_moe.experts.13.w3", "model.layers.17.block_sparse_moe.experts.14.w3", "model.layers.17.block_sparse_moe.experts.15.w3", "model.layers.17.block_sparse_moe.experts.16.w3", "model.layers.17.block_sparse_moe.experts.17.w3", "model.layers.17.block_sparse_moe.experts.18.w3", "model.layers.17.block_sparse_moe.experts.19.w3", "model.layers.17.block_sparse_moe.experts.20.w3", "model.layers.17.block_sparse_moe.experts.21.w3", "model.layers.17.block_sparse_moe.experts.22.w3", "model.layers.17.block_sparse_moe.experts.23.w3", "model.layers.17.block_sparse_moe.experts.24.w3", "model.layers.17.block_sparse_moe.experts.25.w3", "model.layers.17.block_sparse_moe.experts.26.w3", "model.layers.17.block_sparse_moe.experts.27.w3", "model.layers.17.block_sparse_moe.experts.28.w3", "model.layers.17.block_sparse_moe.experts.29.w3", "model.layers.17.block_sparse_moe.experts.30.w3", "model.layers.17.block_sparse_moe.experts.31.w3", "model.layers.17.block_sparse_moe.experts.32.w3", "model.layers.17.block_sparse_moe.experts.33.w3", "model.layers.17.block_sparse_moe.experts.34.w3", "model.layers.17.block_sparse_moe.experts.35.w3", "model.layers.17.block_sparse_moe.experts.36.w3", "model.layers.17.block_sparse_moe.experts.37.w3", "model.layers.17.block_sparse_moe.experts.38.w3", "model.layers.17.block_sparse_moe.experts.39.w3", "model.layers.17.block_sparse_moe.experts.40.w3", "model.layers.17.block_sparse_moe.experts.41.w3", "model.layers.17.block_sparse_moe.experts.42.w3", "model.layers.17.block_sparse_moe.experts.43.w3", "model.layers.17.block_sparse_moe.experts.44.w3", "model.layers.17.block_sparse_moe.experts.45.w3", "model.layers.17.block_sparse_moe.experts.46.w3", "model.layers.17.block_sparse_moe.experts.47.w3", "model.layers.17.block_sparse_moe.experts.48.w3", "model.layers.17.block_sparse_moe.experts.49.w3", "model.layers.17.block_sparse_moe.experts.50.w3", "model.layers.17.block_sparse_moe.experts.51.w3", "model.layers.17.block_sparse_moe.experts.52.w3", "model.layers.17.block_sparse_moe.experts.53.w3", "model.layers.17.block_sparse_moe.experts.54.w3", "model.layers.17.block_sparse_moe.experts.55.w3", "model.layers.17.block_sparse_moe.experts.56.w3", "model.layers.17.block_sparse_moe.experts.57.w3", "model.layers.17.block_sparse_moe.experts.58.w3", "model.layers.17.block_sparse_moe.experts.59.w3", "model.layers.17.block_sparse_moe.experts.60.w3", "model.layers.17.block_sparse_moe.experts.61.w3", "model.layers.17.block_sparse_moe.experts.62.w3", "model.layers.17.block_sparse_moe.experts.63.w3", "model.layers.17.block_sparse_moe.experts.64.w3", "model.layers.17.block_sparse_moe.experts.65.w3", "model.layers.17.block_sparse_moe.experts.66.w3", "model.layers.17.block_sparse_moe.experts.67.w3", "model.layers.17.block_sparse_moe.experts.68.w3", "model.layers.17.block_sparse_moe.experts.69.w3", "model.layers.17.block_sparse_moe.experts.70.w3", "model.layers.17.block_sparse_moe.experts.71.w3", "model.layers.17.block_sparse_moe.experts.72.w3", "model.layers.17.block_sparse_moe.experts.73.w3", "model.layers.17.block_sparse_moe.experts.74.w3", "model.layers.17.block_sparse_moe.experts.75.w3", "model.layers.17.block_sparse_moe.experts.76.w3", "model.layers.17.block_sparse_moe.experts.77.w3", "model.layers.17.block_sparse_moe.experts.78.w3", "model.layers.17.block_sparse_moe.experts.79.w3", "model.layers.17.block_sparse_moe.experts.80.w3", "model.layers.17.block_sparse_moe.experts.81.w3", "model.layers.17.block_sparse_moe.experts.82.w3", "model.layers.17.block_sparse_moe.experts.83.w3", "model.layers.17.block_sparse_moe.experts.84.w3", "model.layers.17.block_sparse_moe.experts.85.w3", "model.layers.17.block_sparse_moe.experts.86.w3", "model.layers.17.block_sparse_moe.experts.87.w3", "model.layers.17.block_sparse_moe.experts.88.w3", "model.layers.17.block_sparse_moe.experts.89.w3", "model.layers.17.block_sparse_moe.experts.90.w3", "model.layers.17.block_sparse_moe.experts.91.w3", "model.layers.17.block_sparse_moe.experts.92.w3", "model.layers.17.block_sparse_moe.experts.93.w3", "model.layers.17.block_sparse_moe.experts.94.w3", "model.layers.17.block_sparse_moe.experts.95.w3", "model.layers.17.block_sparse_moe.experts.96.w3", "model.layers.17.block_sparse_moe.experts.97.w3", "model.layers.17.block_sparse_moe.experts.98.w3", "model.layers.17.block_sparse_moe.experts.99.w3", "model.layers.17.block_sparse_moe.experts.100.w3", "model.layers.17.block_sparse_moe.experts.101.w3", "model.layers.17.block_sparse_moe.experts.102.w3", "model.layers.17.block_sparse_moe.experts.103.w3", "model.layers.17.block_sparse_moe.experts.104.w3", "model.layers.17.block_sparse_moe.experts.105.w3", "model.layers.17.block_sparse_moe.experts.106.w3", "model.layers.17.block_sparse_moe.experts.107.w3", "model.layers.17.block_sparse_moe.experts.108.w3", "model.layers.17.block_sparse_moe.experts.109.w3", "model.layers.17.block_sparse_moe.experts.110.w3", "model.layers.17.block_sparse_moe.experts.111.w3", "model.layers.17.block_sparse_moe.experts.112.w3", "model.layers.17.block_sparse_moe.experts.113.w3", "model.layers.17.block_sparse_moe.experts.114.w3", "model.layers.17.block_sparse_moe.experts.115.w3", "model.layers.17.block_sparse_moe.experts.116.w3", "model.layers.17.block_sparse_moe.experts.117.w3", "model.layers.17.block_sparse_moe.experts.118.w3", "model.layers.17.block_sparse_moe.experts.119.w3", "model.layers.17.block_sparse_moe.experts.120.w3", "model.layers.17.block_sparse_moe.experts.121.w3", "model.layers.17.block_sparse_moe.experts.122.w3", "model.layers.17.block_sparse_moe.experts.123.w3", "model.layers.17.block_sparse_moe.experts.124.w3", "model.layers.17.block_sparse_moe.experts.125.w3", "model.layers.17.block_sparse_moe.experts.126.w3", "model.layers.17.block_sparse_moe.experts.127.w3", "model.layers.17.block_sparse_moe.experts.128.w3", "model.layers.17.block_sparse_moe.experts.129.w3", "model.layers.17.block_sparse_moe.experts.130.w3", "model.layers.17.block_sparse_moe.experts.131.w3", "model.layers.17.block_sparse_moe.experts.132.w3", "model.layers.17.block_sparse_moe.experts.133.w3", "model.layers.17.block_sparse_moe.experts.134.w3", "model.layers.17.block_sparse_moe.experts.135.w3", "model.layers.17.block_sparse_moe.experts.136.w3", "model.layers.17.block_sparse_moe.experts.137.w3", "model.layers.17.block_sparse_moe.experts.138.w3", "model.layers.17.block_sparse_moe.experts.139.w3", "model.layers.17.block_sparse_moe.experts.140.w3", "model.layers.17.block_sparse_moe.experts.141.w3", "model.layers.17.block_sparse_moe.experts.142.w3", "model.layers.17.block_sparse_moe.experts.143.w3", "model.layers.17.block_sparse_moe.experts.144.w3", "model.layers.17.block_sparse_moe.experts.145.w3", "model.layers.17.block_sparse_moe.experts.146.w3", "model.layers.17.block_sparse_moe.experts.147.w3", "model.layers.17.block_sparse_moe.experts.148.w3", "model.layers.17.block_sparse_moe.experts.149.w3", "model.layers.17.block_sparse_moe.experts.150.w3", "model.layers.17.block_sparse_moe.experts.151.w3", "model.layers.17.block_sparse_moe.experts.152.w3", "model.layers.17.block_sparse_moe.experts.153.w3", "model.layers.17.block_sparse_moe.experts.154.w3", "model.layers.17.block_sparse_moe.experts.155.w3", "model.layers.17.block_sparse_moe.experts.156.w3", "model.layers.17.block_sparse_moe.experts.157.w3", "model.layers.17.block_sparse_moe.experts.158.w3", "model.layers.17.block_sparse_moe.experts.159.w3", "model.layers.17.block_sparse_moe.experts.160.w3", "model.layers.17.block_sparse_moe.experts.161.w3", "model.layers.17.block_sparse_moe.experts.162.w3", "model.layers.17.block_sparse_moe.experts.163.w3", "model.layers.17.block_sparse_moe.experts.164.w3", "model.layers.17.block_sparse_moe.experts.165.w3", "model.layers.17.block_sparse_moe.experts.166.w3", "model.layers.17.block_sparse_moe.experts.167.w3", "model.layers.17.block_sparse_moe.experts.168.w3", "model.layers.17.block_sparse_moe.experts.169.w3", "model.layers.17.block_sparse_moe.experts.170.w3", "model.layers.17.block_sparse_moe.experts.171.w3", "model.layers.17.block_sparse_moe.experts.172.w3", "model.layers.17.block_sparse_moe.experts.173.w3", "model.layers.17.block_sparse_moe.experts.174.w3", "model.layers.17.block_sparse_moe.experts.175.w3", "model.layers.17.block_sparse_moe.experts.176.w3", "model.layers.17.block_sparse_moe.experts.177.w3", "model.layers.17.block_sparse_moe.experts.178.w3", "model.layers.17.block_sparse_moe.experts.179.w3", "model.layers.17.block_sparse_moe.experts.180.w3", "model.layers.17.block_sparse_moe.experts.181.w3", "model.layers.17.block_sparse_moe.experts.182.w3", "model.layers.17.block_sparse_moe.experts.183.w3", "model.layers.17.block_sparse_moe.experts.184.w3", "model.layers.17.block_sparse_moe.experts.185.w3", "model.layers.17.block_sparse_moe.experts.186.w3", "model.layers.17.block_sparse_moe.experts.187.w3", "model.layers.17.block_sparse_moe.experts.188.w3", "model.layers.17.block_sparse_moe.experts.189.w3", "model.layers.17.block_sparse_moe.experts.190.w3", "model.layers.17.block_sparse_moe.experts.191.w3", "model.layers.17.block_sparse_moe.experts.192.w3", "model.layers.17.block_sparse_moe.experts.193.w3", "model.layers.17.block_sparse_moe.experts.194.w3", "model.layers.17.block_sparse_moe.experts.195.w3", "model.layers.17.block_sparse_moe.experts.196.w3", "model.layers.17.block_sparse_moe.experts.197.w3", "model.layers.17.block_sparse_moe.experts.198.w3", "model.layers.17.block_sparse_moe.experts.199.w3", "model.layers.17.block_sparse_moe.experts.200.w3", "model.layers.17.block_sparse_moe.experts.201.w3", "model.layers.17.block_sparse_moe.experts.202.w3", "model.layers.17.block_sparse_moe.experts.203.w3", "model.layers.17.block_sparse_moe.experts.204.w3", "model.layers.17.block_sparse_moe.experts.205.w3", "model.layers.17.block_sparse_moe.experts.206.w3", "model.layers.17.block_sparse_moe.experts.207.w3", "model.layers.17.block_sparse_moe.experts.208.w3", "model.layers.17.block_sparse_moe.experts.209.w3", "model.layers.17.block_sparse_moe.experts.210.w3", "model.layers.17.block_sparse_moe.experts.211.w3", "model.layers.17.block_sparse_moe.experts.212.w3", "model.layers.17.block_sparse_moe.experts.213.w3", "model.layers.17.block_sparse_moe.experts.214.w3", "model.layers.17.block_sparse_moe.experts.215.w3", "model.layers.17.block_sparse_moe.experts.216.w3", "model.layers.17.block_sparse_moe.experts.217.w3", "model.layers.17.block_sparse_moe.experts.218.w3", "model.layers.17.block_sparse_moe.experts.219.w3", "model.layers.17.block_sparse_moe.experts.220.w3", "model.layers.17.block_sparse_moe.experts.221.w3", "model.layers.17.block_sparse_moe.experts.222.w3", "model.layers.17.block_sparse_moe.experts.223.w3", "model.layers.17.block_sparse_moe.experts.224.w3", "model.layers.17.block_sparse_moe.experts.225.w3", "model.layers.17.block_sparse_moe.experts.226.w3", "model.layers.17.block_sparse_moe.experts.227.w3", "model.layers.17.block_sparse_moe.experts.228.w3", "model.layers.17.block_sparse_moe.experts.229.w3", "model.layers.17.block_sparse_moe.experts.230.w3", "model.layers.17.block_sparse_moe.experts.231.w3", "model.layers.17.block_sparse_moe.experts.232.w3", "model.layers.17.block_sparse_moe.experts.233.w3", "model.layers.17.block_sparse_moe.experts.234.w3", "model.layers.17.block_sparse_moe.experts.235.w3", "model.layers.17.block_sparse_moe.experts.236.w3", "model.layers.17.block_sparse_moe.experts.237.w3", "model.layers.17.block_sparse_moe.experts.238.w3", "model.layers.17.block_sparse_moe.experts.239.w3", "model.layers.17.block_sparse_moe.experts.240.w3", "model.layers.17.block_sparse_moe.experts.241.w3", "model.layers.17.block_sparse_moe.experts.242.w3", "model.layers.17.block_sparse_moe.experts.243.w3", "model.layers.17.block_sparse_moe.experts.244.w3", "model.layers.17.block_sparse_moe.experts.245.w3", "model.layers.17.block_sparse_moe.experts.246.w3", "model.layers.17.block_sparse_moe.experts.247.w3", "model.layers.17.block_sparse_moe.experts.248.w3", "model.layers.17.block_sparse_moe.experts.249.w3", "model.layers.17.block_sparse_moe.experts.250.w3", "model.layers.17.block_sparse_moe.experts.251.w3", "model.layers.17.block_sparse_moe.experts.252.w3", "model.layers.17.block_sparse_moe.experts.253.w3", "model.layers.17.block_sparse_moe.experts.254.w3", "model.layers.17.block_sparse_moe.experts.255.w3", "model.layers.17.block_sparse_moe.experts.0.w2", "model.layers.17.block_sparse_moe.experts.1.w2", "model.layers.17.block_sparse_moe.experts.2.w2", "model.layers.17.block_sparse_moe.experts.3.w2", "model.layers.17.block_sparse_moe.experts.4.w2", "model.layers.17.block_sparse_moe.experts.5.w2", "model.layers.17.block_sparse_moe.experts.6.w2", "model.layers.17.block_sparse_moe.experts.7.w2", "model.layers.17.block_sparse_moe.experts.8.w2", "model.layers.17.block_sparse_moe.experts.9.w2", "model.layers.17.block_sparse_moe.experts.10.w2", "model.layers.17.block_sparse_moe.experts.11.w2", "model.layers.17.block_sparse_moe.experts.12.w2", "model.layers.17.block_sparse_moe.experts.13.w2", "model.layers.17.block_sparse_moe.experts.14.w2", "model.layers.17.block_sparse_moe.experts.15.w2", "model.layers.17.block_sparse_moe.experts.16.w2", "model.layers.17.block_sparse_moe.experts.17.w2", "model.layers.17.block_sparse_moe.experts.18.w2", "model.layers.17.block_sparse_moe.experts.19.w2", "model.layers.17.block_sparse_moe.experts.20.w2", "model.layers.17.block_sparse_moe.experts.21.w2", "model.layers.17.block_sparse_moe.experts.22.w2", "model.layers.17.block_sparse_moe.experts.23.w2", "model.layers.17.block_sparse_moe.experts.24.w2", "model.layers.17.block_sparse_moe.experts.25.w2", "model.layers.17.block_sparse_moe.experts.26.w2", "model.layers.17.block_sparse_moe.experts.27.w2", "model.layers.17.block_sparse_moe.experts.28.w2", "model.layers.17.block_sparse_moe.experts.29.w2", "model.layers.17.block_sparse_moe.experts.30.w2", "model.layers.17.block_sparse_moe.experts.31.w2", "model.layers.17.block_sparse_moe.experts.32.w2", "model.layers.17.block_sparse_moe.experts.33.w2", "model.layers.17.block_sparse_moe.experts.34.w2", "model.layers.17.block_sparse_moe.experts.35.w2", "model.layers.17.block_sparse_moe.experts.36.w2", "model.layers.17.block_sparse_moe.experts.37.w2", "model.layers.17.block_sparse_moe.experts.38.w2", "model.layers.17.block_sparse_moe.experts.39.w2", "model.layers.17.block_sparse_moe.experts.40.w2", "model.layers.17.block_sparse_moe.experts.41.w2", "model.layers.17.block_sparse_moe.experts.42.w2", "model.layers.17.block_sparse_moe.experts.43.w2", "model.layers.17.block_sparse_moe.experts.44.w2", "model.layers.17.block_sparse_moe.experts.45.w2", "model.layers.17.block_sparse_moe.experts.46.w2", "model.layers.17.block_sparse_moe.experts.47.w2", "model.layers.17.block_sparse_moe.experts.48.w2", "model.layers.17.block_sparse_moe.experts.49.w2", "model.layers.17.block_sparse_moe.experts.50.w2", "model.layers.17.block_sparse_moe.experts.51.w2", "model.layers.17.block_sparse_moe.experts.52.w2", "model.layers.17.block_sparse_moe.experts.53.w2", "model.layers.17.block_sparse_moe.experts.54.w2", "model.layers.17.block_sparse_moe.experts.55.w2", "model.layers.17.block_sparse_moe.experts.56.w2", "model.layers.17.block_sparse_moe.experts.57.w2", "model.layers.17.block_sparse_moe.experts.58.w2", "model.layers.17.block_sparse_moe.experts.59.w2", "model.layers.17.block_sparse_moe.experts.60.w2", "model.layers.17.block_sparse_moe.experts.61.w2", "model.layers.17.block_sparse_moe.experts.62.w2", "model.layers.17.block_sparse_moe.experts.63.w2", "model.layers.17.block_sparse_moe.experts.64.w2", "model.layers.17.block_sparse_moe.experts.65.w2", "model.layers.17.block_sparse_moe.experts.66.w2", "model.layers.17.block_sparse_moe.experts.67.w2", "model.layers.17.block_sparse_moe.experts.68.w2", "model.layers.17.block_sparse_moe.experts.69.w2", "model.layers.17.block_sparse_moe.experts.70.w2", "model.layers.17.block_sparse_moe.experts.71.w2", "model.layers.17.block_sparse_moe.experts.72.w2", "model.layers.17.block_sparse_moe.experts.73.w2", "model.layers.17.block_sparse_moe.experts.74.w2", "model.layers.17.block_sparse_moe.experts.75.w2", "model.layers.17.block_sparse_moe.experts.76.w2", "model.layers.17.block_sparse_moe.experts.77.w2", "model.layers.17.block_sparse_moe.experts.78.w2", "model.layers.17.block_sparse_moe.experts.79.w2", "model.layers.17.block_sparse_moe.experts.80.w2", "model.layers.17.block_sparse_moe.experts.81.w2", "model.layers.17.block_sparse_moe.experts.82.w2", "model.layers.17.block_sparse_moe.experts.83.w2", "model.layers.17.block_sparse_moe.experts.84.w2", "model.layers.17.block_sparse_moe.experts.85.w2", "model.layers.17.block_sparse_moe.experts.86.w2", "model.layers.17.block_sparse_moe.experts.87.w2", "model.layers.17.block_sparse_moe.experts.88.w2", "model.layers.17.block_sparse_moe.experts.89.w2", "model.layers.17.block_sparse_moe.experts.90.w2", "model.layers.17.block_sparse_moe.experts.91.w2", "model.layers.17.block_sparse_moe.experts.92.w2", "model.layers.17.block_sparse_moe.experts.93.w2", "model.layers.17.block_sparse_moe.experts.94.w2", "model.layers.17.block_sparse_moe.experts.95.w2", "model.layers.17.block_sparse_moe.experts.96.w2", "model.layers.17.block_sparse_moe.experts.97.w2", "model.layers.17.block_sparse_moe.experts.98.w2", "model.layers.17.block_sparse_moe.experts.99.w2", "model.layers.17.block_sparse_moe.experts.100.w2", "model.layers.17.block_sparse_moe.experts.101.w2", "model.layers.17.block_sparse_moe.experts.102.w2", "model.layers.17.block_sparse_moe.experts.103.w2", "model.layers.17.block_sparse_moe.experts.104.w2", "model.layers.17.block_sparse_moe.experts.105.w2", "model.layers.17.block_sparse_moe.experts.106.w2", "model.layers.17.block_sparse_moe.experts.107.w2", "model.layers.17.block_sparse_moe.experts.108.w2", "model.layers.17.block_sparse_moe.experts.109.w2", "model.layers.17.block_sparse_moe.experts.110.w2", "model.layers.17.block_sparse_moe.experts.111.w2", "model.layers.17.block_sparse_moe.experts.112.w2", "model.layers.17.block_sparse_moe.experts.113.w2", "model.layers.17.block_sparse_moe.experts.114.w2", "model.layers.17.block_sparse_moe.experts.115.w2", "model.layers.17.block_sparse_moe.experts.116.w2", "model.layers.17.block_sparse_moe.experts.117.w2", "model.layers.17.block_sparse_moe.experts.118.w2", "model.layers.17.block_sparse_moe.experts.119.w2", "model.layers.17.block_sparse_moe.experts.120.w2", "model.layers.17.block_sparse_moe.experts.121.w2", "model.layers.17.block_sparse_moe.experts.122.w2", "model.layers.17.block_sparse_moe.experts.123.w2", "model.layers.17.block_sparse_moe.experts.124.w2", "model.layers.17.block_sparse_moe.experts.125.w2", "model.layers.17.block_sparse_moe.experts.126.w2", "model.layers.17.block_sparse_moe.experts.127.w2", "model.layers.17.block_sparse_moe.experts.128.w2", "model.layers.17.block_sparse_moe.experts.129.w2", "model.layers.17.block_sparse_moe.experts.130.w2", "model.layers.17.block_sparse_moe.experts.131.w2", "model.layers.17.block_sparse_moe.experts.132.w2", "model.layers.17.block_sparse_moe.experts.133.w2", "model.layers.17.block_sparse_moe.experts.134.w2", "model.layers.17.block_sparse_moe.experts.135.w2", "model.layers.17.block_sparse_moe.experts.136.w2", "model.layers.17.block_sparse_moe.experts.137.w2", "model.layers.17.block_sparse_moe.experts.138.w2", "model.layers.17.block_sparse_moe.experts.139.w2", "model.layers.17.block_sparse_moe.experts.140.w2", "model.layers.17.block_sparse_moe.experts.141.w2", "model.layers.17.block_sparse_moe.experts.142.w2", "model.layers.17.block_sparse_moe.experts.143.w2", "model.layers.17.block_sparse_moe.experts.144.w2", "model.layers.17.block_sparse_moe.experts.145.w2", "model.layers.17.block_sparse_moe.experts.146.w2", "model.layers.17.block_sparse_moe.experts.147.w2", "model.layers.17.block_sparse_moe.experts.148.w2", "model.layers.17.block_sparse_moe.experts.149.w2", "model.layers.17.block_sparse_moe.experts.150.w2", "model.layers.17.block_sparse_moe.experts.151.w2", "model.layers.17.block_sparse_moe.experts.152.w2", "model.layers.17.block_sparse_moe.experts.153.w2", "model.layers.17.block_sparse_moe.experts.154.w2", "model.layers.17.block_sparse_moe.experts.155.w2", "model.layers.17.block_sparse_moe.experts.156.w2", "model.layers.17.block_sparse_moe.experts.157.w2", "model.layers.17.block_sparse_moe.experts.158.w2", "model.layers.17.block_sparse_moe.experts.159.w2", "model.layers.17.block_sparse_moe.experts.160.w2", "model.layers.17.block_sparse_moe.experts.161.w2", "model.layers.17.block_sparse_moe.experts.162.w2", "model.layers.17.block_sparse_moe.experts.163.w2", "model.layers.17.block_sparse_moe.experts.164.w2", "model.layers.17.block_sparse_moe.experts.165.w2", "model.layers.17.block_sparse_moe.experts.166.w2", "model.layers.17.block_sparse_moe.experts.167.w2", "model.layers.17.block_sparse_moe.experts.168.w2", "model.layers.17.block_sparse_moe.experts.169.w2", "model.layers.17.block_sparse_moe.experts.170.w2", "model.layers.17.block_sparse_moe.experts.171.w2", "model.layers.17.block_sparse_moe.experts.172.w2", "model.layers.17.block_sparse_moe.experts.173.w2", "model.layers.17.block_sparse_moe.experts.174.w2", "model.layers.17.block_sparse_moe.experts.175.w2", "model.layers.17.block_sparse_moe.experts.176.w2", "model.layers.17.block_sparse_moe.experts.177.w2", "model.layers.17.block_sparse_moe.experts.178.w2", "model.layers.17.block_sparse_moe.experts.179.w2", "model.layers.17.block_sparse_moe.experts.180.w2", "model.layers.17.block_sparse_moe.experts.181.w2", "model.layers.17.block_sparse_moe.experts.182.w2", "model.layers.17.block_sparse_moe.experts.183.w2", "model.layers.17.block_sparse_moe.experts.184.w2", "model.layers.17.block_sparse_moe.experts.185.w2", "model.layers.17.block_sparse_moe.experts.186.w2", "model.layers.17.block_sparse_moe.experts.187.w2", "model.layers.17.block_sparse_moe.experts.188.w2", "model.layers.17.block_sparse_moe.experts.189.w2", "model.layers.17.block_sparse_moe.experts.190.w2", "model.layers.17.block_sparse_moe.experts.191.w2", "model.layers.17.block_sparse_moe.experts.192.w2", "model.layers.17.block_sparse_moe.experts.193.w2", "model.layers.17.block_sparse_moe.experts.194.w2", "model.layers.17.block_sparse_moe.experts.195.w2", "model.layers.17.block_sparse_moe.experts.196.w2", "model.layers.17.block_sparse_moe.experts.197.w2", "model.layers.17.block_sparse_moe.experts.198.w2", "model.layers.17.block_sparse_moe.experts.199.w2", "model.layers.17.block_sparse_moe.experts.200.w2", "model.layers.17.block_sparse_moe.experts.201.w2", "model.layers.17.block_sparse_moe.experts.202.w2", "model.layers.17.block_sparse_moe.experts.203.w2", "model.layers.17.block_sparse_moe.experts.204.w2", "model.layers.17.block_sparse_moe.experts.205.w2", "model.layers.17.block_sparse_moe.experts.206.w2", "model.layers.17.block_sparse_moe.experts.207.w2", "model.layers.17.block_sparse_moe.experts.208.w2", "model.layers.17.block_sparse_moe.experts.209.w2", "model.layers.17.block_sparse_moe.experts.210.w2", "model.layers.17.block_sparse_moe.experts.211.w2", "model.layers.17.block_sparse_moe.experts.212.w2", "model.layers.17.block_sparse_moe.experts.213.w2", "model.layers.17.block_sparse_moe.experts.214.w2", "model.layers.17.block_sparse_moe.experts.215.w2", "model.layers.17.block_sparse_moe.experts.216.w2", "model.layers.17.block_sparse_moe.experts.217.w2", "model.layers.17.block_sparse_moe.experts.218.w2", "model.layers.17.block_sparse_moe.experts.219.w2", "model.layers.17.block_sparse_moe.experts.220.w2", "model.layers.17.block_sparse_moe.experts.221.w2", "model.layers.17.block_sparse_moe.experts.222.w2", "model.layers.17.block_sparse_moe.experts.223.w2", "model.layers.17.block_sparse_moe.experts.224.w2", "model.layers.17.block_sparse_moe.experts.225.w2", "model.layers.17.block_sparse_moe.experts.226.w2", "model.layers.17.block_sparse_moe.experts.227.w2", "model.layers.17.block_sparse_moe.experts.228.w2", "model.layers.17.block_sparse_moe.experts.229.w2", "model.layers.17.block_sparse_moe.experts.230.w2", "model.layers.17.block_sparse_moe.experts.231.w2", "model.layers.17.block_sparse_moe.experts.232.w2", "model.layers.17.block_sparse_moe.experts.233.w2", "model.layers.17.block_sparse_moe.experts.234.w2", "model.layers.17.block_sparse_moe.experts.235.w2", "model.layers.17.block_sparse_moe.experts.236.w2", "model.layers.17.block_sparse_moe.experts.237.w2", "model.layers.17.block_sparse_moe.experts.238.w2", "model.layers.17.block_sparse_moe.experts.239.w2", "model.layers.17.block_sparse_moe.experts.240.w2", "model.layers.17.block_sparse_moe.experts.241.w2", "model.layers.17.block_sparse_moe.experts.242.w2", "model.layers.17.block_sparse_moe.experts.243.w2", "model.layers.17.block_sparse_moe.experts.244.w2", "model.layers.17.block_sparse_moe.experts.245.w2", "model.layers.17.block_sparse_moe.experts.246.w2", "model.layers.17.block_sparse_moe.experts.247.w2", "model.layers.17.block_sparse_moe.experts.248.w2", "model.layers.17.block_sparse_moe.experts.249.w2", "model.layers.17.block_sparse_moe.experts.250.w2", "model.layers.17.block_sparse_moe.experts.251.w2", "model.layers.17.block_sparse_moe.experts.252.w2", "model.layers.17.block_sparse_moe.experts.253.w2", "model.layers.17.block_sparse_moe.experts.254.w2", "model.layers.17.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006479147821664699, "dbits": 3623878656 } ] }, { "idx": 36, "layers": [ "model.layers.18.self_attn.q_proj", "model.layers.18.self_attn.k_proj", "model.layers.18.self_attn.v_proj", "model.layers.18.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0010831361636519654, "dbits": 44040192 } ] }, { "idx": 37, "layers": [ "model.layers.18.block_sparse_moe.experts.0.w1", "model.layers.18.block_sparse_moe.experts.1.w1", "model.layers.18.block_sparse_moe.experts.2.w1", "model.layers.18.block_sparse_moe.experts.3.w1", "model.layers.18.block_sparse_moe.experts.4.w1", "model.layers.18.block_sparse_moe.experts.5.w1", "model.layers.18.block_sparse_moe.experts.6.w1", "model.layers.18.block_sparse_moe.experts.7.w1", "model.layers.18.block_sparse_moe.experts.8.w1", "model.layers.18.block_sparse_moe.experts.9.w1", "model.layers.18.block_sparse_moe.experts.10.w1", "model.layers.18.block_sparse_moe.experts.11.w1", "model.layers.18.block_sparse_moe.experts.12.w1", "model.layers.18.block_sparse_moe.experts.13.w1", "model.layers.18.block_sparse_moe.experts.14.w1", "model.layers.18.block_sparse_moe.experts.15.w1", "model.layers.18.block_sparse_moe.experts.16.w1", "model.layers.18.block_sparse_moe.experts.17.w1", "model.layers.18.block_sparse_moe.experts.18.w1", "model.layers.18.block_sparse_moe.experts.19.w1", "model.layers.18.block_sparse_moe.experts.20.w1", "model.layers.18.block_sparse_moe.experts.21.w1", "model.layers.18.block_sparse_moe.experts.22.w1", "model.layers.18.block_sparse_moe.experts.23.w1", "model.layers.18.block_sparse_moe.experts.24.w1", "model.layers.18.block_sparse_moe.experts.25.w1", "model.layers.18.block_sparse_moe.experts.26.w1", "model.layers.18.block_sparse_moe.experts.27.w1", "model.layers.18.block_sparse_moe.experts.28.w1", "model.layers.18.block_sparse_moe.experts.29.w1", "model.layers.18.block_sparse_moe.experts.30.w1", "model.layers.18.block_sparse_moe.experts.31.w1", "model.layers.18.block_sparse_moe.experts.32.w1", "model.layers.18.block_sparse_moe.experts.33.w1", "model.layers.18.block_sparse_moe.experts.34.w1", "model.layers.18.block_sparse_moe.experts.35.w1", "model.layers.18.block_sparse_moe.experts.36.w1", "model.layers.18.block_sparse_moe.experts.37.w1", "model.layers.18.block_sparse_moe.experts.38.w1", "model.layers.18.block_sparse_moe.experts.39.w1", "model.layers.18.block_sparse_moe.experts.40.w1", "model.layers.18.block_sparse_moe.experts.41.w1", "model.layers.18.block_sparse_moe.experts.42.w1", "model.layers.18.block_sparse_moe.experts.43.w1", "model.layers.18.block_sparse_moe.experts.44.w1", "model.layers.18.block_sparse_moe.experts.45.w1", "model.layers.18.block_sparse_moe.experts.46.w1", "model.layers.18.block_sparse_moe.experts.47.w1", "model.layers.18.block_sparse_moe.experts.48.w1", "model.layers.18.block_sparse_moe.experts.49.w1", "model.layers.18.block_sparse_moe.experts.50.w1", "model.layers.18.block_sparse_moe.experts.51.w1", "model.layers.18.block_sparse_moe.experts.52.w1", "model.layers.18.block_sparse_moe.experts.53.w1", "model.layers.18.block_sparse_moe.experts.54.w1", "model.layers.18.block_sparse_moe.experts.55.w1", "model.layers.18.block_sparse_moe.experts.56.w1", "model.layers.18.block_sparse_moe.experts.57.w1", "model.layers.18.block_sparse_moe.experts.58.w1", "model.layers.18.block_sparse_moe.experts.59.w1", "model.layers.18.block_sparse_moe.experts.60.w1", "model.layers.18.block_sparse_moe.experts.61.w1", "model.layers.18.block_sparse_moe.experts.62.w1", "model.layers.18.block_sparse_moe.experts.63.w1", "model.layers.18.block_sparse_moe.experts.64.w1", "model.layers.18.block_sparse_moe.experts.65.w1", "model.layers.18.block_sparse_moe.experts.66.w1", "model.layers.18.block_sparse_moe.experts.67.w1", "model.layers.18.block_sparse_moe.experts.68.w1", "model.layers.18.block_sparse_moe.experts.69.w1", "model.layers.18.block_sparse_moe.experts.70.w1", "model.layers.18.block_sparse_moe.experts.71.w1", "model.layers.18.block_sparse_moe.experts.72.w1", "model.layers.18.block_sparse_moe.experts.73.w1", "model.layers.18.block_sparse_moe.experts.74.w1", "model.layers.18.block_sparse_moe.experts.75.w1", "model.layers.18.block_sparse_moe.experts.76.w1", "model.layers.18.block_sparse_moe.experts.77.w1", "model.layers.18.block_sparse_moe.experts.78.w1", "model.layers.18.block_sparse_moe.experts.79.w1", "model.layers.18.block_sparse_moe.experts.80.w1", "model.layers.18.block_sparse_moe.experts.81.w1", "model.layers.18.block_sparse_moe.experts.82.w1", "model.layers.18.block_sparse_moe.experts.83.w1", "model.layers.18.block_sparse_moe.experts.84.w1", "model.layers.18.block_sparse_moe.experts.85.w1", "model.layers.18.block_sparse_moe.experts.86.w1", "model.layers.18.block_sparse_moe.experts.87.w1", "model.layers.18.block_sparse_moe.experts.88.w1", "model.layers.18.block_sparse_moe.experts.89.w1", "model.layers.18.block_sparse_moe.experts.90.w1", "model.layers.18.block_sparse_moe.experts.91.w1", "model.layers.18.block_sparse_moe.experts.92.w1", "model.layers.18.block_sparse_moe.experts.93.w1", "model.layers.18.block_sparse_moe.experts.94.w1", "model.layers.18.block_sparse_moe.experts.95.w1", "model.layers.18.block_sparse_moe.experts.96.w1", "model.layers.18.block_sparse_moe.experts.97.w1", "model.layers.18.block_sparse_moe.experts.98.w1", "model.layers.18.block_sparse_moe.experts.99.w1", "model.layers.18.block_sparse_moe.experts.100.w1", "model.layers.18.block_sparse_moe.experts.101.w1", "model.layers.18.block_sparse_moe.experts.102.w1", "model.layers.18.block_sparse_moe.experts.103.w1", "model.layers.18.block_sparse_moe.experts.104.w1", "model.layers.18.block_sparse_moe.experts.105.w1", "model.layers.18.block_sparse_moe.experts.106.w1", "model.layers.18.block_sparse_moe.experts.107.w1", "model.layers.18.block_sparse_moe.experts.108.w1", "model.layers.18.block_sparse_moe.experts.109.w1", "model.layers.18.block_sparse_moe.experts.110.w1", "model.layers.18.block_sparse_moe.experts.111.w1", "model.layers.18.block_sparse_moe.experts.112.w1", "model.layers.18.block_sparse_moe.experts.113.w1", "model.layers.18.block_sparse_moe.experts.114.w1", "model.layers.18.block_sparse_moe.experts.115.w1", "model.layers.18.block_sparse_moe.experts.116.w1", "model.layers.18.block_sparse_moe.experts.117.w1", "model.layers.18.block_sparse_moe.experts.118.w1", "model.layers.18.block_sparse_moe.experts.119.w1", "model.layers.18.block_sparse_moe.experts.120.w1", "model.layers.18.block_sparse_moe.experts.121.w1", "model.layers.18.block_sparse_moe.experts.122.w1", "model.layers.18.block_sparse_moe.experts.123.w1", "model.layers.18.block_sparse_moe.experts.124.w1", "model.layers.18.block_sparse_moe.experts.125.w1", "model.layers.18.block_sparse_moe.experts.126.w1", "model.layers.18.block_sparse_moe.experts.127.w1", "model.layers.18.block_sparse_moe.experts.128.w1", "model.layers.18.block_sparse_moe.experts.129.w1", "model.layers.18.block_sparse_moe.experts.130.w1", "model.layers.18.block_sparse_moe.experts.131.w1", "model.layers.18.block_sparse_moe.experts.132.w1", "model.layers.18.block_sparse_moe.experts.133.w1", "model.layers.18.block_sparse_moe.experts.134.w1", "model.layers.18.block_sparse_moe.experts.135.w1", "model.layers.18.block_sparse_moe.experts.136.w1", "model.layers.18.block_sparse_moe.experts.137.w1", "model.layers.18.block_sparse_moe.experts.138.w1", "model.layers.18.block_sparse_moe.experts.139.w1", "model.layers.18.block_sparse_moe.experts.140.w1", "model.layers.18.block_sparse_moe.experts.141.w1", "model.layers.18.block_sparse_moe.experts.142.w1", "model.layers.18.block_sparse_moe.experts.143.w1", "model.layers.18.block_sparse_moe.experts.144.w1", "model.layers.18.block_sparse_moe.experts.145.w1", "model.layers.18.block_sparse_moe.experts.146.w1", "model.layers.18.block_sparse_moe.experts.147.w1", "model.layers.18.block_sparse_moe.experts.148.w1", "model.layers.18.block_sparse_moe.experts.149.w1", "model.layers.18.block_sparse_moe.experts.150.w1", "model.layers.18.block_sparse_moe.experts.151.w1", "model.layers.18.block_sparse_moe.experts.152.w1", "model.layers.18.block_sparse_moe.experts.153.w1", "model.layers.18.block_sparse_moe.experts.154.w1", "model.layers.18.block_sparse_moe.experts.155.w1", "model.layers.18.block_sparse_moe.experts.156.w1", "model.layers.18.block_sparse_moe.experts.157.w1", "model.layers.18.block_sparse_moe.experts.158.w1", "model.layers.18.block_sparse_moe.experts.159.w1", "model.layers.18.block_sparse_moe.experts.160.w1", "model.layers.18.block_sparse_moe.experts.161.w1", "model.layers.18.block_sparse_moe.experts.162.w1", "model.layers.18.block_sparse_moe.experts.163.w1", "model.layers.18.block_sparse_moe.experts.164.w1", "model.layers.18.block_sparse_moe.experts.165.w1", "model.layers.18.block_sparse_moe.experts.166.w1", "model.layers.18.block_sparse_moe.experts.167.w1", "model.layers.18.block_sparse_moe.experts.168.w1", "model.layers.18.block_sparse_moe.experts.169.w1", "model.layers.18.block_sparse_moe.experts.170.w1", "model.layers.18.block_sparse_moe.experts.171.w1", "model.layers.18.block_sparse_moe.experts.172.w1", "model.layers.18.block_sparse_moe.experts.173.w1", "model.layers.18.block_sparse_moe.experts.174.w1", "model.layers.18.block_sparse_moe.experts.175.w1", "model.layers.18.block_sparse_moe.experts.176.w1", "model.layers.18.block_sparse_moe.experts.177.w1", "model.layers.18.block_sparse_moe.experts.178.w1", "model.layers.18.block_sparse_moe.experts.179.w1", "model.layers.18.block_sparse_moe.experts.180.w1", "model.layers.18.block_sparse_moe.experts.181.w1", "model.layers.18.block_sparse_moe.experts.182.w1", "model.layers.18.block_sparse_moe.experts.183.w1", "model.layers.18.block_sparse_moe.experts.184.w1", "model.layers.18.block_sparse_moe.experts.185.w1", "model.layers.18.block_sparse_moe.experts.186.w1", "model.layers.18.block_sparse_moe.experts.187.w1", "model.layers.18.block_sparse_moe.experts.188.w1", "model.layers.18.block_sparse_moe.experts.189.w1", "model.layers.18.block_sparse_moe.experts.190.w1", "model.layers.18.block_sparse_moe.experts.191.w1", "model.layers.18.block_sparse_moe.experts.192.w1", "model.layers.18.block_sparse_moe.experts.193.w1", "model.layers.18.block_sparse_moe.experts.194.w1", "model.layers.18.block_sparse_moe.experts.195.w1", "model.layers.18.block_sparse_moe.experts.196.w1", "model.layers.18.block_sparse_moe.experts.197.w1", "model.layers.18.block_sparse_moe.experts.198.w1", "model.layers.18.block_sparse_moe.experts.199.w1", "model.layers.18.block_sparse_moe.experts.200.w1", "model.layers.18.block_sparse_moe.experts.201.w1", "model.layers.18.block_sparse_moe.experts.202.w1", "model.layers.18.block_sparse_moe.experts.203.w1", "model.layers.18.block_sparse_moe.experts.204.w1", "model.layers.18.block_sparse_moe.experts.205.w1", "model.layers.18.block_sparse_moe.experts.206.w1", "model.layers.18.block_sparse_moe.experts.207.w1", "model.layers.18.block_sparse_moe.experts.208.w1", "model.layers.18.block_sparse_moe.experts.209.w1", "model.layers.18.block_sparse_moe.experts.210.w1", "model.layers.18.block_sparse_moe.experts.211.w1", "model.layers.18.block_sparse_moe.experts.212.w1", "model.layers.18.block_sparse_moe.experts.213.w1", "model.layers.18.block_sparse_moe.experts.214.w1", "model.layers.18.block_sparse_moe.experts.215.w1", "model.layers.18.block_sparse_moe.experts.216.w1", "model.layers.18.block_sparse_moe.experts.217.w1", "model.layers.18.block_sparse_moe.experts.218.w1", "model.layers.18.block_sparse_moe.experts.219.w1", "model.layers.18.block_sparse_moe.experts.220.w1", "model.layers.18.block_sparse_moe.experts.221.w1", "model.layers.18.block_sparse_moe.experts.222.w1", "model.layers.18.block_sparse_moe.experts.223.w1", "model.layers.18.block_sparse_moe.experts.224.w1", "model.layers.18.block_sparse_moe.experts.225.w1", "model.layers.18.block_sparse_moe.experts.226.w1", "model.layers.18.block_sparse_moe.experts.227.w1", "model.layers.18.block_sparse_moe.experts.228.w1", "model.layers.18.block_sparse_moe.experts.229.w1", "model.layers.18.block_sparse_moe.experts.230.w1", "model.layers.18.block_sparse_moe.experts.231.w1", "model.layers.18.block_sparse_moe.experts.232.w1", "model.layers.18.block_sparse_moe.experts.233.w1", "model.layers.18.block_sparse_moe.experts.234.w1", "model.layers.18.block_sparse_moe.experts.235.w1", "model.layers.18.block_sparse_moe.experts.236.w1", "model.layers.18.block_sparse_moe.experts.237.w1", "model.layers.18.block_sparse_moe.experts.238.w1", "model.layers.18.block_sparse_moe.experts.239.w1", "model.layers.18.block_sparse_moe.experts.240.w1", "model.layers.18.block_sparse_moe.experts.241.w1", "model.layers.18.block_sparse_moe.experts.242.w1", "model.layers.18.block_sparse_moe.experts.243.w1", "model.layers.18.block_sparse_moe.experts.244.w1", "model.layers.18.block_sparse_moe.experts.245.w1", "model.layers.18.block_sparse_moe.experts.246.w1", "model.layers.18.block_sparse_moe.experts.247.w1", "model.layers.18.block_sparse_moe.experts.248.w1", "model.layers.18.block_sparse_moe.experts.249.w1", "model.layers.18.block_sparse_moe.experts.250.w1", "model.layers.18.block_sparse_moe.experts.251.w1", "model.layers.18.block_sparse_moe.experts.252.w1", "model.layers.18.block_sparse_moe.experts.253.w1", "model.layers.18.block_sparse_moe.experts.254.w1", "model.layers.18.block_sparse_moe.experts.255.w1", "model.layers.18.block_sparse_moe.experts.0.w3", "model.layers.18.block_sparse_moe.experts.1.w3", "model.layers.18.block_sparse_moe.experts.2.w3", "model.layers.18.block_sparse_moe.experts.3.w3", "model.layers.18.block_sparse_moe.experts.4.w3", "model.layers.18.block_sparse_moe.experts.5.w3", "model.layers.18.block_sparse_moe.experts.6.w3", "model.layers.18.block_sparse_moe.experts.7.w3", "model.layers.18.block_sparse_moe.experts.8.w3", "model.layers.18.block_sparse_moe.experts.9.w3", "model.layers.18.block_sparse_moe.experts.10.w3", "model.layers.18.block_sparse_moe.experts.11.w3", "model.layers.18.block_sparse_moe.experts.12.w3", "model.layers.18.block_sparse_moe.experts.13.w3", "model.layers.18.block_sparse_moe.experts.14.w3", "model.layers.18.block_sparse_moe.experts.15.w3", "model.layers.18.block_sparse_moe.experts.16.w3", "model.layers.18.block_sparse_moe.experts.17.w3", "model.layers.18.block_sparse_moe.experts.18.w3", "model.layers.18.block_sparse_moe.experts.19.w3", "model.layers.18.block_sparse_moe.experts.20.w3", "model.layers.18.block_sparse_moe.experts.21.w3", "model.layers.18.block_sparse_moe.experts.22.w3", "model.layers.18.block_sparse_moe.experts.23.w3", "model.layers.18.block_sparse_moe.experts.24.w3", "model.layers.18.block_sparse_moe.experts.25.w3", "model.layers.18.block_sparse_moe.experts.26.w3", "model.layers.18.block_sparse_moe.experts.27.w3", "model.layers.18.block_sparse_moe.experts.28.w3", "model.layers.18.block_sparse_moe.experts.29.w3", "model.layers.18.block_sparse_moe.experts.30.w3", "model.layers.18.block_sparse_moe.experts.31.w3", "model.layers.18.block_sparse_moe.experts.32.w3", "model.layers.18.block_sparse_moe.experts.33.w3", "model.layers.18.block_sparse_moe.experts.34.w3", "model.layers.18.block_sparse_moe.experts.35.w3", "model.layers.18.block_sparse_moe.experts.36.w3", "model.layers.18.block_sparse_moe.experts.37.w3", "model.layers.18.block_sparse_moe.experts.38.w3", "model.layers.18.block_sparse_moe.experts.39.w3", "model.layers.18.block_sparse_moe.experts.40.w3", "model.layers.18.block_sparse_moe.experts.41.w3", "model.layers.18.block_sparse_moe.experts.42.w3", "model.layers.18.block_sparse_moe.experts.43.w3", "model.layers.18.block_sparse_moe.experts.44.w3", "model.layers.18.block_sparse_moe.experts.45.w3", "model.layers.18.block_sparse_moe.experts.46.w3", "model.layers.18.block_sparse_moe.experts.47.w3", "model.layers.18.block_sparse_moe.experts.48.w3", "model.layers.18.block_sparse_moe.experts.49.w3", "model.layers.18.block_sparse_moe.experts.50.w3", "model.layers.18.block_sparse_moe.experts.51.w3", "model.layers.18.block_sparse_moe.experts.52.w3", "model.layers.18.block_sparse_moe.experts.53.w3", "model.layers.18.block_sparse_moe.experts.54.w3", "model.layers.18.block_sparse_moe.experts.55.w3", "model.layers.18.block_sparse_moe.experts.56.w3", "model.layers.18.block_sparse_moe.experts.57.w3", "model.layers.18.block_sparse_moe.experts.58.w3", "model.layers.18.block_sparse_moe.experts.59.w3", "model.layers.18.block_sparse_moe.experts.60.w3", "model.layers.18.block_sparse_moe.experts.61.w3", "model.layers.18.block_sparse_moe.experts.62.w3", "model.layers.18.block_sparse_moe.experts.63.w3", "model.layers.18.block_sparse_moe.experts.64.w3", "model.layers.18.block_sparse_moe.experts.65.w3", "model.layers.18.block_sparse_moe.experts.66.w3", "model.layers.18.block_sparse_moe.experts.67.w3", "model.layers.18.block_sparse_moe.experts.68.w3", "model.layers.18.block_sparse_moe.experts.69.w3", "model.layers.18.block_sparse_moe.experts.70.w3", "model.layers.18.block_sparse_moe.experts.71.w3", "model.layers.18.block_sparse_moe.experts.72.w3", "model.layers.18.block_sparse_moe.experts.73.w3", "model.layers.18.block_sparse_moe.experts.74.w3", "model.layers.18.block_sparse_moe.experts.75.w3", "model.layers.18.block_sparse_moe.experts.76.w3", "model.layers.18.block_sparse_moe.experts.77.w3", "model.layers.18.block_sparse_moe.experts.78.w3", "model.layers.18.block_sparse_moe.experts.79.w3", "model.layers.18.block_sparse_moe.experts.80.w3", "model.layers.18.block_sparse_moe.experts.81.w3", "model.layers.18.block_sparse_moe.experts.82.w3", "model.layers.18.block_sparse_moe.experts.83.w3", "model.layers.18.block_sparse_moe.experts.84.w3", "model.layers.18.block_sparse_moe.experts.85.w3", "model.layers.18.block_sparse_moe.experts.86.w3", "model.layers.18.block_sparse_moe.experts.87.w3", "model.layers.18.block_sparse_moe.experts.88.w3", "model.layers.18.block_sparse_moe.experts.89.w3", "model.layers.18.block_sparse_moe.experts.90.w3", "model.layers.18.block_sparse_moe.experts.91.w3", "model.layers.18.block_sparse_moe.experts.92.w3", "model.layers.18.block_sparse_moe.experts.93.w3", "model.layers.18.block_sparse_moe.experts.94.w3", "model.layers.18.block_sparse_moe.experts.95.w3", "model.layers.18.block_sparse_moe.experts.96.w3", "model.layers.18.block_sparse_moe.experts.97.w3", "model.layers.18.block_sparse_moe.experts.98.w3", "model.layers.18.block_sparse_moe.experts.99.w3", "model.layers.18.block_sparse_moe.experts.100.w3", "model.layers.18.block_sparse_moe.experts.101.w3", "model.layers.18.block_sparse_moe.experts.102.w3", "model.layers.18.block_sparse_moe.experts.103.w3", "model.layers.18.block_sparse_moe.experts.104.w3", "model.layers.18.block_sparse_moe.experts.105.w3", "model.layers.18.block_sparse_moe.experts.106.w3", "model.layers.18.block_sparse_moe.experts.107.w3", "model.layers.18.block_sparse_moe.experts.108.w3", "model.layers.18.block_sparse_moe.experts.109.w3", "model.layers.18.block_sparse_moe.experts.110.w3", "model.layers.18.block_sparse_moe.experts.111.w3", "model.layers.18.block_sparse_moe.experts.112.w3", "model.layers.18.block_sparse_moe.experts.113.w3", "model.layers.18.block_sparse_moe.experts.114.w3", "model.layers.18.block_sparse_moe.experts.115.w3", "model.layers.18.block_sparse_moe.experts.116.w3", "model.layers.18.block_sparse_moe.experts.117.w3", "model.layers.18.block_sparse_moe.experts.118.w3", "model.layers.18.block_sparse_moe.experts.119.w3", "model.layers.18.block_sparse_moe.experts.120.w3", "model.layers.18.block_sparse_moe.experts.121.w3", "model.layers.18.block_sparse_moe.experts.122.w3", "model.layers.18.block_sparse_moe.experts.123.w3", "model.layers.18.block_sparse_moe.experts.124.w3", "model.layers.18.block_sparse_moe.experts.125.w3", "model.layers.18.block_sparse_moe.experts.126.w3", "model.layers.18.block_sparse_moe.experts.127.w3", "model.layers.18.block_sparse_moe.experts.128.w3", "model.layers.18.block_sparse_moe.experts.129.w3", "model.layers.18.block_sparse_moe.experts.130.w3", "model.layers.18.block_sparse_moe.experts.131.w3", "model.layers.18.block_sparse_moe.experts.132.w3", "model.layers.18.block_sparse_moe.experts.133.w3", "model.layers.18.block_sparse_moe.experts.134.w3", "model.layers.18.block_sparse_moe.experts.135.w3", "model.layers.18.block_sparse_moe.experts.136.w3", "model.layers.18.block_sparse_moe.experts.137.w3", "model.layers.18.block_sparse_moe.experts.138.w3", "model.layers.18.block_sparse_moe.experts.139.w3", "model.layers.18.block_sparse_moe.experts.140.w3", "model.layers.18.block_sparse_moe.experts.141.w3", "model.layers.18.block_sparse_moe.experts.142.w3", "model.layers.18.block_sparse_moe.experts.143.w3", "model.layers.18.block_sparse_moe.experts.144.w3", "model.layers.18.block_sparse_moe.experts.145.w3", "model.layers.18.block_sparse_moe.experts.146.w3", "model.layers.18.block_sparse_moe.experts.147.w3", "model.layers.18.block_sparse_moe.experts.148.w3", "model.layers.18.block_sparse_moe.experts.149.w3", "model.layers.18.block_sparse_moe.experts.150.w3", "model.layers.18.block_sparse_moe.experts.151.w3", "model.layers.18.block_sparse_moe.experts.152.w3", "model.layers.18.block_sparse_moe.experts.153.w3", "model.layers.18.block_sparse_moe.experts.154.w3", "model.layers.18.block_sparse_moe.experts.155.w3", "model.layers.18.block_sparse_moe.experts.156.w3", "model.layers.18.block_sparse_moe.experts.157.w3", "model.layers.18.block_sparse_moe.experts.158.w3", "model.layers.18.block_sparse_moe.experts.159.w3", "model.layers.18.block_sparse_moe.experts.160.w3", "model.layers.18.block_sparse_moe.experts.161.w3", "model.layers.18.block_sparse_moe.experts.162.w3", "model.layers.18.block_sparse_moe.experts.163.w3", "model.layers.18.block_sparse_moe.experts.164.w3", "model.layers.18.block_sparse_moe.experts.165.w3", "model.layers.18.block_sparse_moe.experts.166.w3", "model.layers.18.block_sparse_moe.experts.167.w3", "model.layers.18.block_sparse_moe.experts.168.w3", "model.layers.18.block_sparse_moe.experts.169.w3", "model.layers.18.block_sparse_moe.experts.170.w3", "model.layers.18.block_sparse_moe.experts.171.w3", "model.layers.18.block_sparse_moe.experts.172.w3", "model.layers.18.block_sparse_moe.experts.173.w3", "model.layers.18.block_sparse_moe.experts.174.w3", "model.layers.18.block_sparse_moe.experts.175.w3", "model.layers.18.block_sparse_moe.experts.176.w3", "model.layers.18.block_sparse_moe.experts.177.w3", "model.layers.18.block_sparse_moe.experts.178.w3", "model.layers.18.block_sparse_moe.experts.179.w3", "model.layers.18.block_sparse_moe.experts.180.w3", "model.layers.18.block_sparse_moe.experts.181.w3", "model.layers.18.block_sparse_moe.experts.182.w3", "model.layers.18.block_sparse_moe.experts.183.w3", "model.layers.18.block_sparse_moe.experts.184.w3", "model.layers.18.block_sparse_moe.experts.185.w3", "model.layers.18.block_sparse_moe.experts.186.w3", "model.layers.18.block_sparse_moe.experts.187.w3", "model.layers.18.block_sparse_moe.experts.188.w3", "model.layers.18.block_sparse_moe.experts.189.w3", "model.layers.18.block_sparse_moe.experts.190.w3", "model.layers.18.block_sparse_moe.experts.191.w3", "model.layers.18.block_sparse_moe.experts.192.w3", "model.layers.18.block_sparse_moe.experts.193.w3", "model.layers.18.block_sparse_moe.experts.194.w3", "model.layers.18.block_sparse_moe.experts.195.w3", "model.layers.18.block_sparse_moe.experts.196.w3", "model.layers.18.block_sparse_moe.experts.197.w3", "model.layers.18.block_sparse_moe.experts.198.w3", "model.layers.18.block_sparse_moe.experts.199.w3", "model.layers.18.block_sparse_moe.experts.200.w3", "model.layers.18.block_sparse_moe.experts.201.w3", "model.layers.18.block_sparse_moe.experts.202.w3", "model.layers.18.block_sparse_moe.experts.203.w3", "model.layers.18.block_sparse_moe.experts.204.w3", "model.layers.18.block_sparse_moe.experts.205.w3", "model.layers.18.block_sparse_moe.experts.206.w3", "model.layers.18.block_sparse_moe.experts.207.w3", "model.layers.18.block_sparse_moe.experts.208.w3", "model.layers.18.block_sparse_moe.experts.209.w3", "model.layers.18.block_sparse_moe.experts.210.w3", "model.layers.18.block_sparse_moe.experts.211.w3", "model.layers.18.block_sparse_moe.experts.212.w3", "model.layers.18.block_sparse_moe.experts.213.w3", "model.layers.18.block_sparse_moe.experts.214.w3", "model.layers.18.block_sparse_moe.experts.215.w3", "model.layers.18.block_sparse_moe.experts.216.w3", "model.layers.18.block_sparse_moe.experts.217.w3", "model.layers.18.block_sparse_moe.experts.218.w3", "model.layers.18.block_sparse_moe.experts.219.w3", "model.layers.18.block_sparse_moe.experts.220.w3", "model.layers.18.block_sparse_moe.experts.221.w3", "model.layers.18.block_sparse_moe.experts.222.w3", "model.layers.18.block_sparse_moe.experts.223.w3", "model.layers.18.block_sparse_moe.experts.224.w3", "model.layers.18.block_sparse_moe.experts.225.w3", "model.layers.18.block_sparse_moe.experts.226.w3", "model.layers.18.block_sparse_moe.experts.227.w3", "model.layers.18.block_sparse_moe.experts.228.w3", "model.layers.18.block_sparse_moe.experts.229.w3", "model.layers.18.block_sparse_moe.experts.230.w3", "model.layers.18.block_sparse_moe.experts.231.w3", "model.layers.18.block_sparse_moe.experts.232.w3", "model.layers.18.block_sparse_moe.experts.233.w3", "model.layers.18.block_sparse_moe.experts.234.w3", "model.layers.18.block_sparse_moe.experts.235.w3", "model.layers.18.block_sparse_moe.experts.236.w3", "model.layers.18.block_sparse_moe.experts.237.w3", "model.layers.18.block_sparse_moe.experts.238.w3", "model.layers.18.block_sparse_moe.experts.239.w3", "model.layers.18.block_sparse_moe.experts.240.w3", "model.layers.18.block_sparse_moe.experts.241.w3", "model.layers.18.block_sparse_moe.experts.242.w3", "model.layers.18.block_sparse_moe.experts.243.w3", "model.layers.18.block_sparse_moe.experts.244.w3", "model.layers.18.block_sparse_moe.experts.245.w3", "model.layers.18.block_sparse_moe.experts.246.w3", "model.layers.18.block_sparse_moe.experts.247.w3", "model.layers.18.block_sparse_moe.experts.248.w3", "model.layers.18.block_sparse_moe.experts.249.w3", "model.layers.18.block_sparse_moe.experts.250.w3", "model.layers.18.block_sparse_moe.experts.251.w3", "model.layers.18.block_sparse_moe.experts.252.w3", "model.layers.18.block_sparse_moe.experts.253.w3", "model.layers.18.block_sparse_moe.experts.254.w3", "model.layers.18.block_sparse_moe.experts.255.w3", "model.layers.18.block_sparse_moe.experts.0.w2", "model.layers.18.block_sparse_moe.experts.1.w2", "model.layers.18.block_sparse_moe.experts.2.w2", "model.layers.18.block_sparse_moe.experts.3.w2", "model.layers.18.block_sparse_moe.experts.4.w2", "model.layers.18.block_sparse_moe.experts.5.w2", "model.layers.18.block_sparse_moe.experts.6.w2", "model.layers.18.block_sparse_moe.experts.7.w2", "model.layers.18.block_sparse_moe.experts.8.w2", "model.layers.18.block_sparse_moe.experts.9.w2", "model.layers.18.block_sparse_moe.experts.10.w2", "model.layers.18.block_sparse_moe.experts.11.w2", "model.layers.18.block_sparse_moe.experts.12.w2", "model.layers.18.block_sparse_moe.experts.13.w2", "model.layers.18.block_sparse_moe.experts.14.w2", "model.layers.18.block_sparse_moe.experts.15.w2", "model.layers.18.block_sparse_moe.experts.16.w2", "model.layers.18.block_sparse_moe.experts.17.w2", "model.layers.18.block_sparse_moe.experts.18.w2", "model.layers.18.block_sparse_moe.experts.19.w2", "model.layers.18.block_sparse_moe.experts.20.w2", "model.layers.18.block_sparse_moe.experts.21.w2", "model.layers.18.block_sparse_moe.experts.22.w2", "model.layers.18.block_sparse_moe.experts.23.w2", "model.layers.18.block_sparse_moe.experts.24.w2", "model.layers.18.block_sparse_moe.experts.25.w2", "model.layers.18.block_sparse_moe.experts.26.w2", "model.layers.18.block_sparse_moe.experts.27.w2", "model.layers.18.block_sparse_moe.experts.28.w2", "model.layers.18.block_sparse_moe.experts.29.w2", "model.layers.18.block_sparse_moe.experts.30.w2", "model.layers.18.block_sparse_moe.experts.31.w2", "model.layers.18.block_sparse_moe.experts.32.w2", "model.layers.18.block_sparse_moe.experts.33.w2", "model.layers.18.block_sparse_moe.experts.34.w2", "model.layers.18.block_sparse_moe.experts.35.w2", "model.layers.18.block_sparse_moe.experts.36.w2", "model.layers.18.block_sparse_moe.experts.37.w2", "model.layers.18.block_sparse_moe.experts.38.w2", "model.layers.18.block_sparse_moe.experts.39.w2", "model.layers.18.block_sparse_moe.experts.40.w2", "model.layers.18.block_sparse_moe.experts.41.w2", "model.layers.18.block_sparse_moe.experts.42.w2", "model.layers.18.block_sparse_moe.experts.43.w2", "model.layers.18.block_sparse_moe.experts.44.w2", "model.layers.18.block_sparse_moe.experts.45.w2", "model.layers.18.block_sparse_moe.experts.46.w2", "model.layers.18.block_sparse_moe.experts.47.w2", "model.layers.18.block_sparse_moe.experts.48.w2", "model.layers.18.block_sparse_moe.experts.49.w2", "model.layers.18.block_sparse_moe.experts.50.w2", "model.layers.18.block_sparse_moe.experts.51.w2", "model.layers.18.block_sparse_moe.experts.52.w2", "model.layers.18.block_sparse_moe.experts.53.w2", "model.layers.18.block_sparse_moe.experts.54.w2", "model.layers.18.block_sparse_moe.experts.55.w2", "model.layers.18.block_sparse_moe.experts.56.w2", "model.layers.18.block_sparse_moe.experts.57.w2", "model.layers.18.block_sparse_moe.experts.58.w2", "model.layers.18.block_sparse_moe.experts.59.w2", "model.layers.18.block_sparse_moe.experts.60.w2", "model.layers.18.block_sparse_moe.experts.61.w2", "model.layers.18.block_sparse_moe.experts.62.w2", "model.layers.18.block_sparse_moe.experts.63.w2", "model.layers.18.block_sparse_moe.experts.64.w2", "model.layers.18.block_sparse_moe.experts.65.w2", "model.layers.18.block_sparse_moe.experts.66.w2", "model.layers.18.block_sparse_moe.experts.67.w2", "model.layers.18.block_sparse_moe.experts.68.w2", "model.layers.18.block_sparse_moe.experts.69.w2", "model.layers.18.block_sparse_moe.experts.70.w2", "model.layers.18.block_sparse_moe.experts.71.w2", "model.layers.18.block_sparse_moe.experts.72.w2", "model.layers.18.block_sparse_moe.experts.73.w2", "model.layers.18.block_sparse_moe.experts.74.w2", "model.layers.18.block_sparse_moe.experts.75.w2", "model.layers.18.block_sparse_moe.experts.76.w2", "model.layers.18.block_sparse_moe.experts.77.w2", "model.layers.18.block_sparse_moe.experts.78.w2", "model.layers.18.block_sparse_moe.experts.79.w2", "model.layers.18.block_sparse_moe.experts.80.w2", "model.layers.18.block_sparse_moe.experts.81.w2", "model.layers.18.block_sparse_moe.experts.82.w2", "model.layers.18.block_sparse_moe.experts.83.w2", "model.layers.18.block_sparse_moe.experts.84.w2", "model.layers.18.block_sparse_moe.experts.85.w2", "model.layers.18.block_sparse_moe.experts.86.w2", "model.layers.18.block_sparse_moe.experts.87.w2", "model.layers.18.block_sparse_moe.experts.88.w2", "model.layers.18.block_sparse_moe.experts.89.w2", "model.layers.18.block_sparse_moe.experts.90.w2", "model.layers.18.block_sparse_moe.experts.91.w2", "model.layers.18.block_sparse_moe.experts.92.w2", "model.layers.18.block_sparse_moe.experts.93.w2", "model.layers.18.block_sparse_moe.experts.94.w2", "model.layers.18.block_sparse_moe.experts.95.w2", "model.layers.18.block_sparse_moe.experts.96.w2", "model.layers.18.block_sparse_moe.experts.97.w2", "model.layers.18.block_sparse_moe.experts.98.w2", "model.layers.18.block_sparse_moe.experts.99.w2", "model.layers.18.block_sparse_moe.experts.100.w2", "model.layers.18.block_sparse_moe.experts.101.w2", "model.layers.18.block_sparse_moe.experts.102.w2", "model.layers.18.block_sparse_moe.experts.103.w2", "model.layers.18.block_sparse_moe.experts.104.w2", "model.layers.18.block_sparse_moe.experts.105.w2", "model.layers.18.block_sparse_moe.experts.106.w2", "model.layers.18.block_sparse_moe.experts.107.w2", "model.layers.18.block_sparse_moe.experts.108.w2", "model.layers.18.block_sparse_moe.experts.109.w2", "model.layers.18.block_sparse_moe.experts.110.w2", "model.layers.18.block_sparse_moe.experts.111.w2", "model.layers.18.block_sparse_moe.experts.112.w2", "model.layers.18.block_sparse_moe.experts.113.w2", "model.layers.18.block_sparse_moe.experts.114.w2", "model.layers.18.block_sparse_moe.experts.115.w2", "model.layers.18.block_sparse_moe.experts.116.w2", "model.layers.18.block_sparse_moe.experts.117.w2", "model.layers.18.block_sparse_moe.experts.118.w2", "model.layers.18.block_sparse_moe.experts.119.w2", "model.layers.18.block_sparse_moe.experts.120.w2", "model.layers.18.block_sparse_moe.experts.121.w2", "model.layers.18.block_sparse_moe.experts.122.w2", "model.layers.18.block_sparse_moe.experts.123.w2", "model.layers.18.block_sparse_moe.experts.124.w2", "model.layers.18.block_sparse_moe.experts.125.w2", "model.layers.18.block_sparse_moe.experts.126.w2", "model.layers.18.block_sparse_moe.experts.127.w2", "model.layers.18.block_sparse_moe.experts.128.w2", "model.layers.18.block_sparse_moe.experts.129.w2", "model.layers.18.block_sparse_moe.experts.130.w2", "model.layers.18.block_sparse_moe.experts.131.w2", "model.layers.18.block_sparse_moe.experts.132.w2", "model.layers.18.block_sparse_moe.experts.133.w2", "model.layers.18.block_sparse_moe.experts.134.w2", "model.layers.18.block_sparse_moe.experts.135.w2", "model.layers.18.block_sparse_moe.experts.136.w2", "model.layers.18.block_sparse_moe.experts.137.w2", "model.layers.18.block_sparse_moe.experts.138.w2", "model.layers.18.block_sparse_moe.experts.139.w2", "model.layers.18.block_sparse_moe.experts.140.w2", "model.layers.18.block_sparse_moe.experts.141.w2", "model.layers.18.block_sparse_moe.experts.142.w2", "model.layers.18.block_sparse_moe.experts.143.w2", "model.layers.18.block_sparse_moe.experts.144.w2", "model.layers.18.block_sparse_moe.experts.145.w2", "model.layers.18.block_sparse_moe.experts.146.w2", "model.layers.18.block_sparse_moe.experts.147.w2", "model.layers.18.block_sparse_moe.experts.148.w2", "model.layers.18.block_sparse_moe.experts.149.w2", "model.layers.18.block_sparse_moe.experts.150.w2", "model.layers.18.block_sparse_moe.experts.151.w2", "model.layers.18.block_sparse_moe.experts.152.w2", "model.layers.18.block_sparse_moe.experts.153.w2", "model.layers.18.block_sparse_moe.experts.154.w2", "model.layers.18.block_sparse_moe.experts.155.w2", "model.layers.18.block_sparse_moe.experts.156.w2", "model.layers.18.block_sparse_moe.experts.157.w2", "model.layers.18.block_sparse_moe.experts.158.w2", "model.layers.18.block_sparse_moe.experts.159.w2", "model.layers.18.block_sparse_moe.experts.160.w2", "model.layers.18.block_sparse_moe.experts.161.w2", "model.layers.18.block_sparse_moe.experts.162.w2", "model.layers.18.block_sparse_moe.experts.163.w2", "model.layers.18.block_sparse_moe.experts.164.w2", "model.layers.18.block_sparse_moe.experts.165.w2", "model.layers.18.block_sparse_moe.experts.166.w2", "model.layers.18.block_sparse_moe.experts.167.w2", "model.layers.18.block_sparse_moe.experts.168.w2", "model.layers.18.block_sparse_moe.experts.169.w2", "model.layers.18.block_sparse_moe.experts.170.w2", "model.layers.18.block_sparse_moe.experts.171.w2", "model.layers.18.block_sparse_moe.experts.172.w2", "model.layers.18.block_sparse_moe.experts.173.w2", "model.layers.18.block_sparse_moe.experts.174.w2", "model.layers.18.block_sparse_moe.experts.175.w2", "model.layers.18.block_sparse_moe.experts.176.w2", "model.layers.18.block_sparse_moe.experts.177.w2", "model.layers.18.block_sparse_moe.experts.178.w2", "model.layers.18.block_sparse_moe.experts.179.w2", "model.layers.18.block_sparse_moe.experts.180.w2", "model.layers.18.block_sparse_moe.experts.181.w2", "model.layers.18.block_sparse_moe.experts.182.w2", "model.layers.18.block_sparse_moe.experts.183.w2", "model.layers.18.block_sparse_moe.experts.184.w2", "model.layers.18.block_sparse_moe.experts.185.w2", "model.layers.18.block_sparse_moe.experts.186.w2", "model.layers.18.block_sparse_moe.experts.187.w2", "model.layers.18.block_sparse_moe.experts.188.w2", "model.layers.18.block_sparse_moe.experts.189.w2", "model.layers.18.block_sparse_moe.experts.190.w2", "model.layers.18.block_sparse_moe.experts.191.w2", "model.layers.18.block_sparse_moe.experts.192.w2", "model.layers.18.block_sparse_moe.experts.193.w2", "model.layers.18.block_sparse_moe.experts.194.w2", "model.layers.18.block_sparse_moe.experts.195.w2", "model.layers.18.block_sparse_moe.experts.196.w2", "model.layers.18.block_sparse_moe.experts.197.w2", "model.layers.18.block_sparse_moe.experts.198.w2", "model.layers.18.block_sparse_moe.experts.199.w2", "model.layers.18.block_sparse_moe.experts.200.w2", "model.layers.18.block_sparse_moe.experts.201.w2", "model.layers.18.block_sparse_moe.experts.202.w2", "model.layers.18.block_sparse_moe.experts.203.w2", "model.layers.18.block_sparse_moe.experts.204.w2", "model.layers.18.block_sparse_moe.experts.205.w2", "model.layers.18.block_sparse_moe.experts.206.w2", "model.layers.18.block_sparse_moe.experts.207.w2", "model.layers.18.block_sparse_moe.experts.208.w2", "model.layers.18.block_sparse_moe.experts.209.w2", "model.layers.18.block_sparse_moe.experts.210.w2", "model.layers.18.block_sparse_moe.experts.211.w2", "model.layers.18.block_sparse_moe.experts.212.w2", "model.layers.18.block_sparse_moe.experts.213.w2", "model.layers.18.block_sparse_moe.experts.214.w2", "model.layers.18.block_sparse_moe.experts.215.w2", "model.layers.18.block_sparse_moe.experts.216.w2", "model.layers.18.block_sparse_moe.experts.217.w2", "model.layers.18.block_sparse_moe.experts.218.w2", "model.layers.18.block_sparse_moe.experts.219.w2", "model.layers.18.block_sparse_moe.experts.220.w2", "model.layers.18.block_sparse_moe.experts.221.w2", "model.layers.18.block_sparse_moe.experts.222.w2", "model.layers.18.block_sparse_moe.experts.223.w2", "model.layers.18.block_sparse_moe.experts.224.w2", "model.layers.18.block_sparse_moe.experts.225.w2", "model.layers.18.block_sparse_moe.experts.226.w2", "model.layers.18.block_sparse_moe.experts.227.w2", "model.layers.18.block_sparse_moe.experts.228.w2", "model.layers.18.block_sparse_moe.experts.229.w2", "model.layers.18.block_sparse_moe.experts.230.w2", "model.layers.18.block_sparse_moe.experts.231.w2", "model.layers.18.block_sparse_moe.experts.232.w2", "model.layers.18.block_sparse_moe.experts.233.w2", "model.layers.18.block_sparse_moe.experts.234.w2", "model.layers.18.block_sparse_moe.experts.235.w2", "model.layers.18.block_sparse_moe.experts.236.w2", "model.layers.18.block_sparse_moe.experts.237.w2", "model.layers.18.block_sparse_moe.experts.238.w2", "model.layers.18.block_sparse_moe.experts.239.w2", "model.layers.18.block_sparse_moe.experts.240.w2", "model.layers.18.block_sparse_moe.experts.241.w2", "model.layers.18.block_sparse_moe.experts.242.w2", "model.layers.18.block_sparse_moe.experts.243.w2", "model.layers.18.block_sparse_moe.experts.244.w2", "model.layers.18.block_sparse_moe.experts.245.w2", "model.layers.18.block_sparse_moe.experts.246.w2", "model.layers.18.block_sparse_moe.experts.247.w2", "model.layers.18.block_sparse_moe.experts.248.w2", "model.layers.18.block_sparse_moe.experts.249.w2", "model.layers.18.block_sparse_moe.experts.250.w2", "model.layers.18.block_sparse_moe.experts.251.w2", "model.layers.18.block_sparse_moe.experts.252.w2", "model.layers.18.block_sparse_moe.experts.253.w2", "model.layers.18.block_sparse_moe.experts.254.w2", "model.layers.18.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0007432155311107302, "dbits": 3623878656 } ] }, { "idx": 38, "layers": [ "model.layers.19.self_attn.q_proj", "model.layers.19.self_attn.k_proj", "model.layers.19.self_attn.v_proj", "model.layers.19.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0002934331074357366, "dbits": 44040192 } ] }, { "idx": 39, "layers": [ "model.layers.19.block_sparse_moe.experts.0.w1", "model.layers.19.block_sparse_moe.experts.1.w1", "model.layers.19.block_sparse_moe.experts.2.w1", "model.layers.19.block_sparse_moe.experts.3.w1", "model.layers.19.block_sparse_moe.experts.4.w1", "model.layers.19.block_sparse_moe.experts.5.w1", "model.layers.19.block_sparse_moe.experts.6.w1", "model.layers.19.block_sparse_moe.experts.7.w1", "model.layers.19.block_sparse_moe.experts.8.w1", "model.layers.19.block_sparse_moe.experts.9.w1", "model.layers.19.block_sparse_moe.experts.10.w1", "model.layers.19.block_sparse_moe.experts.11.w1", "model.layers.19.block_sparse_moe.experts.12.w1", "model.layers.19.block_sparse_moe.experts.13.w1", "model.layers.19.block_sparse_moe.experts.14.w1", "model.layers.19.block_sparse_moe.experts.15.w1", "model.layers.19.block_sparse_moe.experts.16.w1", "model.layers.19.block_sparse_moe.experts.17.w1", "model.layers.19.block_sparse_moe.experts.18.w1", "model.layers.19.block_sparse_moe.experts.19.w1", "model.layers.19.block_sparse_moe.experts.20.w1", "model.layers.19.block_sparse_moe.experts.21.w1", "model.layers.19.block_sparse_moe.experts.22.w1", "model.layers.19.block_sparse_moe.experts.23.w1", "model.layers.19.block_sparse_moe.experts.24.w1", "model.layers.19.block_sparse_moe.experts.25.w1", "model.layers.19.block_sparse_moe.experts.26.w1", "model.layers.19.block_sparse_moe.experts.27.w1", "model.layers.19.block_sparse_moe.experts.28.w1", "model.layers.19.block_sparse_moe.experts.29.w1", "model.layers.19.block_sparse_moe.experts.30.w1", "model.layers.19.block_sparse_moe.experts.31.w1", "model.layers.19.block_sparse_moe.experts.32.w1", "model.layers.19.block_sparse_moe.experts.33.w1", "model.layers.19.block_sparse_moe.experts.34.w1", "model.layers.19.block_sparse_moe.experts.35.w1", "model.layers.19.block_sparse_moe.experts.36.w1", "model.layers.19.block_sparse_moe.experts.37.w1", "model.layers.19.block_sparse_moe.experts.38.w1", "model.layers.19.block_sparse_moe.experts.39.w1", "model.layers.19.block_sparse_moe.experts.40.w1", "model.layers.19.block_sparse_moe.experts.41.w1", "model.layers.19.block_sparse_moe.experts.42.w1", "model.layers.19.block_sparse_moe.experts.43.w1", "model.layers.19.block_sparse_moe.experts.44.w1", "model.layers.19.block_sparse_moe.experts.45.w1", "model.layers.19.block_sparse_moe.experts.46.w1", "model.layers.19.block_sparse_moe.experts.47.w1", "model.layers.19.block_sparse_moe.experts.48.w1", "model.layers.19.block_sparse_moe.experts.49.w1", "model.layers.19.block_sparse_moe.experts.50.w1", "model.layers.19.block_sparse_moe.experts.51.w1", "model.layers.19.block_sparse_moe.experts.52.w1", "model.layers.19.block_sparse_moe.experts.53.w1", "model.layers.19.block_sparse_moe.experts.54.w1", "model.layers.19.block_sparse_moe.experts.55.w1", "model.layers.19.block_sparse_moe.experts.56.w1", "model.layers.19.block_sparse_moe.experts.57.w1", "model.layers.19.block_sparse_moe.experts.58.w1", "model.layers.19.block_sparse_moe.experts.59.w1", "model.layers.19.block_sparse_moe.experts.60.w1", "model.layers.19.block_sparse_moe.experts.61.w1", "model.layers.19.block_sparse_moe.experts.62.w1", "model.layers.19.block_sparse_moe.experts.63.w1", "model.layers.19.block_sparse_moe.experts.64.w1", "model.layers.19.block_sparse_moe.experts.65.w1", "model.layers.19.block_sparse_moe.experts.66.w1", "model.layers.19.block_sparse_moe.experts.67.w1", "model.layers.19.block_sparse_moe.experts.68.w1", "model.layers.19.block_sparse_moe.experts.69.w1", "model.layers.19.block_sparse_moe.experts.70.w1", "model.layers.19.block_sparse_moe.experts.71.w1", "model.layers.19.block_sparse_moe.experts.72.w1", "model.layers.19.block_sparse_moe.experts.73.w1", "model.layers.19.block_sparse_moe.experts.74.w1", "model.layers.19.block_sparse_moe.experts.75.w1", "model.layers.19.block_sparse_moe.experts.76.w1", "model.layers.19.block_sparse_moe.experts.77.w1", "model.layers.19.block_sparse_moe.experts.78.w1", "model.layers.19.block_sparse_moe.experts.79.w1", "model.layers.19.block_sparse_moe.experts.80.w1", "model.layers.19.block_sparse_moe.experts.81.w1", "model.layers.19.block_sparse_moe.experts.82.w1", "model.layers.19.block_sparse_moe.experts.83.w1", "model.layers.19.block_sparse_moe.experts.84.w1", "model.layers.19.block_sparse_moe.experts.85.w1", "model.layers.19.block_sparse_moe.experts.86.w1", "model.layers.19.block_sparse_moe.experts.87.w1", "model.layers.19.block_sparse_moe.experts.88.w1", "model.layers.19.block_sparse_moe.experts.89.w1", "model.layers.19.block_sparse_moe.experts.90.w1", "model.layers.19.block_sparse_moe.experts.91.w1", "model.layers.19.block_sparse_moe.experts.92.w1", "model.layers.19.block_sparse_moe.experts.93.w1", "model.layers.19.block_sparse_moe.experts.94.w1", "model.layers.19.block_sparse_moe.experts.95.w1", "model.layers.19.block_sparse_moe.experts.96.w1", "model.layers.19.block_sparse_moe.experts.97.w1", "model.layers.19.block_sparse_moe.experts.98.w1", "model.layers.19.block_sparse_moe.experts.99.w1", "model.layers.19.block_sparse_moe.experts.100.w1", "model.layers.19.block_sparse_moe.experts.101.w1", "model.layers.19.block_sparse_moe.experts.102.w1", "model.layers.19.block_sparse_moe.experts.103.w1", "model.layers.19.block_sparse_moe.experts.104.w1", "model.layers.19.block_sparse_moe.experts.105.w1", "model.layers.19.block_sparse_moe.experts.106.w1", "model.layers.19.block_sparse_moe.experts.107.w1", "model.layers.19.block_sparse_moe.experts.108.w1", "model.layers.19.block_sparse_moe.experts.109.w1", "model.layers.19.block_sparse_moe.experts.110.w1", "model.layers.19.block_sparse_moe.experts.111.w1", "model.layers.19.block_sparse_moe.experts.112.w1", "model.layers.19.block_sparse_moe.experts.113.w1", "model.layers.19.block_sparse_moe.experts.114.w1", "model.layers.19.block_sparse_moe.experts.115.w1", "model.layers.19.block_sparse_moe.experts.116.w1", "model.layers.19.block_sparse_moe.experts.117.w1", "model.layers.19.block_sparse_moe.experts.118.w1", "model.layers.19.block_sparse_moe.experts.119.w1", "model.layers.19.block_sparse_moe.experts.120.w1", "model.layers.19.block_sparse_moe.experts.121.w1", "model.layers.19.block_sparse_moe.experts.122.w1", "model.layers.19.block_sparse_moe.experts.123.w1", "model.layers.19.block_sparse_moe.experts.124.w1", "model.layers.19.block_sparse_moe.experts.125.w1", "model.layers.19.block_sparse_moe.experts.126.w1", "model.layers.19.block_sparse_moe.experts.127.w1", "model.layers.19.block_sparse_moe.experts.128.w1", "model.layers.19.block_sparse_moe.experts.129.w1", "model.layers.19.block_sparse_moe.experts.130.w1", "model.layers.19.block_sparse_moe.experts.131.w1", "model.layers.19.block_sparse_moe.experts.132.w1", "model.layers.19.block_sparse_moe.experts.133.w1", "model.layers.19.block_sparse_moe.experts.134.w1", "model.layers.19.block_sparse_moe.experts.135.w1", "model.layers.19.block_sparse_moe.experts.136.w1", "model.layers.19.block_sparse_moe.experts.137.w1", "model.layers.19.block_sparse_moe.experts.138.w1", "model.layers.19.block_sparse_moe.experts.139.w1", "model.layers.19.block_sparse_moe.experts.140.w1", "model.layers.19.block_sparse_moe.experts.141.w1", "model.layers.19.block_sparse_moe.experts.142.w1", "model.layers.19.block_sparse_moe.experts.143.w1", "model.layers.19.block_sparse_moe.experts.144.w1", "model.layers.19.block_sparse_moe.experts.145.w1", "model.layers.19.block_sparse_moe.experts.146.w1", "model.layers.19.block_sparse_moe.experts.147.w1", "model.layers.19.block_sparse_moe.experts.148.w1", "model.layers.19.block_sparse_moe.experts.149.w1", "model.layers.19.block_sparse_moe.experts.150.w1", "model.layers.19.block_sparse_moe.experts.151.w1", "model.layers.19.block_sparse_moe.experts.152.w1", "model.layers.19.block_sparse_moe.experts.153.w1", "model.layers.19.block_sparse_moe.experts.154.w1", "model.layers.19.block_sparse_moe.experts.155.w1", "model.layers.19.block_sparse_moe.experts.156.w1", "model.layers.19.block_sparse_moe.experts.157.w1", "model.layers.19.block_sparse_moe.experts.158.w1", "model.layers.19.block_sparse_moe.experts.159.w1", "model.layers.19.block_sparse_moe.experts.160.w1", "model.layers.19.block_sparse_moe.experts.161.w1", "model.layers.19.block_sparse_moe.experts.162.w1", "model.layers.19.block_sparse_moe.experts.163.w1", "model.layers.19.block_sparse_moe.experts.164.w1", "model.layers.19.block_sparse_moe.experts.165.w1", "model.layers.19.block_sparse_moe.experts.166.w1", "model.layers.19.block_sparse_moe.experts.167.w1", "model.layers.19.block_sparse_moe.experts.168.w1", "model.layers.19.block_sparse_moe.experts.169.w1", "model.layers.19.block_sparse_moe.experts.170.w1", "model.layers.19.block_sparse_moe.experts.171.w1", "model.layers.19.block_sparse_moe.experts.172.w1", "model.layers.19.block_sparse_moe.experts.173.w1", "model.layers.19.block_sparse_moe.experts.174.w1", "model.layers.19.block_sparse_moe.experts.175.w1", "model.layers.19.block_sparse_moe.experts.176.w1", "model.layers.19.block_sparse_moe.experts.177.w1", "model.layers.19.block_sparse_moe.experts.178.w1", "model.layers.19.block_sparse_moe.experts.179.w1", "model.layers.19.block_sparse_moe.experts.180.w1", "model.layers.19.block_sparse_moe.experts.181.w1", "model.layers.19.block_sparse_moe.experts.182.w1", "model.layers.19.block_sparse_moe.experts.183.w1", "model.layers.19.block_sparse_moe.experts.184.w1", "model.layers.19.block_sparse_moe.experts.185.w1", "model.layers.19.block_sparse_moe.experts.186.w1", "model.layers.19.block_sparse_moe.experts.187.w1", "model.layers.19.block_sparse_moe.experts.188.w1", "model.layers.19.block_sparse_moe.experts.189.w1", "model.layers.19.block_sparse_moe.experts.190.w1", "model.layers.19.block_sparse_moe.experts.191.w1", "model.layers.19.block_sparse_moe.experts.192.w1", "model.layers.19.block_sparse_moe.experts.193.w1", "model.layers.19.block_sparse_moe.experts.194.w1", "model.layers.19.block_sparse_moe.experts.195.w1", "model.layers.19.block_sparse_moe.experts.196.w1", "model.layers.19.block_sparse_moe.experts.197.w1", "model.layers.19.block_sparse_moe.experts.198.w1", "model.layers.19.block_sparse_moe.experts.199.w1", "model.layers.19.block_sparse_moe.experts.200.w1", "model.layers.19.block_sparse_moe.experts.201.w1", "model.layers.19.block_sparse_moe.experts.202.w1", "model.layers.19.block_sparse_moe.experts.203.w1", "model.layers.19.block_sparse_moe.experts.204.w1", "model.layers.19.block_sparse_moe.experts.205.w1", "model.layers.19.block_sparse_moe.experts.206.w1", "model.layers.19.block_sparse_moe.experts.207.w1", "model.layers.19.block_sparse_moe.experts.208.w1", "model.layers.19.block_sparse_moe.experts.209.w1", "model.layers.19.block_sparse_moe.experts.210.w1", "model.layers.19.block_sparse_moe.experts.211.w1", "model.layers.19.block_sparse_moe.experts.212.w1", "model.layers.19.block_sparse_moe.experts.213.w1", "model.layers.19.block_sparse_moe.experts.214.w1", "model.layers.19.block_sparse_moe.experts.215.w1", "model.layers.19.block_sparse_moe.experts.216.w1", "model.layers.19.block_sparse_moe.experts.217.w1", "model.layers.19.block_sparse_moe.experts.218.w1", "model.layers.19.block_sparse_moe.experts.219.w1", "model.layers.19.block_sparse_moe.experts.220.w1", "model.layers.19.block_sparse_moe.experts.221.w1", "model.layers.19.block_sparse_moe.experts.222.w1", "model.layers.19.block_sparse_moe.experts.223.w1", "model.layers.19.block_sparse_moe.experts.224.w1", "model.layers.19.block_sparse_moe.experts.225.w1", "model.layers.19.block_sparse_moe.experts.226.w1", "model.layers.19.block_sparse_moe.experts.227.w1", "model.layers.19.block_sparse_moe.experts.228.w1", "model.layers.19.block_sparse_moe.experts.229.w1", "model.layers.19.block_sparse_moe.experts.230.w1", "model.layers.19.block_sparse_moe.experts.231.w1", "model.layers.19.block_sparse_moe.experts.232.w1", "model.layers.19.block_sparse_moe.experts.233.w1", "model.layers.19.block_sparse_moe.experts.234.w1", "model.layers.19.block_sparse_moe.experts.235.w1", "model.layers.19.block_sparse_moe.experts.236.w1", "model.layers.19.block_sparse_moe.experts.237.w1", "model.layers.19.block_sparse_moe.experts.238.w1", "model.layers.19.block_sparse_moe.experts.239.w1", "model.layers.19.block_sparse_moe.experts.240.w1", "model.layers.19.block_sparse_moe.experts.241.w1", "model.layers.19.block_sparse_moe.experts.242.w1", "model.layers.19.block_sparse_moe.experts.243.w1", "model.layers.19.block_sparse_moe.experts.244.w1", "model.layers.19.block_sparse_moe.experts.245.w1", "model.layers.19.block_sparse_moe.experts.246.w1", "model.layers.19.block_sparse_moe.experts.247.w1", "model.layers.19.block_sparse_moe.experts.248.w1", "model.layers.19.block_sparse_moe.experts.249.w1", "model.layers.19.block_sparse_moe.experts.250.w1", "model.layers.19.block_sparse_moe.experts.251.w1", "model.layers.19.block_sparse_moe.experts.252.w1", "model.layers.19.block_sparse_moe.experts.253.w1", "model.layers.19.block_sparse_moe.experts.254.w1", "model.layers.19.block_sparse_moe.experts.255.w1", "model.layers.19.block_sparse_moe.experts.0.w3", "model.layers.19.block_sparse_moe.experts.1.w3", "model.layers.19.block_sparse_moe.experts.2.w3", "model.layers.19.block_sparse_moe.experts.3.w3", "model.layers.19.block_sparse_moe.experts.4.w3", "model.layers.19.block_sparse_moe.experts.5.w3", "model.layers.19.block_sparse_moe.experts.6.w3", "model.layers.19.block_sparse_moe.experts.7.w3", "model.layers.19.block_sparse_moe.experts.8.w3", "model.layers.19.block_sparse_moe.experts.9.w3", "model.layers.19.block_sparse_moe.experts.10.w3", "model.layers.19.block_sparse_moe.experts.11.w3", "model.layers.19.block_sparse_moe.experts.12.w3", "model.layers.19.block_sparse_moe.experts.13.w3", "model.layers.19.block_sparse_moe.experts.14.w3", "model.layers.19.block_sparse_moe.experts.15.w3", "model.layers.19.block_sparse_moe.experts.16.w3", "model.layers.19.block_sparse_moe.experts.17.w3", "model.layers.19.block_sparse_moe.experts.18.w3", "model.layers.19.block_sparse_moe.experts.19.w3", "model.layers.19.block_sparse_moe.experts.20.w3", "model.layers.19.block_sparse_moe.experts.21.w3", "model.layers.19.block_sparse_moe.experts.22.w3", "model.layers.19.block_sparse_moe.experts.23.w3", "model.layers.19.block_sparse_moe.experts.24.w3", "model.layers.19.block_sparse_moe.experts.25.w3", "model.layers.19.block_sparse_moe.experts.26.w3", "model.layers.19.block_sparse_moe.experts.27.w3", "model.layers.19.block_sparse_moe.experts.28.w3", "model.layers.19.block_sparse_moe.experts.29.w3", "model.layers.19.block_sparse_moe.experts.30.w3", "model.layers.19.block_sparse_moe.experts.31.w3", "model.layers.19.block_sparse_moe.experts.32.w3", "model.layers.19.block_sparse_moe.experts.33.w3", "model.layers.19.block_sparse_moe.experts.34.w3", "model.layers.19.block_sparse_moe.experts.35.w3", "model.layers.19.block_sparse_moe.experts.36.w3", "model.layers.19.block_sparse_moe.experts.37.w3", "model.layers.19.block_sparse_moe.experts.38.w3", "model.layers.19.block_sparse_moe.experts.39.w3", "model.layers.19.block_sparse_moe.experts.40.w3", "model.layers.19.block_sparse_moe.experts.41.w3", "model.layers.19.block_sparse_moe.experts.42.w3", "model.layers.19.block_sparse_moe.experts.43.w3", "model.layers.19.block_sparse_moe.experts.44.w3", "model.layers.19.block_sparse_moe.experts.45.w3", "model.layers.19.block_sparse_moe.experts.46.w3", "model.layers.19.block_sparse_moe.experts.47.w3", "model.layers.19.block_sparse_moe.experts.48.w3", "model.layers.19.block_sparse_moe.experts.49.w3", "model.layers.19.block_sparse_moe.experts.50.w3", "model.layers.19.block_sparse_moe.experts.51.w3", "model.layers.19.block_sparse_moe.experts.52.w3", "model.layers.19.block_sparse_moe.experts.53.w3", "model.layers.19.block_sparse_moe.experts.54.w3", "model.layers.19.block_sparse_moe.experts.55.w3", "model.layers.19.block_sparse_moe.experts.56.w3", "model.layers.19.block_sparse_moe.experts.57.w3", "model.layers.19.block_sparse_moe.experts.58.w3", "model.layers.19.block_sparse_moe.experts.59.w3", "model.layers.19.block_sparse_moe.experts.60.w3", "model.layers.19.block_sparse_moe.experts.61.w3", "model.layers.19.block_sparse_moe.experts.62.w3", "model.layers.19.block_sparse_moe.experts.63.w3", "model.layers.19.block_sparse_moe.experts.64.w3", "model.layers.19.block_sparse_moe.experts.65.w3", "model.layers.19.block_sparse_moe.experts.66.w3", "model.layers.19.block_sparse_moe.experts.67.w3", "model.layers.19.block_sparse_moe.experts.68.w3", "model.layers.19.block_sparse_moe.experts.69.w3", "model.layers.19.block_sparse_moe.experts.70.w3", "model.layers.19.block_sparse_moe.experts.71.w3", "model.layers.19.block_sparse_moe.experts.72.w3", "model.layers.19.block_sparse_moe.experts.73.w3", "model.layers.19.block_sparse_moe.experts.74.w3", "model.layers.19.block_sparse_moe.experts.75.w3", "model.layers.19.block_sparse_moe.experts.76.w3", "model.layers.19.block_sparse_moe.experts.77.w3", "model.layers.19.block_sparse_moe.experts.78.w3", "model.layers.19.block_sparse_moe.experts.79.w3", "model.layers.19.block_sparse_moe.experts.80.w3", "model.layers.19.block_sparse_moe.experts.81.w3", "model.layers.19.block_sparse_moe.experts.82.w3", "model.layers.19.block_sparse_moe.experts.83.w3", "model.layers.19.block_sparse_moe.experts.84.w3", "model.layers.19.block_sparse_moe.experts.85.w3", "model.layers.19.block_sparse_moe.experts.86.w3", "model.layers.19.block_sparse_moe.experts.87.w3", "model.layers.19.block_sparse_moe.experts.88.w3", "model.layers.19.block_sparse_moe.experts.89.w3", "model.layers.19.block_sparse_moe.experts.90.w3", "model.layers.19.block_sparse_moe.experts.91.w3", "model.layers.19.block_sparse_moe.experts.92.w3", "model.layers.19.block_sparse_moe.experts.93.w3", "model.layers.19.block_sparse_moe.experts.94.w3", "model.layers.19.block_sparse_moe.experts.95.w3", "model.layers.19.block_sparse_moe.experts.96.w3", "model.layers.19.block_sparse_moe.experts.97.w3", "model.layers.19.block_sparse_moe.experts.98.w3", "model.layers.19.block_sparse_moe.experts.99.w3", "model.layers.19.block_sparse_moe.experts.100.w3", "model.layers.19.block_sparse_moe.experts.101.w3", "model.layers.19.block_sparse_moe.experts.102.w3", "model.layers.19.block_sparse_moe.experts.103.w3", "model.layers.19.block_sparse_moe.experts.104.w3", "model.layers.19.block_sparse_moe.experts.105.w3", "model.layers.19.block_sparse_moe.experts.106.w3", "model.layers.19.block_sparse_moe.experts.107.w3", "model.layers.19.block_sparse_moe.experts.108.w3", "model.layers.19.block_sparse_moe.experts.109.w3", "model.layers.19.block_sparse_moe.experts.110.w3", "model.layers.19.block_sparse_moe.experts.111.w3", "model.layers.19.block_sparse_moe.experts.112.w3", "model.layers.19.block_sparse_moe.experts.113.w3", "model.layers.19.block_sparse_moe.experts.114.w3", "model.layers.19.block_sparse_moe.experts.115.w3", "model.layers.19.block_sparse_moe.experts.116.w3", "model.layers.19.block_sparse_moe.experts.117.w3", "model.layers.19.block_sparse_moe.experts.118.w3", "model.layers.19.block_sparse_moe.experts.119.w3", "model.layers.19.block_sparse_moe.experts.120.w3", "model.layers.19.block_sparse_moe.experts.121.w3", "model.layers.19.block_sparse_moe.experts.122.w3", "model.layers.19.block_sparse_moe.experts.123.w3", "model.layers.19.block_sparse_moe.experts.124.w3", "model.layers.19.block_sparse_moe.experts.125.w3", "model.layers.19.block_sparse_moe.experts.126.w3", "model.layers.19.block_sparse_moe.experts.127.w3", "model.layers.19.block_sparse_moe.experts.128.w3", "model.layers.19.block_sparse_moe.experts.129.w3", "model.layers.19.block_sparse_moe.experts.130.w3", "model.layers.19.block_sparse_moe.experts.131.w3", "model.layers.19.block_sparse_moe.experts.132.w3", "model.layers.19.block_sparse_moe.experts.133.w3", "model.layers.19.block_sparse_moe.experts.134.w3", "model.layers.19.block_sparse_moe.experts.135.w3", "model.layers.19.block_sparse_moe.experts.136.w3", "model.layers.19.block_sparse_moe.experts.137.w3", "model.layers.19.block_sparse_moe.experts.138.w3", "model.layers.19.block_sparse_moe.experts.139.w3", "model.layers.19.block_sparse_moe.experts.140.w3", "model.layers.19.block_sparse_moe.experts.141.w3", "model.layers.19.block_sparse_moe.experts.142.w3", "model.layers.19.block_sparse_moe.experts.143.w3", "model.layers.19.block_sparse_moe.experts.144.w3", "model.layers.19.block_sparse_moe.experts.145.w3", "model.layers.19.block_sparse_moe.experts.146.w3", "model.layers.19.block_sparse_moe.experts.147.w3", "model.layers.19.block_sparse_moe.experts.148.w3", "model.layers.19.block_sparse_moe.experts.149.w3", "model.layers.19.block_sparse_moe.experts.150.w3", "model.layers.19.block_sparse_moe.experts.151.w3", "model.layers.19.block_sparse_moe.experts.152.w3", "model.layers.19.block_sparse_moe.experts.153.w3", "model.layers.19.block_sparse_moe.experts.154.w3", "model.layers.19.block_sparse_moe.experts.155.w3", "model.layers.19.block_sparse_moe.experts.156.w3", "model.layers.19.block_sparse_moe.experts.157.w3", "model.layers.19.block_sparse_moe.experts.158.w3", "model.layers.19.block_sparse_moe.experts.159.w3", "model.layers.19.block_sparse_moe.experts.160.w3", "model.layers.19.block_sparse_moe.experts.161.w3", "model.layers.19.block_sparse_moe.experts.162.w3", "model.layers.19.block_sparse_moe.experts.163.w3", "model.layers.19.block_sparse_moe.experts.164.w3", "model.layers.19.block_sparse_moe.experts.165.w3", "model.layers.19.block_sparse_moe.experts.166.w3", "model.layers.19.block_sparse_moe.experts.167.w3", "model.layers.19.block_sparse_moe.experts.168.w3", "model.layers.19.block_sparse_moe.experts.169.w3", "model.layers.19.block_sparse_moe.experts.170.w3", "model.layers.19.block_sparse_moe.experts.171.w3", "model.layers.19.block_sparse_moe.experts.172.w3", "model.layers.19.block_sparse_moe.experts.173.w3", "model.layers.19.block_sparse_moe.experts.174.w3", "model.layers.19.block_sparse_moe.experts.175.w3", "model.layers.19.block_sparse_moe.experts.176.w3", "model.layers.19.block_sparse_moe.experts.177.w3", "model.layers.19.block_sparse_moe.experts.178.w3", "model.layers.19.block_sparse_moe.experts.179.w3", "model.layers.19.block_sparse_moe.experts.180.w3", "model.layers.19.block_sparse_moe.experts.181.w3", "model.layers.19.block_sparse_moe.experts.182.w3", "model.layers.19.block_sparse_moe.experts.183.w3", "model.layers.19.block_sparse_moe.experts.184.w3", "model.layers.19.block_sparse_moe.experts.185.w3", "model.layers.19.block_sparse_moe.experts.186.w3", "model.layers.19.block_sparse_moe.experts.187.w3", "model.layers.19.block_sparse_moe.experts.188.w3", "model.layers.19.block_sparse_moe.experts.189.w3", "model.layers.19.block_sparse_moe.experts.190.w3", "model.layers.19.block_sparse_moe.experts.191.w3", "model.layers.19.block_sparse_moe.experts.192.w3", "model.layers.19.block_sparse_moe.experts.193.w3", "model.layers.19.block_sparse_moe.experts.194.w3", "model.layers.19.block_sparse_moe.experts.195.w3", "model.layers.19.block_sparse_moe.experts.196.w3", "model.layers.19.block_sparse_moe.experts.197.w3", "model.layers.19.block_sparse_moe.experts.198.w3", "model.layers.19.block_sparse_moe.experts.199.w3", "model.layers.19.block_sparse_moe.experts.200.w3", "model.layers.19.block_sparse_moe.experts.201.w3", "model.layers.19.block_sparse_moe.experts.202.w3", "model.layers.19.block_sparse_moe.experts.203.w3", "model.layers.19.block_sparse_moe.experts.204.w3", "model.layers.19.block_sparse_moe.experts.205.w3", "model.layers.19.block_sparse_moe.experts.206.w3", "model.layers.19.block_sparse_moe.experts.207.w3", "model.layers.19.block_sparse_moe.experts.208.w3", "model.layers.19.block_sparse_moe.experts.209.w3", "model.layers.19.block_sparse_moe.experts.210.w3", "model.layers.19.block_sparse_moe.experts.211.w3", "model.layers.19.block_sparse_moe.experts.212.w3", "model.layers.19.block_sparse_moe.experts.213.w3", "model.layers.19.block_sparse_moe.experts.214.w3", "model.layers.19.block_sparse_moe.experts.215.w3", "model.layers.19.block_sparse_moe.experts.216.w3", "model.layers.19.block_sparse_moe.experts.217.w3", "model.layers.19.block_sparse_moe.experts.218.w3", "model.layers.19.block_sparse_moe.experts.219.w3", "model.layers.19.block_sparse_moe.experts.220.w3", "model.layers.19.block_sparse_moe.experts.221.w3", "model.layers.19.block_sparse_moe.experts.222.w3", "model.layers.19.block_sparse_moe.experts.223.w3", "model.layers.19.block_sparse_moe.experts.224.w3", "model.layers.19.block_sparse_moe.experts.225.w3", "model.layers.19.block_sparse_moe.experts.226.w3", "model.layers.19.block_sparse_moe.experts.227.w3", "model.layers.19.block_sparse_moe.experts.228.w3", "model.layers.19.block_sparse_moe.experts.229.w3", "model.layers.19.block_sparse_moe.experts.230.w3", "model.layers.19.block_sparse_moe.experts.231.w3", "model.layers.19.block_sparse_moe.experts.232.w3", "model.layers.19.block_sparse_moe.experts.233.w3", "model.layers.19.block_sparse_moe.experts.234.w3", "model.layers.19.block_sparse_moe.experts.235.w3", "model.layers.19.block_sparse_moe.experts.236.w3", "model.layers.19.block_sparse_moe.experts.237.w3", "model.layers.19.block_sparse_moe.experts.238.w3", "model.layers.19.block_sparse_moe.experts.239.w3", "model.layers.19.block_sparse_moe.experts.240.w3", "model.layers.19.block_sparse_moe.experts.241.w3", "model.layers.19.block_sparse_moe.experts.242.w3", "model.layers.19.block_sparse_moe.experts.243.w3", "model.layers.19.block_sparse_moe.experts.244.w3", "model.layers.19.block_sparse_moe.experts.245.w3", "model.layers.19.block_sparse_moe.experts.246.w3", "model.layers.19.block_sparse_moe.experts.247.w3", "model.layers.19.block_sparse_moe.experts.248.w3", "model.layers.19.block_sparse_moe.experts.249.w3", "model.layers.19.block_sparse_moe.experts.250.w3", "model.layers.19.block_sparse_moe.experts.251.w3", "model.layers.19.block_sparse_moe.experts.252.w3", "model.layers.19.block_sparse_moe.experts.253.w3", "model.layers.19.block_sparse_moe.experts.254.w3", "model.layers.19.block_sparse_moe.experts.255.w3", "model.layers.19.block_sparse_moe.experts.0.w2", "model.layers.19.block_sparse_moe.experts.1.w2", "model.layers.19.block_sparse_moe.experts.2.w2", "model.layers.19.block_sparse_moe.experts.3.w2", "model.layers.19.block_sparse_moe.experts.4.w2", "model.layers.19.block_sparse_moe.experts.5.w2", "model.layers.19.block_sparse_moe.experts.6.w2", "model.layers.19.block_sparse_moe.experts.7.w2", "model.layers.19.block_sparse_moe.experts.8.w2", "model.layers.19.block_sparse_moe.experts.9.w2", "model.layers.19.block_sparse_moe.experts.10.w2", "model.layers.19.block_sparse_moe.experts.11.w2", "model.layers.19.block_sparse_moe.experts.12.w2", "model.layers.19.block_sparse_moe.experts.13.w2", "model.layers.19.block_sparse_moe.experts.14.w2", "model.layers.19.block_sparse_moe.experts.15.w2", "model.layers.19.block_sparse_moe.experts.16.w2", "model.layers.19.block_sparse_moe.experts.17.w2", "model.layers.19.block_sparse_moe.experts.18.w2", "model.layers.19.block_sparse_moe.experts.19.w2", "model.layers.19.block_sparse_moe.experts.20.w2", "model.layers.19.block_sparse_moe.experts.21.w2", "model.layers.19.block_sparse_moe.experts.22.w2", "model.layers.19.block_sparse_moe.experts.23.w2", "model.layers.19.block_sparse_moe.experts.24.w2", "model.layers.19.block_sparse_moe.experts.25.w2", "model.layers.19.block_sparse_moe.experts.26.w2", "model.layers.19.block_sparse_moe.experts.27.w2", "model.layers.19.block_sparse_moe.experts.28.w2", "model.layers.19.block_sparse_moe.experts.29.w2", "model.layers.19.block_sparse_moe.experts.30.w2", "model.layers.19.block_sparse_moe.experts.31.w2", "model.layers.19.block_sparse_moe.experts.32.w2", "model.layers.19.block_sparse_moe.experts.33.w2", "model.layers.19.block_sparse_moe.experts.34.w2", "model.layers.19.block_sparse_moe.experts.35.w2", "model.layers.19.block_sparse_moe.experts.36.w2", "model.layers.19.block_sparse_moe.experts.37.w2", "model.layers.19.block_sparse_moe.experts.38.w2", "model.layers.19.block_sparse_moe.experts.39.w2", "model.layers.19.block_sparse_moe.experts.40.w2", "model.layers.19.block_sparse_moe.experts.41.w2", "model.layers.19.block_sparse_moe.experts.42.w2", "model.layers.19.block_sparse_moe.experts.43.w2", "model.layers.19.block_sparse_moe.experts.44.w2", "model.layers.19.block_sparse_moe.experts.45.w2", "model.layers.19.block_sparse_moe.experts.46.w2", "model.layers.19.block_sparse_moe.experts.47.w2", "model.layers.19.block_sparse_moe.experts.48.w2", "model.layers.19.block_sparse_moe.experts.49.w2", "model.layers.19.block_sparse_moe.experts.50.w2", "model.layers.19.block_sparse_moe.experts.51.w2", "model.layers.19.block_sparse_moe.experts.52.w2", "model.layers.19.block_sparse_moe.experts.53.w2", "model.layers.19.block_sparse_moe.experts.54.w2", "model.layers.19.block_sparse_moe.experts.55.w2", "model.layers.19.block_sparse_moe.experts.56.w2", "model.layers.19.block_sparse_moe.experts.57.w2", "model.layers.19.block_sparse_moe.experts.58.w2", "model.layers.19.block_sparse_moe.experts.59.w2", "model.layers.19.block_sparse_moe.experts.60.w2", "model.layers.19.block_sparse_moe.experts.61.w2", "model.layers.19.block_sparse_moe.experts.62.w2", "model.layers.19.block_sparse_moe.experts.63.w2", "model.layers.19.block_sparse_moe.experts.64.w2", "model.layers.19.block_sparse_moe.experts.65.w2", "model.layers.19.block_sparse_moe.experts.66.w2", "model.layers.19.block_sparse_moe.experts.67.w2", "model.layers.19.block_sparse_moe.experts.68.w2", "model.layers.19.block_sparse_moe.experts.69.w2", "model.layers.19.block_sparse_moe.experts.70.w2", "model.layers.19.block_sparse_moe.experts.71.w2", "model.layers.19.block_sparse_moe.experts.72.w2", "model.layers.19.block_sparse_moe.experts.73.w2", "model.layers.19.block_sparse_moe.experts.74.w2", "model.layers.19.block_sparse_moe.experts.75.w2", "model.layers.19.block_sparse_moe.experts.76.w2", "model.layers.19.block_sparse_moe.experts.77.w2", "model.layers.19.block_sparse_moe.experts.78.w2", "model.layers.19.block_sparse_moe.experts.79.w2", "model.layers.19.block_sparse_moe.experts.80.w2", "model.layers.19.block_sparse_moe.experts.81.w2", "model.layers.19.block_sparse_moe.experts.82.w2", "model.layers.19.block_sparse_moe.experts.83.w2", "model.layers.19.block_sparse_moe.experts.84.w2", "model.layers.19.block_sparse_moe.experts.85.w2", "model.layers.19.block_sparse_moe.experts.86.w2", "model.layers.19.block_sparse_moe.experts.87.w2", "model.layers.19.block_sparse_moe.experts.88.w2", "model.layers.19.block_sparse_moe.experts.89.w2", "model.layers.19.block_sparse_moe.experts.90.w2", "model.layers.19.block_sparse_moe.experts.91.w2", "model.layers.19.block_sparse_moe.experts.92.w2", "model.layers.19.block_sparse_moe.experts.93.w2", "model.layers.19.block_sparse_moe.experts.94.w2", "model.layers.19.block_sparse_moe.experts.95.w2", "model.layers.19.block_sparse_moe.experts.96.w2", "model.layers.19.block_sparse_moe.experts.97.w2", "model.layers.19.block_sparse_moe.experts.98.w2", "model.layers.19.block_sparse_moe.experts.99.w2", "model.layers.19.block_sparse_moe.experts.100.w2", "model.layers.19.block_sparse_moe.experts.101.w2", "model.layers.19.block_sparse_moe.experts.102.w2", "model.layers.19.block_sparse_moe.experts.103.w2", "model.layers.19.block_sparse_moe.experts.104.w2", "model.layers.19.block_sparse_moe.experts.105.w2", "model.layers.19.block_sparse_moe.experts.106.w2", "model.layers.19.block_sparse_moe.experts.107.w2", "model.layers.19.block_sparse_moe.experts.108.w2", "model.layers.19.block_sparse_moe.experts.109.w2", "model.layers.19.block_sparse_moe.experts.110.w2", "model.layers.19.block_sparse_moe.experts.111.w2", "model.layers.19.block_sparse_moe.experts.112.w2", "model.layers.19.block_sparse_moe.experts.113.w2", "model.layers.19.block_sparse_moe.experts.114.w2", "model.layers.19.block_sparse_moe.experts.115.w2", "model.layers.19.block_sparse_moe.experts.116.w2", "model.layers.19.block_sparse_moe.experts.117.w2", "model.layers.19.block_sparse_moe.experts.118.w2", "model.layers.19.block_sparse_moe.experts.119.w2", "model.layers.19.block_sparse_moe.experts.120.w2", "model.layers.19.block_sparse_moe.experts.121.w2", "model.layers.19.block_sparse_moe.experts.122.w2", "model.layers.19.block_sparse_moe.experts.123.w2", "model.layers.19.block_sparse_moe.experts.124.w2", "model.layers.19.block_sparse_moe.experts.125.w2", "model.layers.19.block_sparse_moe.experts.126.w2", "model.layers.19.block_sparse_moe.experts.127.w2", "model.layers.19.block_sparse_moe.experts.128.w2", "model.layers.19.block_sparse_moe.experts.129.w2", "model.layers.19.block_sparse_moe.experts.130.w2", "model.layers.19.block_sparse_moe.experts.131.w2", "model.layers.19.block_sparse_moe.experts.132.w2", "model.layers.19.block_sparse_moe.experts.133.w2", "model.layers.19.block_sparse_moe.experts.134.w2", "model.layers.19.block_sparse_moe.experts.135.w2", "model.layers.19.block_sparse_moe.experts.136.w2", "model.layers.19.block_sparse_moe.experts.137.w2", "model.layers.19.block_sparse_moe.experts.138.w2", "model.layers.19.block_sparse_moe.experts.139.w2", "model.layers.19.block_sparse_moe.experts.140.w2", "model.layers.19.block_sparse_moe.experts.141.w2", "model.layers.19.block_sparse_moe.experts.142.w2", "model.layers.19.block_sparse_moe.experts.143.w2", "model.layers.19.block_sparse_moe.experts.144.w2", "model.layers.19.block_sparse_moe.experts.145.w2", "model.layers.19.block_sparse_moe.experts.146.w2", "model.layers.19.block_sparse_moe.experts.147.w2", "model.layers.19.block_sparse_moe.experts.148.w2", "model.layers.19.block_sparse_moe.experts.149.w2", "model.layers.19.block_sparse_moe.experts.150.w2", "model.layers.19.block_sparse_moe.experts.151.w2", "model.layers.19.block_sparse_moe.experts.152.w2", "model.layers.19.block_sparse_moe.experts.153.w2", "model.layers.19.block_sparse_moe.experts.154.w2", "model.layers.19.block_sparse_moe.experts.155.w2", "model.layers.19.block_sparse_moe.experts.156.w2", "model.layers.19.block_sparse_moe.experts.157.w2", "model.layers.19.block_sparse_moe.experts.158.w2", "model.layers.19.block_sparse_moe.experts.159.w2", "model.layers.19.block_sparse_moe.experts.160.w2", "model.layers.19.block_sparse_moe.experts.161.w2", "model.layers.19.block_sparse_moe.experts.162.w2", "model.layers.19.block_sparse_moe.experts.163.w2", "model.layers.19.block_sparse_moe.experts.164.w2", "model.layers.19.block_sparse_moe.experts.165.w2", "model.layers.19.block_sparse_moe.experts.166.w2", "model.layers.19.block_sparse_moe.experts.167.w2", "model.layers.19.block_sparse_moe.experts.168.w2", "model.layers.19.block_sparse_moe.experts.169.w2", "model.layers.19.block_sparse_moe.experts.170.w2", "model.layers.19.block_sparse_moe.experts.171.w2", "model.layers.19.block_sparse_moe.experts.172.w2", "model.layers.19.block_sparse_moe.experts.173.w2", "model.layers.19.block_sparse_moe.experts.174.w2", "model.layers.19.block_sparse_moe.experts.175.w2", "model.layers.19.block_sparse_moe.experts.176.w2", "model.layers.19.block_sparse_moe.experts.177.w2", "model.layers.19.block_sparse_moe.experts.178.w2", "model.layers.19.block_sparse_moe.experts.179.w2", "model.layers.19.block_sparse_moe.experts.180.w2", "model.layers.19.block_sparse_moe.experts.181.w2", "model.layers.19.block_sparse_moe.experts.182.w2", "model.layers.19.block_sparse_moe.experts.183.w2", "model.layers.19.block_sparse_moe.experts.184.w2", "model.layers.19.block_sparse_moe.experts.185.w2", "model.layers.19.block_sparse_moe.experts.186.w2", "model.layers.19.block_sparse_moe.experts.187.w2", "model.layers.19.block_sparse_moe.experts.188.w2", "model.layers.19.block_sparse_moe.experts.189.w2", "model.layers.19.block_sparse_moe.experts.190.w2", "model.layers.19.block_sparse_moe.experts.191.w2", "model.layers.19.block_sparse_moe.experts.192.w2", "model.layers.19.block_sparse_moe.experts.193.w2", "model.layers.19.block_sparse_moe.experts.194.w2", "model.layers.19.block_sparse_moe.experts.195.w2", "model.layers.19.block_sparse_moe.experts.196.w2", "model.layers.19.block_sparse_moe.experts.197.w2", "model.layers.19.block_sparse_moe.experts.198.w2", "model.layers.19.block_sparse_moe.experts.199.w2", "model.layers.19.block_sparse_moe.experts.200.w2", "model.layers.19.block_sparse_moe.experts.201.w2", "model.layers.19.block_sparse_moe.experts.202.w2", "model.layers.19.block_sparse_moe.experts.203.w2", "model.layers.19.block_sparse_moe.experts.204.w2", "model.layers.19.block_sparse_moe.experts.205.w2", "model.layers.19.block_sparse_moe.experts.206.w2", "model.layers.19.block_sparse_moe.experts.207.w2", "model.layers.19.block_sparse_moe.experts.208.w2", "model.layers.19.block_sparse_moe.experts.209.w2", "model.layers.19.block_sparse_moe.experts.210.w2", "model.layers.19.block_sparse_moe.experts.211.w2", "model.layers.19.block_sparse_moe.experts.212.w2", "model.layers.19.block_sparse_moe.experts.213.w2", "model.layers.19.block_sparse_moe.experts.214.w2", "model.layers.19.block_sparse_moe.experts.215.w2", "model.layers.19.block_sparse_moe.experts.216.w2", "model.layers.19.block_sparse_moe.experts.217.w2", "model.layers.19.block_sparse_moe.experts.218.w2", "model.layers.19.block_sparse_moe.experts.219.w2", "model.layers.19.block_sparse_moe.experts.220.w2", "model.layers.19.block_sparse_moe.experts.221.w2", "model.layers.19.block_sparse_moe.experts.222.w2", "model.layers.19.block_sparse_moe.experts.223.w2", "model.layers.19.block_sparse_moe.experts.224.w2", "model.layers.19.block_sparse_moe.experts.225.w2", "model.layers.19.block_sparse_moe.experts.226.w2", "model.layers.19.block_sparse_moe.experts.227.w2", "model.layers.19.block_sparse_moe.experts.228.w2", "model.layers.19.block_sparse_moe.experts.229.w2", "model.layers.19.block_sparse_moe.experts.230.w2", "model.layers.19.block_sparse_moe.experts.231.w2", "model.layers.19.block_sparse_moe.experts.232.w2", "model.layers.19.block_sparse_moe.experts.233.w2", "model.layers.19.block_sparse_moe.experts.234.w2", "model.layers.19.block_sparse_moe.experts.235.w2", "model.layers.19.block_sparse_moe.experts.236.w2", "model.layers.19.block_sparse_moe.experts.237.w2", "model.layers.19.block_sparse_moe.experts.238.w2", "model.layers.19.block_sparse_moe.experts.239.w2", "model.layers.19.block_sparse_moe.experts.240.w2", "model.layers.19.block_sparse_moe.experts.241.w2", "model.layers.19.block_sparse_moe.experts.242.w2", "model.layers.19.block_sparse_moe.experts.243.w2", "model.layers.19.block_sparse_moe.experts.244.w2", "model.layers.19.block_sparse_moe.experts.245.w2", "model.layers.19.block_sparse_moe.experts.246.w2", "model.layers.19.block_sparse_moe.experts.247.w2", "model.layers.19.block_sparse_moe.experts.248.w2", "model.layers.19.block_sparse_moe.experts.249.w2", "model.layers.19.block_sparse_moe.experts.250.w2", "model.layers.19.block_sparse_moe.experts.251.w2", "model.layers.19.block_sparse_moe.experts.252.w2", "model.layers.19.block_sparse_moe.experts.253.w2", "model.layers.19.block_sparse_moe.experts.254.w2", "model.layers.19.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -9.830743074418224e-05, "dbits": 3623878656 } ] }, { "idx": 40, "layers": [ "model.layers.20.self_attn.q_proj", "model.layers.20.self_attn.k_proj", "model.layers.20.self_attn.v_proj", "model.layers.20.self_attn.o_proj" ], "candidates": [ { "dkld": -0.010205120965838421, "dbits": 44040192 } ] }, { "idx": 41, "layers": [ "model.layers.20.block_sparse_moe.experts.0.w1", "model.layers.20.block_sparse_moe.experts.1.w1", "model.layers.20.block_sparse_moe.experts.2.w1", "model.layers.20.block_sparse_moe.experts.3.w1", "model.layers.20.block_sparse_moe.experts.4.w1", "model.layers.20.block_sparse_moe.experts.5.w1", "model.layers.20.block_sparse_moe.experts.6.w1", "model.layers.20.block_sparse_moe.experts.7.w1", "model.layers.20.block_sparse_moe.experts.8.w1", "model.layers.20.block_sparse_moe.experts.9.w1", "model.layers.20.block_sparse_moe.experts.10.w1", "model.layers.20.block_sparse_moe.experts.11.w1", "model.layers.20.block_sparse_moe.experts.12.w1", "model.layers.20.block_sparse_moe.experts.13.w1", "model.layers.20.block_sparse_moe.experts.14.w1", "model.layers.20.block_sparse_moe.experts.15.w1", "model.layers.20.block_sparse_moe.experts.16.w1", "model.layers.20.block_sparse_moe.experts.17.w1", "model.layers.20.block_sparse_moe.experts.18.w1", "model.layers.20.block_sparse_moe.experts.19.w1", "model.layers.20.block_sparse_moe.experts.20.w1", "model.layers.20.block_sparse_moe.experts.21.w1", "model.layers.20.block_sparse_moe.experts.22.w1", "model.layers.20.block_sparse_moe.experts.23.w1", "model.layers.20.block_sparse_moe.experts.24.w1", "model.layers.20.block_sparse_moe.experts.25.w1", "model.layers.20.block_sparse_moe.experts.26.w1", "model.layers.20.block_sparse_moe.experts.27.w1", "model.layers.20.block_sparse_moe.experts.28.w1", "model.layers.20.block_sparse_moe.experts.29.w1", "model.layers.20.block_sparse_moe.experts.30.w1", "model.layers.20.block_sparse_moe.experts.31.w1", "model.layers.20.block_sparse_moe.experts.32.w1", "model.layers.20.block_sparse_moe.experts.33.w1", "model.layers.20.block_sparse_moe.experts.34.w1", "model.layers.20.block_sparse_moe.experts.35.w1", "model.layers.20.block_sparse_moe.experts.36.w1", "model.layers.20.block_sparse_moe.experts.37.w1", "model.layers.20.block_sparse_moe.experts.38.w1", "model.layers.20.block_sparse_moe.experts.39.w1", "model.layers.20.block_sparse_moe.experts.40.w1", "model.layers.20.block_sparse_moe.experts.41.w1", "model.layers.20.block_sparse_moe.experts.42.w1", "model.layers.20.block_sparse_moe.experts.43.w1", "model.layers.20.block_sparse_moe.experts.44.w1", "model.layers.20.block_sparse_moe.experts.45.w1", "model.layers.20.block_sparse_moe.experts.46.w1", "model.layers.20.block_sparse_moe.experts.47.w1", "model.layers.20.block_sparse_moe.experts.48.w1", "model.layers.20.block_sparse_moe.experts.49.w1", "model.layers.20.block_sparse_moe.experts.50.w1", "model.layers.20.block_sparse_moe.experts.51.w1", "model.layers.20.block_sparse_moe.experts.52.w1", "model.layers.20.block_sparse_moe.experts.53.w1", "model.layers.20.block_sparse_moe.experts.54.w1", "model.layers.20.block_sparse_moe.experts.55.w1", "model.layers.20.block_sparse_moe.experts.56.w1", "model.layers.20.block_sparse_moe.experts.57.w1", "model.layers.20.block_sparse_moe.experts.58.w1", "model.layers.20.block_sparse_moe.experts.59.w1", "model.layers.20.block_sparse_moe.experts.60.w1", "model.layers.20.block_sparse_moe.experts.61.w1", "model.layers.20.block_sparse_moe.experts.62.w1", "model.layers.20.block_sparse_moe.experts.63.w1", "model.layers.20.block_sparse_moe.experts.64.w1", "model.layers.20.block_sparse_moe.experts.65.w1", "model.layers.20.block_sparse_moe.experts.66.w1", "model.layers.20.block_sparse_moe.experts.67.w1", "model.layers.20.block_sparse_moe.experts.68.w1", "model.layers.20.block_sparse_moe.experts.69.w1", "model.layers.20.block_sparse_moe.experts.70.w1", "model.layers.20.block_sparse_moe.experts.71.w1", "model.layers.20.block_sparse_moe.experts.72.w1", "model.layers.20.block_sparse_moe.experts.73.w1", "model.layers.20.block_sparse_moe.experts.74.w1", "model.layers.20.block_sparse_moe.experts.75.w1", "model.layers.20.block_sparse_moe.experts.76.w1", "model.layers.20.block_sparse_moe.experts.77.w1", "model.layers.20.block_sparse_moe.experts.78.w1", "model.layers.20.block_sparse_moe.experts.79.w1", "model.layers.20.block_sparse_moe.experts.80.w1", "model.layers.20.block_sparse_moe.experts.81.w1", "model.layers.20.block_sparse_moe.experts.82.w1", "model.layers.20.block_sparse_moe.experts.83.w1", "model.layers.20.block_sparse_moe.experts.84.w1", "model.layers.20.block_sparse_moe.experts.85.w1", "model.layers.20.block_sparse_moe.experts.86.w1", "model.layers.20.block_sparse_moe.experts.87.w1", "model.layers.20.block_sparse_moe.experts.88.w1", "model.layers.20.block_sparse_moe.experts.89.w1", "model.layers.20.block_sparse_moe.experts.90.w1", "model.layers.20.block_sparse_moe.experts.91.w1", "model.layers.20.block_sparse_moe.experts.92.w1", "model.layers.20.block_sparse_moe.experts.93.w1", "model.layers.20.block_sparse_moe.experts.94.w1", "model.layers.20.block_sparse_moe.experts.95.w1", "model.layers.20.block_sparse_moe.experts.96.w1", "model.layers.20.block_sparse_moe.experts.97.w1", "model.layers.20.block_sparse_moe.experts.98.w1", "model.layers.20.block_sparse_moe.experts.99.w1", "model.layers.20.block_sparse_moe.experts.100.w1", "model.layers.20.block_sparse_moe.experts.101.w1", "model.layers.20.block_sparse_moe.experts.102.w1", "model.layers.20.block_sparse_moe.experts.103.w1", "model.layers.20.block_sparse_moe.experts.104.w1", "model.layers.20.block_sparse_moe.experts.105.w1", "model.layers.20.block_sparse_moe.experts.106.w1", "model.layers.20.block_sparse_moe.experts.107.w1", "model.layers.20.block_sparse_moe.experts.108.w1", "model.layers.20.block_sparse_moe.experts.109.w1", "model.layers.20.block_sparse_moe.experts.110.w1", "model.layers.20.block_sparse_moe.experts.111.w1", "model.layers.20.block_sparse_moe.experts.112.w1", "model.layers.20.block_sparse_moe.experts.113.w1", "model.layers.20.block_sparse_moe.experts.114.w1", "model.layers.20.block_sparse_moe.experts.115.w1", "model.layers.20.block_sparse_moe.experts.116.w1", "model.layers.20.block_sparse_moe.experts.117.w1", "model.layers.20.block_sparse_moe.experts.118.w1", "model.layers.20.block_sparse_moe.experts.119.w1", "model.layers.20.block_sparse_moe.experts.120.w1", "model.layers.20.block_sparse_moe.experts.121.w1", "model.layers.20.block_sparse_moe.experts.122.w1", "model.layers.20.block_sparse_moe.experts.123.w1", "model.layers.20.block_sparse_moe.experts.124.w1", "model.layers.20.block_sparse_moe.experts.125.w1", "model.layers.20.block_sparse_moe.experts.126.w1", "model.layers.20.block_sparse_moe.experts.127.w1", "model.layers.20.block_sparse_moe.experts.128.w1", "model.layers.20.block_sparse_moe.experts.129.w1", "model.layers.20.block_sparse_moe.experts.130.w1", "model.layers.20.block_sparse_moe.experts.131.w1", "model.layers.20.block_sparse_moe.experts.132.w1", "model.layers.20.block_sparse_moe.experts.133.w1", "model.layers.20.block_sparse_moe.experts.134.w1", "model.layers.20.block_sparse_moe.experts.135.w1", "model.layers.20.block_sparse_moe.experts.136.w1", "model.layers.20.block_sparse_moe.experts.137.w1", "model.layers.20.block_sparse_moe.experts.138.w1", "model.layers.20.block_sparse_moe.experts.139.w1", "model.layers.20.block_sparse_moe.experts.140.w1", "model.layers.20.block_sparse_moe.experts.141.w1", "model.layers.20.block_sparse_moe.experts.142.w1", "model.layers.20.block_sparse_moe.experts.143.w1", "model.layers.20.block_sparse_moe.experts.144.w1", "model.layers.20.block_sparse_moe.experts.145.w1", "model.layers.20.block_sparse_moe.experts.146.w1", "model.layers.20.block_sparse_moe.experts.147.w1", "model.layers.20.block_sparse_moe.experts.148.w1", "model.layers.20.block_sparse_moe.experts.149.w1", "model.layers.20.block_sparse_moe.experts.150.w1", "model.layers.20.block_sparse_moe.experts.151.w1", "model.layers.20.block_sparse_moe.experts.152.w1", "model.layers.20.block_sparse_moe.experts.153.w1", "model.layers.20.block_sparse_moe.experts.154.w1", "model.layers.20.block_sparse_moe.experts.155.w1", "model.layers.20.block_sparse_moe.experts.156.w1", "model.layers.20.block_sparse_moe.experts.157.w1", "model.layers.20.block_sparse_moe.experts.158.w1", "model.layers.20.block_sparse_moe.experts.159.w1", "model.layers.20.block_sparse_moe.experts.160.w1", "model.layers.20.block_sparse_moe.experts.161.w1", "model.layers.20.block_sparse_moe.experts.162.w1", "model.layers.20.block_sparse_moe.experts.163.w1", "model.layers.20.block_sparse_moe.experts.164.w1", "model.layers.20.block_sparse_moe.experts.165.w1", "model.layers.20.block_sparse_moe.experts.166.w1", "model.layers.20.block_sparse_moe.experts.167.w1", "model.layers.20.block_sparse_moe.experts.168.w1", "model.layers.20.block_sparse_moe.experts.169.w1", "model.layers.20.block_sparse_moe.experts.170.w1", "model.layers.20.block_sparse_moe.experts.171.w1", "model.layers.20.block_sparse_moe.experts.172.w1", "model.layers.20.block_sparse_moe.experts.173.w1", "model.layers.20.block_sparse_moe.experts.174.w1", "model.layers.20.block_sparse_moe.experts.175.w1", "model.layers.20.block_sparse_moe.experts.176.w1", "model.layers.20.block_sparse_moe.experts.177.w1", "model.layers.20.block_sparse_moe.experts.178.w1", "model.layers.20.block_sparse_moe.experts.179.w1", "model.layers.20.block_sparse_moe.experts.180.w1", "model.layers.20.block_sparse_moe.experts.181.w1", "model.layers.20.block_sparse_moe.experts.182.w1", "model.layers.20.block_sparse_moe.experts.183.w1", "model.layers.20.block_sparse_moe.experts.184.w1", "model.layers.20.block_sparse_moe.experts.185.w1", "model.layers.20.block_sparse_moe.experts.186.w1", "model.layers.20.block_sparse_moe.experts.187.w1", "model.layers.20.block_sparse_moe.experts.188.w1", "model.layers.20.block_sparse_moe.experts.189.w1", "model.layers.20.block_sparse_moe.experts.190.w1", "model.layers.20.block_sparse_moe.experts.191.w1", "model.layers.20.block_sparse_moe.experts.192.w1", "model.layers.20.block_sparse_moe.experts.193.w1", "model.layers.20.block_sparse_moe.experts.194.w1", "model.layers.20.block_sparse_moe.experts.195.w1", "model.layers.20.block_sparse_moe.experts.196.w1", "model.layers.20.block_sparse_moe.experts.197.w1", "model.layers.20.block_sparse_moe.experts.198.w1", "model.layers.20.block_sparse_moe.experts.199.w1", "model.layers.20.block_sparse_moe.experts.200.w1", "model.layers.20.block_sparse_moe.experts.201.w1", "model.layers.20.block_sparse_moe.experts.202.w1", "model.layers.20.block_sparse_moe.experts.203.w1", "model.layers.20.block_sparse_moe.experts.204.w1", "model.layers.20.block_sparse_moe.experts.205.w1", "model.layers.20.block_sparse_moe.experts.206.w1", "model.layers.20.block_sparse_moe.experts.207.w1", "model.layers.20.block_sparse_moe.experts.208.w1", "model.layers.20.block_sparse_moe.experts.209.w1", "model.layers.20.block_sparse_moe.experts.210.w1", "model.layers.20.block_sparse_moe.experts.211.w1", "model.layers.20.block_sparse_moe.experts.212.w1", "model.layers.20.block_sparse_moe.experts.213.w1", "model.layers.20.block_sparse_moe.experts.214.w1", "model.layers.20.block_sparse_moe.experts.215.w1", "model.layers.20.block_sparse_moe.experts.216.w1", "model.layers.20.block_sparse_moe.experts.217.w1", "model.layers.20.block_sparse_moe.experts.218.w1", "model.layers.20.block_sparse_moe.experts.219.w1", "model.layers.20.block_sparse_moe.experts.220.w1", "model.layers.20.block_sparse_moe.experts.221.w1", "model.layers.20.block_sparse_moe.experts.222.w1", "model.layers.20.block_sparse_moe.experts.223.w1", "model.layers.20.block_sparse_moe.experts.224.w1", "model.layers.20.block_sparse_moe.experts.225.w1", "model.layers.20.block_sparse_moe.experts.226.w1", "model.layers.20.block_sparse_moe.experts.227.w1", "model.layers.20.block_sparse_moe.experts.228.w1", "model.layers.20.block_sparse_moe.experts.229.w1", "model.layers.20.block_sparse_moe.experts.230.w1", "model.layers.20.block_sparse_moe.experts.231.w1", "model.layers.20.block_sparse_moe.experts.232.w1", "model.layers.20.block_sparse_moe.experts.233.w1", "model.layers.20.block_sparse_moe.experts.234.w1", "model.layers.20.block_sparse_moe.experts.235.w1", "model.layers.20.block_sparse_moe.experts.236.w1", "model.layers.20.block_sparse_moe.experts.237.w1", "model.layers.20.block_sparse_moe.experts.238.w1", "model.layers.20.block_sparse_moe.experts.239.w1", "model.layers.20.block_sparse_moe.experts.240.w1", "model.layers.20.block_sparse_moe.experts.241.w1", "model.layers.20.block_sparse_moe.experts.242.w1", "model.layers.20.block_sparse_moe.experts.243.w1", "model.layers.20.block_sparse_moe.experts.244.w1", "model.layers.20.block_sparse_moe.experts.245.w1", "model.layers.20.block_sparse_moe.experts.246.w1", "model.layers.20.block_sparse_moe.experts.247.w1", "model.layers.20.block_sparse_moe.experts.248.w1", "model.layers.20.block_sparse_moe.experts.249.w1", "model.layers.20.block_sparse_moe.experts.250.w1", "model.layers.20.block_sparse_moe.experts.251.w1", "model.layers.20.block_sparse_moe.experts.252.w1", "model.layers.20.block_sparse_moe.experts.253.w1", "model.layers.20.block_sparse_moe.experts.254.w1", "model.layers.20.block_sparse_moe.experts.255.w1", "model.layers.20.block_sparse_moe.experts.0.w3", "model.layers.20.block_sparse_moe.experts.1.w3", "model.layers.20.block_sparse_moe.experts.2.w3", "model.layers.20.block_sparse_moe.experts.3.w3", "model.layers.20.block_sparse_moe.experts.4.w3", "model.layers.20.block_sparse_moe.experts.5.w3", "model.layers.20.block_sparse_moe.experts.6.w3", "model.layers.20.block_sparse_moe.experts.7.w3", "model.layers.20.block_sparse_moe.experts.8.w3", "model.layers.20.block_sparse_moe.experts.9.w3", "model.layers.20.block_sparse_moe.experts.10.w3", "model.layers.20.block_sparse_moe.experts.11.w3", "model.layers.20.block_sparse_moe.experts.12.w3", "model.layers.20.block_sparse_moe.experts.13.w3", "model.layers.20.block_sparse_moe.experts.14.w3", "model.layers.20.block_sparse_moe.experts.15.w3", "model.layers.20.block_sparse_moe.experts.16.w3", "model.layers.20.block_sparse_moe.experts.17.w3", "model.layers.20.block_sparse_moe.experts.18.w3", "model.layers.20.block_sparse_moe.experts.19.w3", "model.layers.20.block_sparse_moe.experts.20.w3", "model.layers.20.block_sparse_moe.experts.21.w3", "model.layers.20.block_sparse_moe.experts.22.w3", "model.layers.20.block_sparse_moe.experts.23.w3", "model.layers.20.block_sparse_moe.experts.24.w3", "model.layers.20.block_sparse_moe.experts.25.w3", "model.layers.20.block_sparse_moe.experts.26.w3", "model.layers.20.block_sparse_moe.experts.27.w3", "model.layers.20.block_sparse_moe.experts.28.w3", "model.layers.20.block_sparse_moe.experts.29.w3", "model.layers.20.block_sparse_moe.experts.30.w3", "model.layers.20.block_sparse_moe.experts.31.w3", "model.layers.20.block_sparse_moe.experts.32.w3", "model.layers.20.block_sparse_moe.experts.33.w3", "model.layers.20.block_sparse_moe.experts.34.w3", "model.layers.20.block_sparse_moe.experts.35.w3", "model.layers.20.block_sparse_moe.experts.36.w3", "model.layers.20.block_sparse_moe.experts.37.w3", "model.layers.20.block_sparse_moe.experts.38.w3", "model.layers.20.block_sparse_moe.experts.39.w3", "model.layers.20.block_sparse_moe.experts.40.w3", "model.layers.20.block_sparse_moe.experts.41.w3", "model.layers.20.block_sparse_moe.experts.42.w3", "model.layers.20.block_sparse_moe.experts.43.w3", "model.layers.20.block_sparse_moe.experts.44.w3", "model.layers.20.block_sparse_moe.experts.45.w3", "model.layers.20.block_sparse_moe.experts.46.w3", "model.layers.20.block_sparse_moe.experts.47.w3", "model.layers.20.block_sparse_moe.experts.48.w3", "model.layers.20.block_sparse_moe.experts.49.w3", "model.layers.20.block_sparse_moe.experts.50.w3", "model.layers.20.block_sparse_moe.experts.51.w3", "model.layers.20.block_sparse_moe.experts.52.w3", "model.layers.20.block_sparse_moe.experts.53.w3", "model.layers.20.block_sparse_moe.experts.54.w3", "model.layers.20.block_sparse_moe.experts.55.w3", "model.layers.20.block_sparse_moe.experts.56.w3", "model.layers.20.block_sparse_moe.experts.57.w3", "model.layers.20.block_sparse_moe.experts.58.w3", "model.layers.20.block_sparse_moe.experts.59.w3", "model.layers.20.block_sparse_moe.experts.60.w3", "model.layers.20.block_sparse_moe.experts.61.w3", "model.layers.20.block_sparse_moe.experts.62.w3", "model.layers.20.block_sparse_moe.experts.63.w3", "model.layers.20.block_sparse_moe.experts.64.w3", "model.layers.20.block_sparse_moe.experts.65.w3", "model.layers.20.block_sparse_moe.experts.66.w3", "model.layers.20.block_sparse_moe.experts.67.w3", "model.layers.20.block_sparse_moe.experts.68.w3", "model.layers.20.block_sparse_moe.experts.69.w3", "model.layers.20.block_sparse_moe.experts.70.w3", "model.layers.20.block_sparse_moe.experts.71.w3", "model.layers.20.block_sparse_moe.experts.72.w3", "model.layers.20.block_sparse_moe.experts.73.w3", "model.layers.20.block_sparse_moe.experts.74.w3", "model.layers.20.block_sparse_moe.experts.75.w3", "model.layers.20.block_sparse_moe.experts.76.w3", "model.layers.20.block_sparse_moe.experts.77.w3", "model.layers.20.block_sparse_moe.experts.78.w3", "model.layers.20.block_sparse_moe.experts.79.w3", "model.layers.20.block_sparse_moe.experts.80.w3", "model.layers.20.block_sparse_moe.experts.81.w3", "model.layers.20.block_sparse_moe.experts.82.w3", "model.layers.20.block_sparse_moe.experts.83.w3", "model.layers.20.block_sparse_moe.experts.84.w3", "model.layers.20.block_sparse_moe.experts.85.w3", "model.layers.20.block_sparse_moe.experts.86.w3", "model.layers.20.block_sparse_moe.experts.87.w3", "model.layers.20.block_sparse_moe.experts.88.w3", "model.layers.20.block_sparse_moe.experts.89.w3", "model.layers.20.block_sparse_moe.experts.90.w3", "model.layers.20.block_sparse_moe.experts.91.w3", "model.layers.20.block_sparse_moe.experts.92.w3", "model.layers.20.block_sparse_moe.experts.93.w3", "model.layers.20.block_sparse_moe.experts.94.w3", "model.layers.20.block_sparse_moe.experts.95.w3", "model.layers.20.block_sparse_moe.experts.96.w3", "model.layers.20.block_sparse_moe.experts.97.w3", "model.layers.20.block_sparse_moe.experts.98.w3", "model.layers.20.block_sparse_moe.experts.99.w3", "model.layers.20.block_sparse_moe.experts.100.w3", "model.layers.20.block_sparse_moe.experts.101.w3", "model.layers.20.block_sparse_moe.experts.102.w3", "model.layers.20.block_sparse_moe.experts.103.w3", "model.layers.20.block_sparse_moe.experts.104.w3", "model.layers.20.block_sparse_moe.experts.105.w3", "model.layers.20.block_sparse_moe.experts.106.w3", "model.layers.20.block_sparse_moe.experts.107.w3", "model.layers.20.block_sparse_moe.experts.108.w3", "model.layers.20.block_sparse_moe.experts.109.w3", "model.layers.20.block_sparse_moe.experts.110.w3", "model.layers.20.block_sparse_moe.experts.111.w3", "model.layers.20.block_sparse_moe.experts.112.w3", "model.layers.20.block_sparse_moe.experts.113.w3", "model.layers.20.block_sparse_moe.experts.114.w3", "model.layers.20.block_sparse_moe.experts.115.w3", "model.layers.20.block_sparse_moe.experts.116.w3", "model.layers.20.block_sparse_moe.experts.117.w3", "model.layers.20.block_sparse_moe.experts.118.w3", "model.layers.20.block_sparse_moe.experts.119.w3", "model.layers.20.block_sparse_moe.experts.120.w3", "model.layers.20.block_sparse_moe.experts.121.w3", "model.layers.20.block_sparse_moe.experts.122.w3", "model.layers.20.block_sparse_moe.experts.123.w3", "model.layers.20.block_sparse_moe.experts.124.w3", "model.layers.20.block_sparse_moe.experts.125.w3", "model.layers.20.block_sparse_moe.experts.126.w3", "model.layers.20.block_sparse_moe.experts.127.w3", "model.layers.20.block_sparse_moe.experts.128.w3", "model.layers.20.block_sparse_moe.experts.129.w3", "model.layers.20.block_sparse_moe.experts.130.w3", "model.layers.20.block_sparse_moe.experts.131.w3", "model.layers.20.block_sparse_moe.experts.132.w3", "model.layers.20.block_sparse_moe.experts.133.w3", "model.layers.20.block_sparse_moe.experts.134.w3", "model.layers.20.block_sparse_moe.experts.135.w3", "model.layers.20.block_sparse_moe.experts.136.w3", "model.layers.20.block_sparse_moe.experts.137.w3", "model.layers.20.block_sparse_moe.experts.138.w3", "model.layers.20.block_sparse_moe.experts.139.w3", "model.layers.20.block_sparse_moe.experts.140.w3", "model.layers.20.block_sparse_moe.experts.141.w3", "model.layers.20.block_sparse_moe.experts.142.w3", "model.layers.20.block_sparse_moe.experts.143.w3", "model.layers.20.block_sparse_moe.experts.144.w3", "model.layers.20.block_sparse_moe.experts.145.w3", "model.layers.20.block_sparse_moe.experts.146.w3", "model.layers.20.block_sparse_moe.experts.147.w3", "model.layers.20.block_sparse_moe.experts.148.w3", "model.layers.20.block_sparse_moe.experts.149.w3", "model.layers.20.block_sparse_moe.experts.150.w3", "model.layers.20.block_sparse_moe.experts.151.w3", "model.layers.20.block_sparse_moe.experts.152.w3", "model.layers.20.block_sparse_moe.experts.153.w3", "model.layers.20.block_sparse_moe.experts.154.w3", "model.layers.20.block_sparse_moe.experts.155.w3", "model.layers.20.block_sparse_moe.experts.156.w3", "model.layers.20.block_sparse_moe.experts.157.w3", "model.layers.20.block_sparse_moe.experts.158.w3", "model.layers.20.block_sparse_moe.experts.159.w3", "model.layers.20.block_sparse_moe.experts.160.w3", "model.layers.20.block_sparse_moe.experts.161.w3", "model.layers.20.block_sparse_moe.experts.162.w3", "model.layers.20.block_sparse_moe.experts.163.w3", "model.layers.20.block_sparse_moe.experts.164.w3", "model.layers.20.block_sparse_moe.experts.165.w3", "model.layers.20.block_sparse_moe.experts.166.w3", "model.layers.20.block_sparse_moe.experts.167.w3", "model.layers.20.block_sparse_moe.experts.168.w3", "model.layers.20.block_sparse_moe.experts.169.w3", "model.layers.20.block_sparse_moe.experts.170.w3", "model.layers.20.block_sparse_moe.experts.171.w3", "model.layers.20.block_sparse_moe.experts.172.w3", "model.layers.20.block_sparse_moe.experts.173.w3", "model.layers.20.block_sparse_moe.experts.174.w3", "model.layers.20.block_sparse_moe.experts.175.w3", "model.layers.20.block_sparse_moe.experts.176.w3", "model.layers.20.block_sparse_moe.experts.177.w3", "model.layers.20.block_sparse_moe.experts.178.w3", "model.layers.20.block_sparse_moe.experts.179.w3", "model.layers.20.block_sparse_moe.experts.180.w3", "model.layers.20.block_sparse_moe.experts.181.w3", "model.layers.20.block_sparse_moe.experts.182.w3", "model.layers.20.block_sparse_moe.experts.183.w3", "model.layers.20.block_sparse_moe.experts.184.w3", "model.layers.20.block_sparse_moe.experts.185.w3", "model.layers.20.block_sparse_moe.experts.186.w3", "model.layers.20.block_sparse_moe.experts.187.w3", "model.layers.20.block_sparse_moe.experts.188.w3", "model.layers.20.block_sparse_moe.experts.189.w3", "model.layers.20.block_sparse_moe.experts.190.w3", "model.layers.20.block_sparse_moe.experts.191.w3", "model.layers.20.block_sparse_moe.experts.192.w3", "model.layers.20.block_sparse_moe.experts.193.w3", "model.layers.20.block_sparse_moe.experts.194.w3", "model.layers.20.block_sparse_moe.experts.195.w3", "model.layers.20.block_sparse_moe.experts.196.w3", "model.layers.20.block_sparse_moe.experts.197.w3", "model.layers.20.block_sparse_moe.experts.198.w3", "model.layers.20.block_sparse_moe.experts.199.w3", "model.layers.20.block_sparse_moe.experts.200.w3", "model.layers.20.block_sparse_moe.experts.201.w3", "model.layers.20.block_sparse_moe.experts.202.w3", "model.layers.20.block_sparse_moe.experts.203.w3", "model.layers.20.block_sparse_moe.experts.204.w3", "model.layers.20.block_sparse_moe.experts.205.w3", "model.layers.20.block_sparse_moe.experts.206.w3", "model.layers.20.block_sparse_moe.experts.207.w3", "model.layers.20.block_sparse_moe.experts.208.w3", "model.layers.20.block_sparse_moe.experts.209.w3", "model.layers.20.block_sparse_moe.experts.210.w3", "model.layers.20.block_sparse_moe.experts.211.w3", "model.layers.20.block_sparse_moe.experts.212.w3", "model.layers.20.block_sparse_moe.experts.213.w3", "model.layers.20.block_sparse_moe.experts.214.w3", "model.layers.20.block_sparse_moe.experts.215.w3", "model.layers.20.block_sparse_moe.experts.216.w3", "model.layers.20.block_sparse_moe.experts.217.w3", "model.layers.20.block_sparse_moe.experts.218.w3", "model.layers.20.block_sparse_moe.experts.219.w3", "model.layers.20.block_sparse_moe.experts.220.w3", "model.layers.20.block_sparse_moe.experts.221.w3", "model.layers.20.block_sparse_moe.experts.222.w3", "model.layers.20.block_sparse_moe.experts.223.w3", "model.layers.20.block_sparse_moe.experts.224.w3", "model.layers.20.block_sparse_moe.experts.225.w3", "model.layers.20.block_sparse_moe.experts.226.w3", "model.layers.20.block_sparse_moe.experts.227.w3", "model.layers.20.block_sparse_moe.experts.228.w3", "model.layers.20.block_sparse_moe.experts.229.w3", "model.layers.20.block_sparse_moe.experts.230.w3", "model.layers.20.block_sparse_moe.experts.231.w3", "model.layers.20.block_sparse_moe.experts.232.w3", "model.layers.20.block_sparse_moe.experts.233.w3", "model.layers.20.block_sparse_moe.experts.234.w3", "model.layers.20.block_sparse_moe.experts.235.w3", "model.layers.20.block_sparse_moe.experts.236.w3", "model.layers.20.block_sparse_moe.experts.237.w3", "model.layers.20.block_sparse_moe.experts.238.w3", "model.layers.20.block_sparse_moe.experts.239.w3", "model.layers.20.block_sparse_moe.experts.240.w3", "model.layers.20.block_sparse_moe.experts.241.w3", "model.layers.20.block_sparse_moe.experts.242.w3", "model.layers.20.block_sparse_moe.experts.243.w3", "model.layers.20.block_sparse_moe.experts.244.w3", "model.layers.20.block_sparse_moe.experts.245.w3", "model.layers.20.block_sparse_moe.experts.246.w3", "model.layers.20.block_sparse_moe.experts.247.w3", "model.layers.20.block_sparse_moe.experts.248.w3", "model.layers.20.block_sparse_moe.experts.249.w3", "model.layers.20.block_sparse_moe.experts.250.w3", "model.layers.20.block_sparse_moe.experts.251.w3", "model.layers.20.block_sparse_moe.experts.252.w3", "model.layers.20.block_sparse_moe.experts.253.w3", "model.layers.20.block_sparse_moe.experts.254.w3", "model.layers.20.block_sparse_moe.experts.255.w3", "model.layers.20.block_sparse_moe.experts.0.w2", "model.layers.20.block_sparse_moe.experts.1.w2", "model.layers.20.block_sparse_moe.experts.2.w2", "model.layers.20.block_sparse_moe.experts.3.w2", "model.layers.20.block_sparse_moe.experts.4.w2", "model.layers.20.block_sparse_moe.experts.5.w2", "model.layers.20.block_sparse_moe.experts.6.w2", "model.layers.20.block_sparse_moe.experts.7.w2", "model.layers.20.block_sparse_moe.experts.8.w2", "model.layers.20.block_sparse_moe.experts.9.w2", "model.layers.20.block_sparse_moe.experts.10.w2", "model.layers.20.block_sparse_moe.experts.11.w2", "model.layers.20.block_sparse_moe.experts.12.w2", "model.layers.20.block_sparse_moe.experts.13.w2", "model.layers.20.block_sparse_moe.experts.14.w2", "model.layers.20.block_sparse_moe.experts.15.w2", "model.layers.20.block_sparse_moe.experts.16.w2", "model.layers.20.block_sparse_moe.experts.17.w2", "model.layers.20.block_sparse_moe.experts.18.w2", "model.layers.20.block_sparse_moe.experts.19.w2", "model.layers.20.block_sparse_moe.experts.20.w2", "model.layers.20.block_sparse_moe.experts.21.w2", "model.layers.20.block_sparse_moe.experts.22.w2", "model.layers.20.block_sparse_moe.experts.23.w2", "model.layers.20.block_sparse_moe.experts.24.w2", "model.layers.20.block_sparse_moe.experts.25.w2", "model.layers.20.block_sparse_moe.experts.26.w2", "model.layers.20.block_sparse_moe.experts.27.w2", "model.layers.20.block_sparse_moe.experts.28.w2", "model.layers.20.block_sparse_moe.experts.29.w2", "model.layers.20.block_sparse_moe.experts.30.w2", "model.layers.20.block_sparse_moe.experts.31.w2", "model.layers.20.block_sparse_moe.experts.32.w2", "model.layers.20.block_sparse_moe.experts.33.w2", "model.layers.20.block_sparse_moe.experts.34.w2", "model.layers.20.block_sparse_moe.experts.35.w2", "model.layers.20.block_sparse_moe.experts.36.w2", "model.layers.20.block_sparse_moe.experts.37.w2", "model.layers.20.block_sparse_moe.experts.38.w2", "model.layers.20.block_sparse_moe.experts.39.w2", "model.layers.20.block_sparse_moe.experts.40.w2", "model.layers.20.block_sparse_moe.experts.41.w2", "model.layers.20.block_sparse_moe.experts.42.w2", "model.layers.20.block_sparse_moe.experts.43.w2", "model.layers.20.block_sparse_moe.experts.44.w2", "model.layers.20.block_sparse_moe.experts.45.w2", "model.layers.20.block_sparse_moe.experts.46.w2", "model.layers.20.block_sparse_moe.experts.47.w2", "model.layers.20.block_sparse_moe.experts.48.w2", "model.layers.20.block_sparse_moe.experts.49.w2", "model.layers.20.block_sparse_moe.experts.50.w2", "model.layers.20.block_sparse_moe.experts.51.w2", "model.layers.20.block_sparse_moe.experts.52.w2", "model.layers.20.block_sparse_moe.experts.53.w2", "model.layers.20.block_sparse_moe.experts.54.w2", "model.layers.20.block_sparse_moe.experts.55.w2", "model.layers.20.block_sparse_moe.experts.56.w2", "model.layers.20.block_sparse_moe.experts.57.w2", "model.layers.20.block_sparse_moe.experts.58.w2", "model.layers.20.block_sparse_moe.experts.59.w2", "model.layers.20.block_sparse_moe.experts.60.w2", "model.layers.20.block_sparse_moe.experts.61.w2", "model.layers.20.block_sparse_moe.experts.62.w2", "model.layers.20.block_sparse_moe.experts.63.w2", "model.layers.20.block_sparse_moe.experts.64.w2", "model.layers.20.block_sparse_moe.experts.65.w2", "model.layers.20.block_sparse_moe.experts.66.w2", "model.layers.20.block_sparse_moe.experts.67.w2", "model.layers.20.block_sparse_moe.experts.68.w2", "model.layers.20.block_sparse_moe.experts.69.w2", "model.layers.20.block_sparse_moe.experts.70.w2", "model.layers.20.block_sparse_moe.experts.71.w2", "model.layers.20.block_sparse_moe.experts.72.w2", "model.layers.20.block_sparse_moe.experts.73.w2", "model.layers.20.block_sparse_moe.experts.74.w2", "model.layers.20.block_sparse_moe.experts.75.w2", "model.layers.20.block_sparse_moe.experts.76.w2", "model.layers.20.block_sparse_moe.experts.77.w2", "model.layers.20.block_sparse_moe.experts.78.w2", "model.layers.20.block_sparse_moe.experts.79.w2", "model.layers.20.block_sparse_moe.experts.80.w2", "model.layers.20.block_sparse_moe.experts.81.w2", "model.layers.20.block_sparse_moe.experts.82.w2", "model.layers.20.block_sparse_moe.experts.83.w2", "model.layers.20.block_sparse_moe.experts.84.w2", "model.layers.20.block_sparse_moe.experts.85.w2", "model.layers.20.block_sparse_moe.experts.86.w2", "model.layers.20.block_sparse_moe.experts.87.w2", "model.layers.20.block_sparse_moe.experts.88.w2", "model.layers.20.block_sparse_moe.experts.89.w2", "model.layers.20.block_sparse_moe.experts.90.w2", "model.layers.20.block_sparse_moe.experts.91.w2", "model.layers.20.block_sparse_moe.experts.92.w2", "model.layers.20.block_sparse_moe.experts.93.w2", "model.layers.20.block_sparse_moe.experts.94.w2", "model.layers.20.block_sparse_moe.experts.95.w2", "model.layers.20.block_sparse_moe.experts.96.w2", "model.layers.20.block_sparse_moe.experts.97.w2", "model.layers.20.block_sparse_moe.experts.98.w2", "model.layers.20.block_sparse_moe.experts.99.w2", "model.layers.20.block_sparse_moe.experts.100.w2", "model.layers.20.block_sparse_moe.experts.101.w2", "model.layers.20.block_sparse_moe.experts.102.w2", "model.layers.20.block_sparse_moe.experts.103.w2", "model.layers.20.block_sparse_moe.experts.104.w2", "model.layers.20.block_sparse_moe.experts.105.w2", "model.layers.20.block_sparse_moe.experts.106.w2", "model.layers.20.block_sparse_moe.experts.107.w2", "model.layers.20.block_sparse_moe.experts.108.w2", "model.layers.20.block_sparse_moe.experts.109.w2", "model.layers.20.block_sparse_moe.experts.110.w2", "model.layers.20.block_sparse_moe.experts.111.w2", "model.layers.20.block_sparse_moe.experts.112.w2", "model.layers.20.block_sparse_moe.experts.113.w2", "model.layers.20.block_sparse_moe.experts.114.w2", "model.layers.20.block_sparse_moe.experts.115.w2", "model.layers.20.block_sparse_moe.experts.116.w2", "model.layers.20.block_sparse_moe.experts.117.w2", "model.layers.20.block_sparse_moe.experts.118.w2", "model.layers.20.block_sparse_moe.experts.119.w2", "model.layers.20.block_sparse_moe.experts.120.w2", "model.layers.20.block_sparse_moe.experts.121.w2", "model.layers.20.block_sparse_moe.experts.122.w2", "model.layers.20.block_sparse_moe.experts.123.w2", "model.layers.20.block_sparse_moe.experts.124.w2", "model.layers.20.block_sparse_moe.experts.125.w2", "model.layers.20.block_sparse_moe.experts.126.w2", "model.layers.20.block_sparse_moe.experts.127.w2", "model.layers.20.block_sparse_moe.experts.128.w2", "model.layers.20.block_sparse_moe.experts.129.w2", "model.layers.20.block_sparse_moe.experts.130.w2", "model.layers.20.block_sparse_moe.experts.131.w2", "model.layers.20.block_sparse_moe.experts.132.w2", "model.layers.20.block_sparse_moe.experts.133.w2", "model.layers.20.block_sparse_moe.experts.134.w2", "model.layers.20.block_sparse_moe.experts.135.w2", "model.layers.20.block_sparse_moe.experts.136.w2", "model.layers.20.block_sparse_moe.experts.137.w2", "model.layers.20.block_sparse_moe.experts.138.w2", "model.layers.20.block_sparse_moe.experts.139.w2", "model.layers.20.block_sparse_moe.experts.140.w2", "model.layers.20.block_sparse_moe.experts.141.w2", "model.layers.20.block_sparse_moe.experts.142.w2", "model.layers.20.block_sparse_moe.experts.143.w2", "model.layers.20.block_sparse_moe.experts.144.w2", "model.layers.20.block_sparse_moe.experts.145.w2", "model.layers.20.block_sparse_moe.experts.146.w2", "model.layers.20.block_sparse_moe.experts.147.w2", "model.layers.20.block_sparse_moe.experts.148.w2", "model.layers.20.block_sparse_moe.experts.149.w2", "model.layers.20.block_sparse_moe.experts.150.w2", "model.layers.20.block_sparse_moe.experts.151.w2", "model.layers.20.block_sparse_moe.experts.152.w2", "model.layers.20.block_sparse_moe.experts.153.w2", "model.layers.20.block_sparse_moe.experts.154.w2", "model.layers.20.block_sparse_moe.experts.155.w2", "model.layers.20.block_sparse_moe.experts.156.w2", "model.layers.20.block_sparse_moe.experts.157.w2", "model.layers.20.block_sparse_moe.experts.158.w2", "model.layers.20.block_sparse_moe.experts.159.w2", "model.layers.20.block_sparse_moe.experts.160.w2", "model.layers.20.block_sparse_moe.experts.161.w2", "model.layers.20.block_sparse_moe.experts.162.w2", "model.layers.20.block_sparse_moe.experts.163.w2", "model.layers.20.block_sparse_moe.experts.164.w2", "model.layers.20.block_sparse_moe.experts.165.w2", "model.layers.20.block_sparse_moe.experts.166.w2", "model.layers.20.block_sparse_moe.experts.167.w2", "model.layers.20.block_sparse_moe.experts.168.w2", "model.layers.20.block_sparse_moe.experts.169.w2", "model.layers.20.block_sparse_moe.experts.170.w2", "model.layers.20.block_sparse_moe.experts.171.w2", "model.layers.20.block_sparse_moe.experts.172.w2", "model.layers.20.block_sparse_moe.experts.173.w2", "model.layers.20.block_sparse_moe.experts.174.w2", "model.layers.20.block_sparse_moe.experts.175.w2", "model.layers.20.block_sparse_moe.experts.176.w2", "model.layers.20.block_sparse_moe.experts.177.w2", "model.layers.20.block_sparse_moe.experts.178.w2", "model.layers.20.block_sparse_moe.experts.179.w2", "model.layers.20.block_sparse_moe.experts.180.w2", "model.layers.20.block_sparse_moe.experts.181.w2", "model.layers.20.block_sparse_moe.experts.182.w2", "model.layers.20.block_sparse_moe.experts.183.w2", "model.layers.20.block_sparse_moe.experts.184.w2", "model.layers.20.block_sparse_moe.experts.185.w2", "model.layers.20.block_sparse_moe.experts.186.w2", "model.layers.20.block_sparse_moe.experts.187.w2", "model.layers.20.block_sparse_moe.experts.188.w2", "model.layers.20.block_sparse_moe.experts.189.w2", "model.layers.20.block_sparse_moe.experts.190.w2", "model.layers.20.block_sparse_moe.experts.191.w2", "model.layers.20.block_sparse_moe.experts.192.w2", "model.layers.20.block_sparse_moe.experts.193.w2", "model.layers.20.block_sparse_moe.experts.194.w2", "model.layers.20.block_sparse_moe.experts.195.w2", "model.layers.20.block_sparse_moe.experts.196.w2", "model.layers.20.block_sparse_moe.experts.197.w2", "model.layers.20.block_sparse_moe.experts.198.w2", "model.layers.20.block_sparse_moe.experts.199.w2", "model.layers.20.block_sparse_moe.experts.200.w2", "model.layers.20.block_sparse_moe.experts.201.w2", "model.layers.20.block_sparse_moe.experts.202.w2", "model.layers.20.block_sparse_moe.experts.203.w2", "model.layers.20.block_sparse_moe.experts.204.w2", "model.layers.20.block_sparse_moe.experts.205.w2", "model.layers.20.block_sparse_moe.experts.206.w2", "model.layers.20.block_sparse_moe.experts.207.w2", "model.layers.20.block_sparse_moe.experts.208.w2", "model.layers.20.block_sparse_moe.experts.209.w2", "model.layers.20.block_sparse_moe.experts.210.w2", "model.layers.20.block_sparse_moe.experts.211.w2", "model.layers.20.block_sparse_moe.experts.212.w2", "model.layers.20.block_sparse_moe.experts.213.w2", "model.layers.20.block_sparse_moe.experts.214.w2", "model.layers.20.block_sparse_moe.experts.215.w2", "model.layers.20.block_sparse_moe.experts.216.w2", "model.layers.20.block_sparse_moe.experts.217.w2", "model.layers.20.block_sparse_moe.experts.218.w2", "model.layers.20.block_sparse_moe.experts.219.w2", "model.layers.20.block_sparse_moe.experts.220.w2", "model.layers.20.block_sparse_moe.experts.221.w2", "model.layers.20.block_sparse_moe.experts.222.w2", "model.layers.20.block_sparse_moe.experts.223.w2", "model.layers.20.block_sparse_moe.experts.224.w2", "model.layers.20.block_sparse_moe.experts.225.w2", "model.layers.20.block_sparse_moe.experts.226.w2", "model.layers.20.block_sparse_moe.experts.227.w2", "model.layers.20.block_sparse_moe.experts.228.w2", "model.layers.20.block_sparse_moe.experts.229.w2", "model.layers.20.block_sparse_moe.experts.230.w2", "model.layers.20.block_sparse_moe.experts.231.w2", "model.layers.20.block_sparse_moe.experts.232.w2", "model.layers.20.block_sparse_moe.experts.233.w2", "model.layers.20.block_sparse_moe.experts.234.w2", "model.layers.20.block_sparse_moe.experts.235.w2", "model.layers.20.block_sparse_moe.experts.236.w2", "model.layers.20.block_sparse_moe.experts.237.w2", "model.layers.20.block_sparse_moe.experts.238.w2", "model.layers.20.block_sparse_moe.experts.239.w2", "model.layers.20.block_sparse_moe.experts.240.w2", "model.layers.20.block_sparse_moe.experts.241.w2", "model.layers.20.block_sparse_moe.experts.242.w2", "model.layers.20.block_sparse_moe.experts.243.w2", "model.layers.20.block_sparse_moe.experts.244.w2", "model.layers.20.block_sparse_moe.experts.245.w2", "model.layers.20.block_sparse_moe.experts.246.w2", "model.layers.20.block_sparse_moe.experts.247.w2", "model.layers.20.block_sparse_moe.experts.248.w2", "model.layers.20.block_sparse_moe.experts.249.w2", "model.layers.20.block_sparse_moe.experts.250.w2", "model.layers.20.block_sparse_moe.experts.251.w2", "model.layers.20.block_sparse_moe.experts.252.w2", "model.layers.20.block_sparse_moe.experts.253.w2", "model.layers.20.block_sparse_moe.experts.254.w2", "model.layers.20.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00046071112155915417, "dbits": 3623878656 } ] }, { "idx": 42, "layers": [ "model.layers.21.self_attn.q_proj", "model.layers.21.self_attn.k_proj", "model.layers.21.self_attn.v_proj", "model.layers.21.self_attn.o_proj" ], "candidates": [ { "dkld": -0.003140948712825775, "dbits": 44040192 } ] }, { "idx": 43, "layers": [ "model.layers.21.block_sparse_moe.experts.0.w1", "model.layers.21.block_sparse_moe.experts.1.w1", "model.layers.21.block_sparse_moe.experts.2.w1", "model.layers.21.block_sparse_moe.experts.3.w1", "model.layers.21.block_sparse_moe.experts.4.w1", "model.layers.21.block_sparse_moe.experts.5.w1", "model.layers.21.block_sparse_moe.experts.6.w1", "model.layers.21.block_sparse_moe.experts.7.w1", "model.layers.21.block_sparse_moe.experts.8.w1", "model.layers.21.block_sparse_moe.experts.9.w1", "model.layers.21.block_sparse_moe.experts.10.w1", "model.layers.21.block_sparse_moe.experts.11.w1", "model.layers.21.block_sparse_moe.experts.12.w1", "model.layers.21.block_sparse_moe.experts.13.w1", "model.layers.21.block_sparse_moe.experts.14.w1", "model.layers.21.block_sparse_moe.experts.15.w1", "model.layers.21.block_sparse_moe.experts.16.w1", "model.layers.21.block_sparse_moe.experts.17.w1", "model.layers.21.block_sparse_moe.experts.18.w1", "model.layers.21.block_sparse_moe.experts.19.w1", "model.layers.21.block_sparse_moe.experts.20.w1", "model.layers.21.block_sparse_moe.experts.21.w1", "model.layers.21.block_sparse_moe.experts.22.w1", "model.layers.21.block_sparse_moe.experts.23.w1", "model.layers.21.block_sparse_moe.experts.24.w1", "model.layers.21.block_sparse_moe.experts.25.w1", "model.layers.21.block_sparse_moe.experts.26.w1", "model.layers.21.block_sparse_moe.experts.27.w1", "model.layers.21.block_sparse_moe.experts.28.w1", "model.layers.21.block_sparse_moe.experts.29.w1", "model.layers.21.block_sparse_moe.experts.30.w1", "model.layers.21.block_sparse_moe.experts.31.w1", "model.layers.21.block_sparse_moe.experts.32.w1", "model.layers.21.block_sparse_moe.experts.33.w1", "model.layers.21.block_sparse_moe.experts.34.w1", "model.layers.21.block_sparse_moe.experts.35.w1", "model.layers.21.block_sparse_moe.experts.36.w1", "model.layers.21.block_sparse_moe.experts.37.w1", "model.layers.21.block_sparse_moe.experts.38.w1", "model.layers.21.block_sparse_moe.experts.39.w1", "model.layers.21.block_sparse_moe.experts.40.w1", "model.layers.21.block_sparse_moe.experts.41.w1", "model.layers.21.block_sparse_moe.experts.42.w1", "model.layers.21.block_sparse_moe.experts.43.w1", "model.layers.21.block_sparse_moe.experts.44.w1", "model.layers.21.block_sparse_moe.experts.45.w1", "model.layers.21.block_sparse_moe.experts.46.w1", "model.layers.21.block_sparse_moe.experts.47.w1", "model.layers.21.block_sparse_moe.experts.48.w1", "model.layers.21.block_sparse_moe.experts.49.w1", "model.layers.21.block_sparse_moe.experts.50.w1", "model.layers.21.block_sparse_moe.experts.51.w1", "model.layers.21.block_sparse_moe.experts.52.w1", "model.layers.21.block_sparse_moe.experts.53.w1", "model.layers.21.block_sparse_moe.experts.54.w1", "model.layers.21.block_sparse_moe.experts.55.w1", "model.layers.21.block_sparse_moe.experts.56.w1", "model.layers.21.block_sparse_moe.experts.57.w1", "model.layers.21.block_sparse_moe.experts.58.w1", "model.layers.21.block_sparse_moe.experts.59.w1", "model.layers.21.block_sparse_moe.experts.60.w1", "model.layers.21.block_sparse_moe.experts.61.w1", "model.layers.21.block_sparse_moe.experts.62.w1", "model.layers.21.block_sparse_moe.experts.63.w1", "model.layers.21.block_sparse_moe.experts.64.w1", "model.layers.21.block_sparse_moe.experts.65.w1", "model.layers.21.block_sparse_moe.experts.66.w1", "model.layers.21.block_sparse_moe.experts.67.w1", "model.layers.21.block_sparse_moe.experts.68.w1", "model.layers.21.block_sparse_moe.experts.69.w1", "model.layers.21.block_sparse_moe.experts.70.w1", "model.layers.21.block_sparse_moe.experts.71.w1", "model.layers.21.block_sparse_moe.experts.72.w1", "model.layers.21.block_sparse_moe.experts.73.w1", "model.layers.21.block_sparse_moe.experts.74.w1", "model.layers.21.block_sparse_moe.experts.75.w1", "model.layers.21.block_sparse_moe.experts.76.w1", "model.layers.21.block_sparse_moe.experts.77.w1", "model.layers.21.block_sparse_moe.experts.78.w1", "model.layers.21.block_sparse_moe.experts.79.w1", "model.layers.21.block_sparse_moe.experts.80.w1", "model.layers.21.block_sparse_moe.experts.81.w1", "model.layers.21.block_sparse_moe.experts.82.w1", "model.layers.21.block_sparse_moe.experts.83.w1", "model.layers.21.block_sparse_moe.experts.84.w1", "model.layers.21.block_sparse_moe.experts.85.w1", "model.layers.21.block_sparse_moe.experts.86.w1", "model.layers.21.block_sparse_moe.experts.87.w1", "model.layers.21.block_sparse_moe.experts.88.w1", "model.layers.21.block_sparse_moe.experts.89.w1", "model.layers.21.block_sparse_moe.experts.90.w1", "model.layers.21.block_sparse_moe.experts.91.w1", "model.layers.21.block_sparse_moe.experts.92.w1", "model.layers.21.block_sparse_moe.experts.93.w1", "model.layers.21.block_sparse_moe.experts.94.w1", "model.layers.21.block_sparse_moe.experts.95.w1", "model.layers.21.block_sparse_moe.experts.96.w1", "model.layers.21.block_sparse_moe.experts.97.w1", "model.layers.21.block_sparse_moe.experts.98.w1", "model.layers.21.block_sparse_moe.experts.99.w1", "model.layers.21.block_sparse_moe.experts.100.w1", "model.layers.21.block_sparse_moe.experts.101.w1", "model.layers.21.block_sparse_moe.experts.102.w1", "model.layers.21.block_sparse_moe.experts.103.w1", "model.layers.21.block_sparse_moe.experts.104.w1", "model.layers.21.block_sparse_moe.experts.105.w1", "model.layers.21.block_sparse_moe.experts.106.w1", "model.layers.21.block_sparse_moe.experts.107.w1", "model.layers.21.block_sparse_moe.experts.108.w1", "model.layers.21.block_sparse_moe.experts.109.w1", "model.layers.21.block_sparse_moe.experts.110.w1", "model.layers.21.block_sparse_moe.experts.111.w1", "model.layers.21.block_sparse_moe.experts.112.w1", "model.layers.21.block_sparse_moe.experts.113.w1", "model.layers.21.block_sparse_moe.experts.114.w1", "model.layers.21.block_sparse_moe.experts.115.w1", "model.layers.21.block_sparse_moe.experts.116.w1", "model.layers.21.block_sparse_moe.experts.117.w1", "model.layers.21.block_sparse_moe.experts.118.w1", "model.layers.21.block_sparse_moe.experts.119.w1", "model.layers.21.block_sparse_moe.experts.120.w1", "model.layers.21.block_sparse_moe.experts.121.w1", "model.layers.21.block_sparse_moe.experts.122.w1", "model.layers.21.block_sparse_moe.experts.123.w1", "model.layers.21.block_sparse_moe.experts.124.w1", "model.layers.21.block_sparse_moe.experts.125.w1", "model.layers.21.block_sparse_moe.experts.126.w1", "model.layers.21.block_sparse_moe.experts.127.w1", "model.layers.21.block_sparse_moe.experts.128.w1", "model.layers.21.block_sparse_moe.experts.129.w1", "model.layers.21.block_sparse_moe.experts.130.w1", "model.layers.21.block_sparse_moe.experts.131.w1", "model.layers.21.block_sparse_moe.experts.132.w1", "model.layers.21.block_sparse_moe.experts.133.w1", "model.layers.21.block_sparse_moe.experts.134.w1", "model.layers.21.block_sparse_moe.experts.135.w1", "model.layers.21.block_sparse_moe.experts.136.w1", "model.layers.21.block_sparse_moe.experts.137.w1", "model.layers.21.block_sparse_moe.experts.138.w1", "model.layers.21.block_sparse_moe.experts.139.w1", "model.layers.21.block_sparse_moe.experts.140.w1", "model.layers.21.block_sparse_moe.experts.141.w1", "model.layers.21.block_sparse_moe.experts.142.w1", "model.layers.21.block_sparse_moe.experts.143.w1", "model.layers.21.block_sparse_moe.experts.144.w1", "model.layers.21.block_sparse_moe.experts.145.w1", "model.layers.21.block_sparse_moe.experts.146.w1", "model.layers.21.block_sparse_moe.experts.147.w1", "model.layers.21.block_sparse_moe.experts.148.w1", "model.layers.21.block_sparse_moe.experts.149.w1", "model.layers.21.block_sparse_moe.experts.150.w1", "model.layers.21.block_sparse_moe.experts.151.w1", "model.layers.21.block_sparse_moe.experts.152.w1", "model.layers.21.block_sparse_moe.experts.153.w1", "model.layers.21.block_sparse_moe.experts.154.w1", "model.layers.21.block_sparse_moe.experts.155.w1", "model.layers.21.block_sparse_moe.experts.156.w1", "model.layers.21.block_sparse_moe.experts.157.w1", "model.layers.21.block_sparse_moe.experts.158.w1", "model.layers.21.block_sparse_moe.experts.159.w1", "model.layers.21.block_sparse_moe.experts.160.w1", "model.layers.21.block_sparse_moe.experts.161.w1", "model.layers.21.block_sparse_moe.experts.162.w1", "model.layers.21.block_sparse_moe.experts.163.w1", "model.layers.21.block_sparse_moe.experts.164.w1", "model.layers.21.block_sparse_moe.experts.165.w1", "model.layers.21.block_sparse_moe.experts.166.w1", "model.layers.21.block_sparse_moe.experts.167.w1", "model.layers.21.block_sparse_moe.experts.168.w1", "model.layers.21.block_sparse_moe.experts.169.w1", "model.layers.21.block_sparse_moe.experts.170.w1", "model.layers.21.block_sparse_moe.experts.171.w1", "model.layers.21.block_sparse_moe.experts.172.w1", "model.layers.21.block_sparse_moe.experts.173.w1", "model.layers.21.block_sparse_moe.experts.174.w1", "model.layers.21.block_sparse_moe.experts.175.w1", "model.layers.21.block_sparse_moe.experts.176.w1", "model.layers.21.block_sparse_moe.experts.177.w1", "model.layers.21.block_sparse_moe.experts.178.w1", "model.layers.21.block_sparse_moe.experts.179.w1", "model.layers.21.block_sparse_moe.experts.180.w1", "model.layers.21.block_sparse_moe.experts.181.w1", "model.layers.21.block_sparse_moe.experts.182.w1", "model.layers.21.block_sparse_moe.experts.183.w1", "model.layers.21.block_sparse_moe.experts.184.w1", "model.layers.21.block_sparse_moe.experts.185.w1", "model.layers.21.block_sparse_moe.experts.186.w1", "model.layers.21.block_sparse_moe.experts.187.w1", "model.layers.21.block_sparse_moe.experts.188.w1", "model.layers.21.block_sparse_moe.experts.189.w1", "model.layers.21.block_sparse_moe.experts.190.w1", "model.layers.21.block_sparse_moe.experts.191.w1", "model.layers.21.block_sparse_moe.experts.192.w1", "model.layers.21.block_sparse_moe.experts.193.w1", "model.layers.21.block_sparse_moe.experts.194.w1", "model.layers.21.block_sparse_moe.experts.195.w1", "model.layers.21.block_sparse_moe.experts.196.w1", "model.layers.21.block_sparse_moe.experts.197.w1", "model.layers.21.block_sparse_moe.experts.198.w1", "model.layers.21.block_sparse_moe.experts.199.w1", "model.layers.21.block_sparse_moe.experts.200.w1", "model.layers.21.block_sparse_moe.experts.201.w1", "model.layers.21.block_sparse_moe.experts.202.w1", "model.layers.21.block_sparse_moe.experts.203.w1", "model.layers.21.block_sparse_moe.experts.204.w1", "model.layers.21.block_sparse_moe.experts.205.w1", "model.layers.21.block_sparse_moe.experts.206.w1", "model.layers.21.block_sparse_moe.experts.207.w1", "model.layers.21.block_sparse_moe.experts.208.w1", "model.layers.21.block_sparse_moe.experts.209.w1", "model.layers.21.block_sparse_moe.experts.210.w1", "model.layers.21.block_sparse_moe.experts.211.w1", "model.layers.21.block_sparse_moe.experts.212.w1", "model.layers.21.block_sparse_moe.experts.213.w1", "model.layers.21.block_sparse_moe.experts.214.w1", "model.layers.21.block_sparse_moe.experts.215.w1", "model.layers.21.block_sparse_moe.experts.216.w1", "model.layers.21.block_sparse_moe.experts.217.w1", "model.layers.21.block_sparse_moe.experts.218.w1", "model.layers.21.block_sparse_moe.experts.219.w1", "model.layers.21.block_sparse_moe.experts.220.w1", "model.layers.21.block_sparse_moe.experts.221.w1", "model.layers.21.block_sparse_moe.experts.222.w1", "model.layers.21.block_sparse_moe.experts.223.w1", "model.layers.21.block_sparse_moe.experts.224.w1", "model.layers.21.block_sparse_moe.experts.225.w1", "model.layers.21.block_sparse_moe.experts.226.w1", "model.layers.21.block_sparse_moe.experts.227.w1", "model.layers.21.block_sparse_moe.experts.228.w1", "model.layers.21.block_sparse_moe.experts.229.w1", "model.layers.21.block_sparse_moe.experts.230.w1", "model.layers.21.block_sparse_moe.experts.231.w1", "model.layers.21.block_sparse_moe.experts.232.w1", "model.layers.21.block_sparse_moe.experts.233.w1", "model.layers.21.block_sparse_moe.experts.234.w1", "model.layers.21.block_sparse_moe.experts.235.w1", "model.layers.21.block_sparse_moe.experts.236.w1", "model.layers.21.block_sparse_moe.experts.237.w1", "model.layers.21.block_sparse_moe.experts.238.w1", "model.layers.21.block_sparse_moe.experts.239.w1", "model.layers.21.block_sparse_moe.experts.240.w1", "model.layers.21.block_sparse_moe.experts.241.w1", "model.layers.21.block_sparse_moe.experts.242.w1", "model.layers.21.block_sparse_moe.experts.243.w1", "model.layers.21.block_sparse_moe.experts.244.w1", "model.layers.21.block_sparse_moe.experts.245.w1", "model.layers.21.block_sparse_moe.experts.246.w1", "model.layers.21.block_sparse_moe.experts.247.w1", "model.layers.21.block_sparse_moe.experts.248.w1", "model.layers.21.block_sparse_moe.experts.249.w1", "model.layers.21.block_sparse_moe.experts.250.w1", "model.layers.21.block_sparse_moe.experts.251.w1", "model.layers.21.block_sparse_moe.experts.252.w1", "model.layers.21.block_sparse_moe.experts.253.w1", "model.layers.21.block_sparse_moe.experts.254.w1", "model.layers.21.block_sparse_moe.experts.255.w1", "model.layers.21.block_sparse_moe.experts.0.w3", "model.layers.21.block_sparse_moe.experts.1.w3", "model.layers.21.block_sparse_moe.experts.2.w3", "model.layers.21.block_sparse_moe.experts.3.w3", "model.layers.21.block_sparse_moe.experts.4.w3", "model.layers.21.block_sparse_moe.experts.5.w3", "model.layers.21.block_sparse_moe.experts.6.w3", "model.layers.21.block_sparse_moe.experts.7.w3", "model.layers.21.block_sparse_moe.experts.8.w3", "model.layers.21.block_sparse_moe.experts.9.w3", "model.layers.21.block_sparse_moe.experts.10.w3", "model.layers.21.block_sparse_moe.experts.11.w3", "model.layers.21.block_sparse_moe.experts.12.w3", "model.layers.21.block_sparse_moe.experts.13.w3", "model.layers.21.block_sparse_moe.experts.14.w3", "model.layers.21.block_sparse_moe.experts.15.w3", "model.layers.21.block_sparse_moe.experts.16.w3", "model.layers.21.block_sparse_moe.experts.17.w3", "model.layers.21.block_sparse_moe.experts.18.w3", "model.layers.21.block_sparse_moe.experts.19.w3", "model.layers.21.block_sparse_moe.experts.20.w3", "model.layers.21.block_sparse_moe.experts.21.w3", "model.layers.21.block_sparse_moe.experts.22.w3", "model.layers.21.block_sparse_moe.experts.23.w3", "model.layers.21.block_sparse_moe.experts.24.w3", "model.layers.21.block_sparse_moe.experts.25.w3", "model.layers.21.block_sparse_moe.experts.26.w3", "model.layers.21.block_sparse_moe.experts.27.w3", "model.layers.21.block_sparse_moe.experts.28.w3", "model.layers.21.block_sparse_moe.experts.29.w3", "model.layers.21.block_sparse_moe.experts.30.w3", "model.layers.21.block_sparse_moe.experts.31.w3", "model.layers.21.block_sparse_moe.experts.32.w3", "model.layers.21.block_sparse_moe.experts.33.w3", "model.layers.21.block_sparse_moe.experts.34.w3", "model.layers.21.block_sparse_moe.experts.35.w3", "model.layers.21.block_sparse_moe.experts.36.w3", "model.layers.21.block_sparse_moe.experts.37.w3", "model.layers.21.block_sparse_moe.experts.38.w3", "model.layers.21.block_sparse_moe.experts.39.w3", "model.layers.21.block_sparse_moe.experts.40.w3", "model.layers.21.block_sparse_moe.experts.41.w3", "model.layers.21.block_sparse_moe.experts.42.w3", "model.layers.21.block_sparse_moe.experts.43.w3", "model.layers.21.block_sparse_moe.experts.44.w3", "model.layers.21.block_sparse_moe.experts.45.w3", "model.layers.21.block_sparse_moe.experts.46.w3", "model.layers.21.block_sparse_moe.experts.47.w3", "model.layers.21.block_sparse_moe.experts.48.w3", "model.layers.21.block_sparse_moe.experts.49.w3", "model.layers.21.block_sparse_moe.experts.50.w3", "model.layers.21.block_sparse_moe.experts.51.w3", "model.layers.21.block_sparse_moe.experts.52.w3", "model.layers.21.block_sparse_moe.experts.53.w3", "model.layers.21.block_sparse_moe.experts.54.w3", "model.layers.21.block_sparse_moe.experts.55.w3", "model.layers.21.block_sparse_moe.experts.56.w3", "model.layers.21.block_sparse_moe.experts.57.w3", "model.layers.21.block_sparse_moe.experts.58.w3", "model.layers.21.block_sparse_moe.experts.59.w3", "model.layers.21.block_sparse_moe.experts.60.w3", "model.layers.21.block_sparse_moe.experts.61.w3", "model.layers.21.block_sparse_moe.experts.62.w3", "model.layers.21.block_sparse_moe.experts.63.w3", "model.layers.21.block_sparse_moe.experts.64.w3", "model.layers.21.block_sparse_moe.experts.65.w3", "model.layers.21.block_sparse_moe.experts.66.w3", "model.layers.21.block_sparse_moe.experts.67.w3", "model.layers.21.block_sparse_moe.experts.68.w3", "model.layers.21.block_sparse_moe.experts.69.w3", "model.layers.21.block_sparse_moe.experts.70.w3", "model.layers.21.block_sparse_moe.experts.71.w3", "model.layers.21.block_sparse_moe.experts.72.w3", "model.layers.21.block_sparse_moe.experts.73.w3", "model.layers.21.block_sparse_moe.experts.74.w3", "model.layers.21.block_sparse_moe.experts.75.w3", "model.layers.21.block_sparse_moe.experts.76.w3", "model.layers.21.block_sparse_moe.experts.77.w3", "model.layers.21.block_sparse_moe.experts.78.w3", "model.layers.21.block_sparse_moe.experts.79.w3", "model.layers.21.block_sparse_moe.experts.80.w3", "model.layers.21.block_sparse_moe.experts.81.w3", "model.layers.21.block_sparse_moe.experts.82.w3", "model.layers.21.block_sparse_moe.experts.83.w3", "model.layers.21.block_sparse_moe.experts.84.w3", "model.layers.21.block_sparse_moe.experts.85.w3", "model.layers.21.block_sparse_moe.experts.86.w3", "model.layers.21.block_sparse_moe.experts.87.w3", "model.layers.21.block_sparse_moe.experts.88.w3", "model.layers.21.block_sparse_moe.experts.89.w3", "model.layers.21.block_sparse_moe.experts.90.w3", "model.layers.21.block_sparse_moe.experts.91.w3", "model.layers.21.block_sparse_moe.experts.92.w3", "model.layers.21.block_sparse_moe.experts.93.w3", "model.layers.21.block_sparse_moe.experts.94.w3", "model.layers.21.block_sparse_moe.experts.95.w3", "model.layers.21.block_sparse_moe.experts.96.w3", "model.layers.21.block_sparse_moe.experts.97.w3", "model.layers.21.block_sparse_moe.experts.98.w3", "model.layers.21.block_sparse_moe.experts.99.w3", "model.layers.21.block_sparse_moe.experts.100.w3", "model.layers.21.block_sparse_moe.experts.101.w3", "model.layers.21.block_sparse_moe.experts.102.w3", "model.layers.21.block_sparse_moe.experts.103.w3", "model.layers.21.block_sparse_moe.experts.104.w3", "model.layers.21.block_sparse_moe.experts.105.w3", "model.layers.21.block_sparse_moe.experts.106.w3", "model.layers.21.block_sparse_moe.experts.107.w3", "model.layers.21.block_sparse_moe.experts.108.w3", "model.layers.21.block_sparse_moe.experts.109.w3", "model.layers.21.block_sparse_moe.experts.110.w3", "model.layers.21.block_sparse_moe.experts.111.w3", "model.layers.21.block_sparse_moe.experts.112.w3", "model.layers.21.block_sparse_moe.experts.113.w3", "model.layers.21.block_sparse_moe.experts.114.w3", "model.layers.21.block_sparse_moe.experts.115.w3", "model.layers.21.block_sparse_moe.experts.116.w3", "model.layers.21.block_sparse_moe.experts.117.w3", "model.layers.21.block_sparse_moe.experts.118.w3", "model.layers.21.block_sparse_moe.experts.119.w3", "model.layers.21.block_sparse_moe.experts.120.w3", "model.layers.21.block_sparse_moe.experts.121.w3", "model.layers.21.block_sparse_moe.experts.122.w3", "model.layers.21.block_sparse_moe.experts.123.w3", "model.layers.21.block_sparse_moe.experts.124.w3", "model.layers.21.block_sparse_moe.experts.125.w3", "model.layers.21.block_sparse_moe.experts.126.w3", "model.layers.21.block_sparse_moe.experts.127.w3", "model.layers.21.block_sparse_moe.experts.128.w3", "model.layers.21.block_sparse_moe.experts.129.w3", "model.layers.21.block_sparse_moe.experts.130.w3", "model.layers.21.block_sparse_moe.experts.131.w3", "model.layers.21.block_sparse_moe.experts.132.w3", "model.layers.21.block_sparse_moe.experts.133.w3", "model.layers.21.block_sparse_moe.experts.134.w3", "model.layers.21.block_sparse_moe.experts.135.w3", "model.layers.21.block_sparse_moe.experts.136.w3", "model.layers.21.block_sparse_moe.experts.137.w3", "model.layers.21.block_sparse_moe.experts.138.w3", "model.layers.21.block_sparse_moe.experts.139.w3", "model.layers.21.block_sparse_moe.experts.140.w3", "model.layers.21.block_sparse_moe.experts.141.w3", "model.layers.21.block_sparse_moe.experts.142.w3", "model.layers.21.block_sparse_moe.experts.143.w3", "model.layers.21.block_sparse_moe.experts.144.w3", "model.layers.21.block_sparse_moe.experts.145.w3", "model.layers.21.block_sparse_moe.experts.146.w3", "model.layers.21.block_sparse_moe.experts.147.w3", "model.layers.21.block_sparse_moe.experts.148.w3", "model.layers.21.block_sparse_moe.experts.149.w3", "model.layers.21.block_sparse_moe.experts.150.w3", "model.layers.21.block_sparse_moe.experts.151.w3", "model.layers.21.block_sparse_moe.experts.152.w3", "model.layers.21.block_sparse_moe.experts.153.w3", "model.layers.21.block_sparse_moe.experts.154.w3", "model.layers.21.block_sparse_moe.experts.155.w3", "model.layers.21.block_sparse_moe.experts.156.w3", "model.layers.21.block_sparse_moe.experts.157.w3", "model.layers.21.block_sparse_moe.experts.158.w3", "model.layers.21.block_sparse_moe.experts.159.w3", "model.layers.21.block_sparse_moe.experts.160.w3", "model.layers.21.block_sparse_moe.experts.161.w3", "model.layers.21.block_sparse_moe.experts.162.w3", "model.layers.21.block_sparse_moe.experts.163.w3", "model.layers.21.block_sparse_moe.experts.164.w3", "model.layers.21.block_sparse_moe.experts.165.w3", "model.layers.21.block_sparse_moe.experts.166.w3", "model.layers.21.block_sparse_moe.experts.167.w3", "model.layers.21.block_sparse_moe.experts.168.w3", "model.layers.21.block_sparse_moe.experts.169.w3", "model.layers.21.block_sparse_moe.experts.170.w3", "model.layers.21.block_sparse_moe.experts.171.w3", "model.layers.21.block_sparse_moe.experts.172.w3", "model.layers.21.block_sparse_moe.experts.173.w3", "model.layers.21.block_sparse_moe.experts.174.w3", "model.layers.21.block_sparse_moe.experts.175.w3", "model.layers.21.block_sparse_moe.experts.176.w3", "model.layers.21.block_sparse_moe.experts.177.w3", "model.layers.21.block_sparse_moe.experts.178.w3", "model.layers.21.block_sparse_moe.experts.179.w3", "model.layers.21.block_sparse_moe.experts.180.w3", "model.layers.21.block_sparse_moe.experts.181.w3", "model.layers.21.block_sparse_moe.experts.182.w3", "model.layers.21.block_sparse_moe.experts.183.w3", "model.layers.21.block_sparse_moe.experts.184.w3", "model.layers.21.block_sparse_moe.experts.185.w3", "model.layers.21.block_sparse_moe.experts.186.w3", "model.layers.21.block_sparse_moe.experts.187.w3", "model.layers.21.block_sparse_moe.experts.188.w3", "model.layers.21.block_sparse_moe.experts.189.w3", "model.layers.21.block_sparse_moe.experts.190.w3", "model.layers.21.block_sparse_moe.experts.191.w3", "model.layers.21.block_sparse_moe.experts.192.w3", "model.layers.21.block_sparse_moe.experts.193.w3", "model.layers.21.block_sparse_moe.experts.194.w3", "model.layers.21.block_sparse_moe.experts.195.w3", "model.layers.21.block_sparse_moe.experts.196.w3", "model.layers.21.block_sparse_moe.experts.197.w3", "model.layers.21.block_sparse_moe.experts.198.w3", "model.layers.21.block_sparse_moe.experts.199.w3", "model.layers.21.block_sparse_moe.experts.200.w3", "model.layers.21.block_sparse_moe.experts.201.w3", "model.layers.21.block_sparse_moe.experts.202.w3", "model.layers.21.block_sparse_moe.experts.203.w3", "model.layers.21.block_sparse_moe.experts.204.w3", "model.layers.21.block_sparse_moe.experts.205.w3", "model.layers.21.block_sparse_moe.experts.206.w3", "model.layers.21.block_sparse_moe.experts.207.w3", "model.layers.21.block_sparse_moe.experts.208.w3", "model.layers.21.block_sparse_moe.experts.209.w3", "model.layers.21.block_sparse_moe.experts.210.w3", "model.layers.21.block_sparse_moe.experts.211.w3", "model.layers.21.block_sparse_moe.experts.212.w3", "model.layers.21.block_sparse_moe.experts.213.w3", "model.layers.21.block_sparse_moe.experts.214.w3", "model.layers.21.block_sparse_moe.experts.215.w3", "model.layers.21.block_sparse_moe.experts.216.w3", "model.layers.21.block_sparse_moe.experts.217.w3", "model.layers.21.block_sparse_moe.experts.218.w3", "model.layers.21.block_sparse_moe.experts.219.w3", "model.layers.21.block_sparse_moe.experts.220.w3", "model.layers.21.block_sparse_moe.experts.221.w3", "model.layers.21.block_sparse_moe.experts.222.w3", "model.layers.21.block_sparse_moe.experts.223.w3", "model.layers.21.block_sparse_moe.experts.224.w3", "model.layers.21.block_sparse_moe.experts.225.w3", "model.layers.21.block_sparse_moe.experts.226.w3", "model.layers.21.block_sparse_moe.experts.227.w3", "model.layers.21.block_sparse_moe.experts.228.w3", "model.layers.21.block_sparse_moe.experts.229.w3", "model.layers.21.block_sparse_moe.experts.230.w3", "model.layers.21.block_sparse_moe.experts.231.w3", "model.layers.21.block_sparse_moe.experts.232.w3", "model.layers.21.block_sparse_moe.experts.233.w3", "model.layers.21.block_sparse_moe.experts.234.w3", "model.layers.21.block_sparse_moe.experts.235.w3", "model.layers.21.block_sparse_moe.experts.236.w3", "model.layers.21.block_sparse_moe.experts.237.w3", "model.layers.21.block_sparse_moe.experts.238.w3", "model.layers.21.block_sparse_moe.experts.239.w3", "model.layers.21.block_sparse_moe.experts.240.w3", "model.layers.21.block_sparse_moe.experts.241.w3", "model.layers.21.block_sparse_moe.experts.242.w3", "model.layers.21.block_sparse_moe.experts.243.w3", "model.layers.21.block_sparse_moe.experts.244.w3", "model.layers.21.block_sparse_moe.experts.245.w3", "model.layers.21.block_sparse_moe.experts.246.w3", "model.layers.21.block_sparse_moe.experts.247.w3", "model.layers.21.block_sparse_moe.experts.248.w3", "model.layers.21.block_sparse_moe.experts.249.w3", "model.layers.21.block_sparse_moe.experts.250.w3", "model.layers.21.block_sparse_moe.experts.251.w3", "model.layers.21.block_sparse_moe.experts.252.w3", "model.layers.21.block_sparse_moe.experts.253.w3", "model.layers.21.block_sparse_moe.experts.254.w3", "model.layers.21.block_sparse_moe.experts.255.w3", "model.layers.21.block_sparse_moe.experts.0.w2", "model.layers.21.block_sparse_moe.experts.1.w2", "model.layers.21.block_sparse_moe.experts.2.w2", "model.layers.21.block_sparse_moe.experts.3.w2", "model.layers.21.block_sparse_moe.experts.4.w2", "model.layers.21.block_sparse_moe.experts.5.w2", "model.layers.21.block_sparse_moe.experts.6.w2", "model.layers.21.block_sparse_moe.experts.7.w2", "model.layers.21.block_sparse_moe.experts.8.w2", "model.layers.21.block_sparse_moe.experts.9.w2", "model.layers.21.block_sparse_moe.experts.10.w2", "model.layers.21.block_sparse_moe.experts.11.w2", "model.layers.21.block_sparse_moe.experts.12.w2", "model.layers.21.block_sparse_moe.experts.13.w2", "model.layers.21.block_sparse_moe.experts.14.w2", "model.layers.21.block_sparse_moe.experts.15.w2", "model.layers.21.block_sparse_moe.experts.16.w2", "model.layers.21.block_sparse_moe.experts.17.w2", "model.layers.21.block_sparse_moe.experts.18.w2", "model.layers.21.block_sparse_moe.experts.19.w2", "model.layers.21.block_sparse_moe.experts.20.w2", "model.layers.21.block_sparse_moe.experts.21.w2", "model.layers.21.block_sparse_moe.experts.22.w2", "model.layers.21.block_sparse_moe.experts.23.w2", "model.layers.21.block_sparse_moe.experts.24.w2", "model.layers.21.block_sparse_moe.experts.25.w2", "model.layers.21.block_sparse_moe.experts.26.w2", "model.layers.21.block_sparse_moe.experts.27.w2", "model.layers.21.block_sparse_moe.experts.28.w2", "model.layers.21.block_sparse_moe.experts.29.w2", "model.layers.21.block_sparse_moe.experts.30.w2", "model.layers.21.block_sparse_moe.experts.31.w2", "model.layers.21.block_sparse_moe.experts.32.w2", "model.layers.21.block_sparse_moe.experts.33.w2", "model.layers.21.block_sparse_moe.experts.34.w2", "model.layers.21.block_sparse_moe.experts.35.w2", "model.layers.21.block_sparse_moe.experts.36.w2", "model.layers.21.block_sparse_moe.experts.37.w2", "model.layers.21.block_sparse_moe.experts.38.w2", "model.layers.21.block_sparse_moe.experts.39.w2", "model.layers.21.block_sparse_moe.experts.40.w2", "model.layers.21.block_sparse_moe.experts.41.w2", "model.layers.21.block_sparse_moe.experts.42.w2", "model.layers.21.block_sparse_moe.experts.43.w2", "model.layers.21.block_sparse_moe.experts.44.w2", "model.layers.21.block_sparse_moe.experts.45.w2", "model.layers.21.block_sparse_moe.experts.46.w2", "model.layers.21.block_sparse_moe.experts.47.w2", "model.layers.21.block_sparse_moe.experts.48.w2", "model.layers.21.block_sparse_moe.experts.49.w2", "model.layers.21.block_sparse_moe.experts.50.w2", "model.layers.21.block_sparse_moe.experts.51.w2", "model.layers.21.block_sparse_moe.experts.52.w2", "model.layers.21.block_sparse_moe.experts.53.w2", "model.layers.21.block_sparse_moe.experts.54.w2", "model.layers.21.block_sparse_moe.experts.55.w2", "model.layers.21.block_sparse_moe.experts.56.w2", "model.layers.21.block_sparse_moe.experts.57.w2", "model.layers.21.block_sparse_moe.experts.58.w2", "model.layers.21.block_sparse_moe.experts.59.w2", "model.layers.21.block_sparse_moe.experts.60.w2", "model.layers.21.block_sparse_moe.experts.61.w2", "model.layers.21.block_sparse_moe.experts.62.w2", "model.layers.21.block_sparse_moe.experts.63.w2", "model.layers.21.block_sparse_moe.experts.64.w2", "model.layers.21.block_sparse_moe.experts.65.w2", "model.layers.21.block_sparse_moe.experts.66.w2", "model.layers.21.block_sparse_moe.experts.67.w2", "model.layers.21.block_sparse_moe.experts.68.w2", "model.layers.21.block_sparse_moe.experts.69.w2", "model.layers.21.block_sparse_moe.experts.70.w2", "model.layers.21.block_sparse_moe.experts.71.w2", "model.layers.21.block_sparse_moe.experts.72.w2", "model.layers.21.block_sparse_moe.experts.73.w2", "model.layers.21.block_sparse_moe.experts.74.w2", "model.layers.21.block_sparse_moe.experts.75.w2", "model.layers.21.block_sparse_moe.experts.76.w2", "model.layers.21.block_sparse_moe.experts.77.w2", "model.layers.21.block_sparse_moe.experts.78.w2", "model.layers.21.block_sparse_moe.experts.79.w2", "model.layers.21.block_sparse_moe.experts.80.w2", "model.layers.21.block_sparse_moe.experts.81.w2", "model.layers.21.block_sparse_moe.experts.82.w2", "model.layers.21.block_sparse_moe.experts.83.w2", "model.layers.21.block_sparse_moe.experts.84.w2", "model.layers.21.block_sparse_moe.experts.85.w2", "model.layers.21.block_sparse_moe.experts.86.w2", "model.layers.21.block_sparse_moe.experts.87.w2", "model.layers.21.block_sparse_moe.experts.88.w2", "model.layers.21.block_sparse_moe.experts.89.w2", "model.layers.21.block_sparse_moe.experts.90.w2", "model.layers.21.block_sparse_moe.experts.91.w2", "model.layers.21.block_sparse_moe.experts.92.w2", "model.layers.21.block_sparse_moe.experts.93.w2", "model.layers.21.block_sparse_moe.experts.94.w2", "model.layers.21.block_sparse_moe.experts.95.w2", "model.layers.21.block_sparse_moe.experts.96.w2", "model.layers.21.block_sparse_moe.experts.97.w2", "model.layers.21.block_sparse_moe.experts.98.w2", "model.layers.21.block_sparse_moe.experts.99.w2", "model.layers.21.block_sparse_moe.experts.100.w2", "model.layers.21.block_sparse_moe.experts.101.w2", "model.layers.21.block_sparse_moe.experts.102.w2", "model.layers.21.block_sparse_moe.experts.103.w2", "model.layers.21.block_sparse_moe.experts.104.w2", "model.layers.21.block_sparse_moe.experts.105.w2", "model.layers.21.block_sparse_moe.experts.106.w2", "model.layers.21.block_sparse_moe.experts.107.w2", "model.layers.21.block_sparse_moe.experts.108.w2", "model.layers.21.block_sparse_moe.experts.109.w2", "model.layers.21.block_sparse_moe.experts.110.w2", "model.layers.21.block_sparse_moe.experts.111.w2", "model.layers.21.block_sparse_moe.experts.112.w2", "model.layers.21.block_sparse_moe.experts.113.w2", "model.layers.21.block_sparse_moe.experts.114.w2", "model.layers.21.block_sparse_moe.experts.115.w2", "model.layers.21.block_sparse_moe.experts.116.w2", "model.layers.21.block_sparse_moe.experts.117.w2", "model.layers.21.block_sparse_moe.experts.118.w2", "model.layers.21.block_sparse_moe.experts.119.w2", "model.layers.21.block_sparse_moe.experts.120.w2", "model.layers.21.block_sparse_moe.experts.121.w2", "model.layers.21.block_sparse_moe.experts.122.w2", "model.layers.21.block_sparse_moe.experts.123.w2", "model.layers.21.block_sparse_moe.experts.124.w2", "model.layers.21.block_sparse_moe.experts.125.w2", "model.layers.21.block_sparse_moe.experts.126.w2", "model.layers.21.block_sparse_moe.experts.127.w2", "model.layers.21.block_sparse_moe.experts.128.w2", "model.layers.21.block_sparse_moe.experts.129.w2", "model.layers.21.block_sparse_moe.experts.130.w2", "model.layers.21.block_sparse_moe.experts.131.w2", "model.layers.21.block_sparse_moe.experts.132.w2", "model.layers.21.block_sparse_moe.experts.133.w2", "model.layers.21.block_sparse_moe.experts.134.w2", "model.layers.21.block_sparse_moe.experts.135.w2", "model.layers.21.block_sparse_moe.experts.136.w2", "model.layers.21.block_sparse_moe.experts.137.w2", "model.layers.21.block_sparse_moe.experts.138.w2", "model.layers.21.block_sparse_moe.experts.139.w2", "model.layers.21.block_sparse_moe.experts.140.w2", "model.layers.21.block_sparse_moe.experts.141.w2", "model.layers.21.block_sparse_moe.experts.142.w2", "model.layers.21.block_sparse_moe.experts.143.w2", "model.layers.21.block_sparse_moe.experts.144.w2", "model.layers.21.block_sparse_moe.experts.145.w2", "model.layers.21.block_sparse_moe.experts.146.w2", "model.layers.21.block_sparse_moe.experts.147.w2", "model.layers.21.block_sparse_moe.experts.148.w2", "model.layers.21.block_sparse_moe.experts.149.w2", "model.layers.21.block_sparse_moe.experts.150.w2", "model.layers.21.block_sparse_moe.experts.151.w2", "model.layers.21.block_sparse_moe.experts.152.w2", "model.layers.21.block_sparse_moe.experts.153.w2", "model.layers.21.block_sparse_moe.experts.154.w2", "model.layers.21.block_sparse_moe.experts.155.w2", "model.layers.21.block_sparse_moe.experts.156.w2", "model.layers.21.block_sparse_moe.experts.157.w2", "model.layers.21.block_sparse_moe.experts.158.w2", "model.layers.21.block_sparse_moe.experts.159.w2", "model.layers.21.block_sparse_moe.experts.160.w2", "model.layers.21.block_sparse_moe.experts.161.w2", "model.layers.21.block_sparse_moe.experts.162.w2", "model.layers.21.block_sparse_moe.experts.163.w2", "model.layers.21.block_sparse_moe.experts.164.w2", "model.layers.21.block_sparse_moe.experts.165.w2", "model.layers.21.block_sparse_moe.experts.166.w2", "model.layers.21.block_sparse_moe.experts.167.w2", "model.layers.21.block_sparse_moe.experts.168.w2", "model.layers.21.block_sparse_moe.experts.169.w2", "model.layers.21.block_sparse_moe.experts.170.w2", "model.layers.21.block_sparse_moe.experts.171.w2", "model.layers.21.block_sparse_moe.experts.172.w2", "model.layers.21.block_sparse_moe.experts.173.w2", "model.layers.21.block_sparse_moe.experts.174.w2", "model.layers.21.block_sparse_moe.experts.175.w2", "model.layers.21.block_sparse_moe.experts.176.w2", "model.layers.21.block_sparse_moe.experts.177.w2", "model.layers.21.block_sparse_moe.experts.178.w2", "model.layers.21.block_sparse_moe.experts.179.w2", "model.layers.21.block_sparse_moe.experts.180.w2", "model.layers.21.block_sparse_moe.experts.181.w2", "model.layers.21.block_sparse_moe.experts.182.w2", "model.layers.21.block_sparse_moe.experts.183.w2", "model.layers.21.block_sparse_moe.experts.184.w2", "model.layers.21.block_sparse_moe.experts.185.w2", "model.layers.21.block_sparse_moe.experts.186.w2", "model.layers.21.block_sparse_moe.experts.187.w2", "model.layers.21.block_sparse_moe.experts.188.w2", "model.layers.21.block_sparse_moe.experts.189.w2", "model.layers.21.block_sparse_moe.experts.190.w2", "model.layers.21.block_sparse_moe.experts.191.w2", "model.layers.21.block_sparse_moe.experts.192.w2", "model.layers.21.block_sparse_moe.experts.193.w2", "model.layers.21.block_sparse_moe.experts.194.w2", "model.layers.21.block_sparse_moe.experts.195.w2", "model.layers.21.block_sparse_moe.experts.196.w2", "model.layers.21.block_sparse_moe.experts.197.w2", "model.layers.21.block_sparse_moe.experts.198.w2", "model.layers.21.block_sparse_moe.experts.199.w2", "model.layers.21.block_sparse_moe.experts.200.w2", "model.layers.21.block_sparse_moe.experts.201.w2", "model.layers.21.block_sparse_moe.experts.202.w2", "model.layers.21.block_sparse_moe.experts.203.w2", "model.layers.21.block_sparse_moe.experts.204.w2", "model.layers.21.block_sparse_moe.experts.205.w2", "model.layers.21.block_sparse_moe.experts.206.w2", "model.layers.21.block_sparse_moe.experts.207.w2", "model.layers.21.block_sparse_moe.experts.208.w2", "model.layers.21.block_sparse_moe.experts.209.w2", "model.layers.21.block_sparse_moe.experts.210.w2", "model.layers.21.block_sparse_moe.experts.211.w2", "model.layers.21.block_sparse_moe.experts.212.w2", "model.layers.21.block_sparse_moe.experts.213.w2", "model.layers.21.block_sparse_moe.experts.214.w2", "model.layers.21.block_sparse_moe.experts.215.w2", "model.layers.21.block_sparse_moe.experts.216.w2", "model.layers.21.block_sparse_moe.experts.217.w2", "model.layers.21.block_sparse_moe.experts.218.w2", "model.layers.21.block_sparse_moe.experts.219.w2", "model.layers.21.block_sparse_moe.experts.220.w2", "model.layers.21.block_sparse_moe.experts.221.w2", "model.layers.21.block_sparse_moe.experts.222.w2", "model.layers.21.block_sparse_moe.experts.223.w2", "model.layers.21.block_sparse_moe.experts.224.w2", "model.layers.21.block_sparse_moe.experts.225.w2", "model.layers.21.block_sparse_moe.experts.226.w2", "model.layers.21.block_sparse_moe.experts.227.w2", "model.layers.21.block_sparse_moe.experts.228.w2", "model.layers.21.block_sparse_moe.experts.229.w2", "model.layers.21.block_sparse_moe.experts.230.w2", "model.layers.21.block_sparse_moe.experts.231.w2", "model.layers.21.block_sparse_moe.experts.232.w2", "model.layers.21.block_sparse_moe.experts.233.w2", "model.layers.21.block_sparse_moe.experts.234.w2", "model.layers.21.block_sparse_moe.experts.235.w2", "model.layers.21.block_sparse_moe.experts.236.w2", "model.layers.21.block_sparse_moe.experts.237.w2", "model.layers.21.block_sparse_moe.experts.238.w2", "model.layers.21.block_sparse_moe.experts.239.w2", "model.layers.21.block_sparse_moe.experts.240.w2", "model.layers.21.block_sparse_moe.experts.241.w2", "model.layers.21.block_sparse_moe.experts.242.w2", "model.layers.21.block_sparse_moe.experts.243.w2", "model.layers.21.block_sparse_moe.experts.244.w2", "model.layers.21.block_sparse_moe.experts.245.w2", "model.layers.21.block_sparse_moe.experts.246.w2", "model.layers.21.block_sparse_moe.experts.247.w2", "model.layers.21.block_sparse_moe.experts.248.w2", "model.layers.21.block_sparse_moe.experts.249.w2", "model.layers.21.block_sparse_moe.experts.250.w2", "model.layers.21.block_sparse_moe.experts.251.w2", "model.layers.21.block_sparse_moe.experts.252.w2", "model.layers.21.block_sparse_moe.experts.253.w2", "model.layers.21.block_sparse_moe.experts.254.w2", "model.layers.21.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006790434941649215, "dbits": 3623878656 } ] }, { "idx": 44, "layers": [ "model.layers.22.self_attn.q_proj", "model.layers.22.self_attn.k_proj", "model.layers.22.self_attn.v_proj", "model.layers.22.self_attn.o_proj" ], "candidates": [ { "dkld": -0.02616172507405279, "dbits": 44040192 } ] }, { "idx": 45, "layers": [ "model.layers.22.block_sparse_moe.experts.0.w1", "model.layers.22.block_sparse_moe.experts.1.w1", "model.layers.22.block_sparse_moe.experts.2.w1", "model.layers.22.block_sparse_moe.experts.3.w1", "model.layers.22.block_sparse_moe.experts.4.w1", "model.layers.22.block_sparse_moe.experts.5.w1", "model.layers.22.block_sparse_moe.experts.6.w1", "model.layers.22.block_sparse_moe.experts.7.w1", "model.layers.22.block_sparse_moe.experts.8.w1", "model.layers.22.block_sparse_moe.experts.9.w1", "model.layers.22.block_sparse_moe.experts.10.w1", "model.layers.22.block_sparse_moe.experts.11.w1", "model.layers.22.block_sparse_moe.experts.12.w1", "model.layers.22.block_sparse_moe.experts.13.w1", "model.layers.22.block_sparse_moe.experts.14.w1", "model.layers.22.block_sparse_moe.experts.15.w1", "model.layers.22.block_sparse_moe.experts.16.w1", "model.layers.22.block_sparse_moe.experts.17.w1", "model.layers.22.block_sparse_moe.experts.18.w1", "model.layers.22.block_sparse_moe.experts.19.w1", "model.layers.22.block_sparse_moe.experts.20.w1", "model.layers.22.block_sparse_moe.experts.21.w1", "model.layers.22.block_sparse_moe.experts.22.w1", "model.layers.22.block_sparse_moe.experts.23.w1", "model.layers.22.block_sparse_moe.experts.24.w1", "model.layers.22.block_sparse_moe.experts.25.w1", "model.layers.22.block_sparse_moe.experts.26.w1", "model.layers.22.block_sparse_moe.experts.27.w1", "model.layers.22.block_sparse_moe.experts.28.w1", "model.layers.22.block_sparse_moe.experts.29.w1", "model.layers.22.block_sparse_moe.experts.30.w1", "model.layers.22.block_sparse_moe.experts.31.w1", "model.layers.22.block_sparse_moe.experts.32.w1", "model.layers.22.block_sparse_moe.experts.33.w1", "model.layers.22.block_sparse_moe.experts.34.w1", "model.layers.22.block_sparse_moe.experts.35.w1", "model.layers.22.block_sparse_moe.experts.36.w1", "model.layers.22.block_sparse_moe.experts.37.w1", "model.layers.22.block_sparse_moe.experts.38.w1", "model.layers.22.block_sparse_moe.experts.39.w1", "model.layers.22.block_sparse_moe.experts.40.w1", "model.layers.22.block_sparse_moe.experts.41.w1", "model.layers.22.block_sparse_moe.experts.42.w1", "model.layers.22.block_sparse_moe.experts.43.w1", "model.layers.22.block_sparse_moe.experts.44.w1", "model.layers.22.block_sparse_moe.experts.45.w1", "model.layers.22.block_sparse_moe.experts.46.w1", "model.layers.22.block_sparse_moe.experts.47.w1", "model.layers.22.block_sparse_moe.experts.48.w1", "model.layers.22.block_sparse_moe.experts.49.w1", "model.layers.22.block_sparse_moe.experts.50.w1", "model.layers.22.block_sparse_moe.experts.51.w1", "model.layers.22.block_sparse_moe.experts.52.w1", "model.layers.22.block_sparse_moe.experts.53.w1", "model.layers.22.block_sparse_moe.experts.54.w1", "model.layers.22.block_sparse_moe.experts.55.w1", "model.layers.22.block_sparse_moe.experts.56.w1", "model.layers.22.block_sparse_moe.experts.57.w1", "model.layers.22.block_sparse_moe.experts.58.w1", "model.layers.22.block_sparse_moe.experts.59.w1", "model.layers.22.block_sparse_moe.experts.60.w1", "model.layers.22.block_sparse_moe.experts.61.w1", "model.layers.22.block_sparse_moe.experts.62.w1", "model.layers.22.block_sparse_moe.experts.63.w1", "model.layers.22.block_sparse_moe.experts.64.w1", "model.layers.22.block_sparse_moe.experts.65.w1", "model.layers.22.block_sparse_moe.experts.66.w1", "model.layers.22.block_sparse_moe.experts.67.w1", "model.layers.22.block_sparse_moe.experts.68.w1", "model.layers.22.block_sparse_moe.experts.69.w1", "model.layers.22.block_sparse_moe.experts.70.w1", "model.layers.22.block_sparse_moe.experts.71.w1", "model.layers.22.block_sparse_moe.experts.72.w1", "model.layers.22.block_sparse_moe.experts.73.w1", "model.layers.22.block_sparse_moe.experts.74.w1", "model.layers.22.block_sparse_moe.experts.75.w1", "model.layers.22.block_sparse_moe.experts.76.w1", "model.layers.22.block_sparse_moe.experts.77.w1", "model.layers.22.block_sparse_moe.experts.78.w1", "model.layers.22.block_sparse_moe.experts.79.w1", "model.layers.22.block_sparse_moe.experts.80.w1", "model.layers.22.block_sparse_moe.experts.81.w1", "model.layers.22.block_sparse_moe.experts.82.w1", "model.layers.22.block_sparse_moe.experts.83.w1", "model.layers.22.block_sparse_moe.experts.84.w1", "model.layers.22.block_sparse_moe.experts.85.w1", "model.layers.22.block_sparse_moe.experts.86.w1", "model.layers.22.block_sparse_moe.experts.87.w1", "model.layers.22.block_sparse_moe.experts.88.w1", "model.layers.22.block_sparse_moe.experts.89.w1", "model.layers.22.block_sparse_moe.experts.90.w1", "model.layers.22.block_sparse_moe.experts.91.w1", "model.layers.22.block_sparse_moe.experts.92.w1", "model.layers.22.block_sparse_moe.experts.93.w1", "model.layers.22.block_sparse_moe.experts.94.w1", "model.layers.22.block_sparse_moe.experts.95.w1", "model.layers.22.block_sparse_moe.experts.96.w1", "model.layers.22.block_sparse_moe.experts.97.w1", "model.layers.22.block_sparse_moe.experts.98.w1", "model.layers.22.block_sparse_moe.experts.99.w1", "model.layers.22.block_sparse_moe.experts.100.w1", "model.layers.22.block_sparse_moe.experts.101.w1", "model.layers.22.block_sparse_moe.experts.102.w1", "model.layers.22.block_sparse_moe.experts.103.w1", "model.layers.22.block_sparse_moe.experts.104.w1", "model.layers.22.block_sparse_moe.experts.105.w1", "model.layers.22.block_sparse_moe.experts.106.w1", "model.layers.22.block_sparse_moe.experts.107.w1", "model.layers.22.block_sparse_moe.experts.108.w1", "model.layers.22.block_sparse_moe.experts.109.w1", "model.layers.22.block_sparse_moe.experts.110.w1", "model.layers.22.block_sparse_moe.experts.111.w1", "model.layers.22.block_sparse_moe.experts.112.w1", "model.layers.22.block_sparse_moe.experts.113.w1", "model.layers.22.block_sparse_moe.experts.114.w1", "model.layers.22.block_sparse_moe.experts.115.w1", "model.layers.22.block_sparse_moe.experts.116.w1", "model.layers.22.block_sparse_moe.experts.117.w1", "model.layers.22.block_sparse_moe.experts.118.w1", "model.layers.22.block_sparse_moe.experts.119.w1", "model.layers.22.block_sparse_moe.experts.120.w1", "model.layers.22.block_sparse_moe.experts.121.w1", "model.layers.22.block_sparse_moe.experts.122.w1", "model.layers.22.block_sparse_moe.experts.123.w1", "model.layers.22.block_sparse_moe.experts.124.w1", "model.layers.22.block_sparse_moe.experts.125.w1", "model.layers.22.block_sparse_moe.experts.126.w1", "model.layers.22.block_sparse_moe.experts.127.w1", "model.layers.22.block_sparse_moe.experts.128.w1", "model.layers.22.block_sparse_moe.experts.129.w1", "model.layers.22.block_sparse_moe.experts.130.w1", "model.layers.22.block_sparse_moe.experts.131.w1", "model.layers.22.block_sparse_moe.experts.132.w1", "model.layers.22.block_sparse_moe.experts.133.w1", "model.layers.22.block_sparse_moe.experts.134.w1", "model.layers.22.block_sparse_moe.experts.135.w1", "model.layers.22.block_sparse_moe.experts.136.w1", "model.layers.22.block_sparse_moe.experts.137.w1", "model.layers.22.block_sparse_moe.experts.138.w1", "model.layers.22.block_sparse_moe.experts.139.w1", "model.layers.22.block_sparse_moe.experts.140.w1", "model.layers.22.block_sparse_moe.experts.141.w1", "model.layers.22.block_sparse_moe.experts.142.w1", "model.layers.22.block_sparse_moe.experts.143.w1", "model.layers.22.block_sparse_moe.experts.144.w1", "model.layers.22.block_sparse_moe.experts.145.w1", "model.layers.22.block_sparse_moe.experts.146.w1", "model.layers.22.block_sparse_moe.experts.147.w1", "model.layers.22.block_sparse_moe.experts.148.w1", "model.layers.22.block_sparse_moe.experts.149.w1", "model.layers.22.block_sparse_moe.experts.150.w1", "model.layers.22.block_sparse_moe.experts.151.w1", "model.layers.22.block_sparse_moe.experts.152.w1", "model.layers.22.block_sparse_moe.experts.153.w1", "model.layers.22.block_sparse_moe.experts.154.w1", "model.layers.22.block_sparse_moe.experts.155.w1", "model.layers.22.block_sparse_moe.experts.156.w1", "model.layers.22.block_sparse_moe.experts.157.w1", "model.layers.22.block_sparse_moe.experts.158.w1", "model.layers.22.block_sparse_moe.experts.159.w1", "model.layers.22.block_sparse_moe.experts.160.w1", "model.layers.22.block_sparse_moe.experts.161.w1", "model.layers.22.block_sparse_moe.experts.162.w1", "model.layers.22.block_sparse_moe.experts.163.w1", "model.layers.22.block_sparse_moe.experts.164.w1", "model.layers.22.block_sparse_moe.experts.165.w1", "model.layers.22.block_sparse_moe.experts.166.w1", "model.layers.22.block_sparse_moe.experts.167.w1", "model.layers.22.block_sparse_moe.experts.168.w1", "model.layers.22.block_sparse_moe.experts.169.w1", "model.layers.22.block_sparse_moe.experts.170.w1", "model.layers.22.block_sparse_moe.experts.171.w1", "model.layers.22.block_sparse_moe.experts.172.w1", "model.layers.22.block_sparse_moe.experts.173.w1", "model.layers.22.block_sparse_moe.experts.174.w1", "model.layers.22.block_sparse_moe.experts.175.w1", "model.layers.22.block_sparse_moe.experts.176.w1", "model.layers.22.block_sparse_moe.experts.177.w1", "model.layers.22.block_sparse_moe.experts.178.w1", "model.layers.22.block_sparse_moe.experts.179.w1", "model.layers.22.block_sparse_moe.experts.180.w1", "model.layers.22.block_sparse_moe.experts.181.w1", "model.layers.22.block_sparse_moe.experts.182.w1", "model.layers.22.block_sparse_moe.experts.183.w1", "model.layers.22.block_sparse_moe.experts.184.w1", "model.layers.22.block_sparse_moe.experts.185.w1", "model.layers.22.block_sparse_moe.experts.186.w1", "model.layers.22.block_sparse_moe.experts.187.w1", "model.layers.22.block_sparse_moe.experts.188.w1", "model.layers.22.block_sparse_moe.experts.189.w1", "model.layers.22.block_sparse_moe.experts.190.w1", "model.layers.22.block_sparse_moe.experts.191.w1", "model.layers.22.block_sparse_moe.experts.192.w1", "model.layers.22.block_sparse_moe.experts.193.w1", "model.layers.22.block_sparse_moe.experts.194.w1", "model.layers.22.block_sparse_moe.experts.195.w1", "model.layers.22.block_sparse_moe.experts.196.w1", "model.layers.22.block_sparse_moe.experts.197.w1", "model.layers.22.block_sparse_moe.experts.198.w1", "model.layers.22.block_sparse_moe.experts.199.w1", "model.layers.22.block_sparse_moe.experts.200.w1", "model.layers.22.block_sparse_moe.experts.201.w1", "model.layers.22.block_sparse_moe.experts.202.w1", "model.layers.22.block_sparse_moe.experts.203.w1", "model.layers.22.block_sparse_moe.experts.204.w1", "model.layers.22.block_sparse_moe.experts.205.w1", "model.layers.22.block_sparse_moe.experts.206.w1", "model.layers.22.block_sparse_moe.experts.207.w1", "model.layers.22.block_sparse_moe.experts.208.w1", "model.layers.22.block_sparse_moe.experts.209.w1", "model.layers.22.block_sparse_moe.experts.210.w1", "model.layers.22.block_sparse_moe.experts.211.w1", "model.layers.22.block_sparse_moe.experts.212.w1", "model.layers.22.block_sparse_moe.experts.213.w1", "model.layers.22.block_sparse_moe.experts.214.w1", "model.layers.22.block_sparse_moe.experts.215.w1", "model.layers.22.block_sparse_moe.experts.216.w1", "model.layers.22.block_sparse_moe.experts.217.w1", "model.layers.22.block_sparse_moe.experts.218.w1", "model.layers.22.block_sparse_moe.experts.219.w1", "model.layers.22.block_sparse_moe.experts.220.w1", "model.layers.22.block_sparse_moe.experts.221.w1", "model.layers.22.block_sparse_moe.experts.222.w1", "model.layers.22.block_sparse_moe.experts.223.w1", "model.layers.22.block_sparse_moe.experts.224.w1", "model.layers.22.block_sparse_moe.experts.225.w1", "model.layers.22.block_sparse_moe.experts.226.w1", "model.layers.22.block_sparse_moe.experts.227.w1", "model.layers.22.block_sparse_moe.experts.228.w1", "model.layers.22.block_sparse_moe.experts.229.w1", "model.layers.22.block_sparse_moe.experts.230.w1", "model.layers.22.block_sparse_moe.experts.231.w1", "model.layers.22.block_sparse_moe.experts.232.w1", "model.layers.22.block_sparse_moe.experts.233.w1", "model.layers.22.block_sparse_moe.experts.234.w1", "model.layers.22.block_sparse_moe.experts.235.w1", "model.layers.22.block_sparse_moe.experts.236.w1", "model.layers.22.block_sparse_moe.experts.237.w1", "model.layers.22.block_sparse_moe.experts.238.w1", "model.layers.22.block_sparse_moe.experts.239.w1", "model.layers.22.block_sparse_moe.experts.240.w1", "model.layers.22.block_sparse_moe.experts.241.w1", "model.layers.22.block_sparse_moe.experts.242.w1", "model.layers.22.block_sparse_moe.experts.243.w1", "model.layers.22.block_sparse_moe.experts.244.w1", "model.layers.22.block_sparse_moe.experts.245.w1", "model.layers.22.block_sparse_moe.experts.246.w1", "model.layers.22.block_sparse_moe.experts.247.w1", "model.layers.22.block_sparse_moe.experts.248.w1", "model.layers.22.block_sparse_moe.experts.249.w1", "model.layers.22.block_sparse_moe.experts.250.w1", "model.layers.22.block_sparse_moe.experts.251.w1", "model.layers.22.block_sparse_moe.experts.252.w1", "model.layers.22.block_sparse_moe.experts.253.w1", "model.layers.22.block_sparse_moe.experts.254.w1", "model.layers.22.block_sparse_moe.experts.255.w1", "model.layers.22.block_sparse_moe.experts.0.w3", "model.layers.22.block_sparse_moe.experts.1.w3", "model.layers.22.block_sparse_moe.experts.2.w3", "model.layers.22.block_sparse_moe.experts.3.w3", "model.layers.22.block_sparse_moe.experts.4.w3", "model.layers.22.block_sparse_moe.experts.5.w3", "model.layers.22.block_sparse_moe.experts.6.w3", "model.layers.22.block_sparse_moe.experts.7.w3", "model.layers.22.block_sparse_moe.experts.8.w3", "model.layers.22.block_sparse_moe.experts.9.w3", "model.layers.22.block_sparse_moe.experts.10.w3", "model.layers.22.block_sparse_moe.experts.11.w3", "model.layers.22.block_sparse_moe.experts.12.w3", "model.layers.22.block_sparse_moe.experts.13.w3", "model.layers.22.block_sparse_moe.experts.14.w3", "model.layers.22.block_sparse_moe.experts.15.w3", "model.layers.22.block_sparse_moe.experts.16.w3", "model.layers.22.block_sparse_moe.experts.17.w3", "model.layers.22.block_sparse_moe.experts.18.w3", "model.layers.22.block_sparse_moe.experts.19.w3", "model.layers.22.block_sparse_moe.experts.20.w3", "model.layers.22.block_sparse_moe.experts.21.w3", "model.layers.22.block_sparse_moe.experts.22.w3", "model.layers.22.block_sparse_moe.experts.23.w3", "model.layers.22.block_sparse_moe.experts.24.w3", "model.layers.22.block_sparse_moe.experts.25.w3", "model.layers.22.block_sparse_moe.experts.26.w3", "model.layers.22.block_sparse_moe.experts.27.w3", "model.layers.22.block_sparse_moe.experts.28.w3", "model.layers.22.block_sparse_moe.experts.29.w3", "model.layers.22.block_sparse_moe.experts.30.w3", "model.layers.22.block_sparse_moe.experts.31.w3", "model.layers.22.block_sparse_moe.experts.32.w3", "model.layers.22.block_sparse_moe.experts.33.w3", "model.layers.22.block_sparse_moe.experts.34.w3", "model.layers.22.block_sparse_moe.experts.35.w3", "model.layers.22.block_sparse_moe.experts.36.w3", "model.layers.22.block_sparse_moe.experts.37.w3", "model.layers.22.block_sparse_moe.experts.38.w3", "model.layers.22.block_sparse_moe.experts.39.w3", "model.layers.22.block_sparse_moe.experts.40.w3", "model.layers.22.block_sparse_moe.experts.41.w3", "model.layers.22.block_sparse_moe.experts.42.w3", "model.layers.22.block_sparse_moe.experts.43.w3", "model.layers.22.block_sparse_moe.experts.44.w3", "model.layers.22.block_sparse_moe.experts.45.w3", "model.layers.22.block_sparse_moe.experts.46.w3", "model.layers.22.block_sparse_moe.experts.47.w3", "model.layers.22.block_sparse_moe.experts.48.w3", "model.layers.22.block_sparse_moe.experts.49.w3", "model.layers.22.block_sparse_moe.experts.50.w3", "model.layers.22.block_sparse_moe.experts.51.w3", "model.layers.22.block_sparse_moe.experts.52.w3", "model.layers.22.block_sparse_moe.experts.53.w3", "model.layers.22.block_sparse_moe.experts.54.w3", "model.layers.22.block_sparse_moe.experts.55.w3", "model.layers.22.block_sparse_moe.experts.56.w3", "model.layers.22.block_sparse_moe.experts.57.w3", "model.layers.22.block_sparse_moe.experts.58.w3", "model.layers.22.block_sparse_moe.experts.59.w3", "model.layers.22.block_sparse_moe.experts.60.w3", "model.layers.22.block_sparse_moe.experts.61.w3", "model.layers.22.block_sparse_moe.experts.62.w3", "model.layers.22.block_sparse_moe.experts.63.w3", "model.layers.22.block_sparse_moe.experts.64.w3", "model.layers.22.block_sparse_moe.experts.65.w3", "model.layers.22.block_sparse_moe.experts.66.w3", "model.layers.22.block_sparse_moe.experts.67.w3", "model.layers.22.block_sparse_moe.experts.68.w3", "model.layers.22.block_sparse_moe.experts.69.w3", "model.layers.22.block_sparse_moe.experts.70.w3", "model.layers.22.block_sparse_moe.experts.71.w3", "model.layers.22.block_sparse_moe.experts.72.w3", "model.layers.22.block_sparse_moe.experts.73.w3", "model.layers.22.block_sparse_moe.experts.74.w3", "model.layers.22.block_sparse_moe.experts.75.w3", "model.layers.22.block_sparse_moe.experts.76.w3", "model.layers.22.block_sparse_moe.experts.77.w3", "model.layers.22.block_sparse_moe.experts.78.w3", "model.layers.22.block_sparse_moe.experts.79.w3", "model.layers.22.block_sparse_moe.experts.80.w3", "model.layers.22.block_sparse_moe.experts.81.w3", "model.layers.22.block_sparse_moe.experts.82.w3", "model.layers.22.block_sparse_moe.experts.83.w3", "model.layers.22.block_sparse_moe.experts.84.w3", "model.layers.22.block_sparse_moe.experts.85.w3", "model.layers.22.block_sparse_moe.experts.86.w3", "model.layers.22.block_sparse_moe.experts.87.w3", "model.layers.22.block_sparse_moe.experts.88.w3", "model.layers.22.block_sparse_moe.experts.89.w3", "model.layers.22.block_sparse_moe.experts.90.w3", "model.layers.22.block_sparse_moe.experts.91.w3", "model.layers.22.block_sparse_moe.experts.92.w3", "model.layers.22.block_sparse_moe.experts.93.w3", "model.layers.22.block_sparse_moe.experts.94.w3", "model.layers.22.block_sparse_moe.experts.95.w3", "model.layers.22.block_sparse_moe.experts.96.w3", "model.layers.22.block_sparse_moe.experts.97.w3", "model.layers.22.block_sparse_moe.experts.98.w3", "model.layers.22.block_sparse_moe.experts.99.w3", "model.layers.22.block_sparse_moe.experts.100.w3", "model.layers.22.block_sparse_moe.experts.101.w3", "model.layers.22.block_sparse_moe.experts.102.w3", "model.layers.22.block_sparse_moe.experts.103.w3", "model.layers.22.block_sparse_moe.experts.104.w3", "model.layers.22.block_sparse_moe.experts.105.w3", "model.layers.22.block_sparse_moe.experts.106.w3", "model.layers.22.block_sparse_moe.experts.107.w3", "model.layers.22.block_sparse_moe.experts.108.w3", "model.layers.22.block_sparse_moe.experts.109.w3", "model.layers.22.block_sparse_moe.experts.110.w3", "model.layers.22.block_sparse_moe.experts.111.w3", "model.layers.22.block_sparse_moe.experts.112.w3", "model.layers.22.block_sparse_moe.experts.113.w3", "model.layers.22.block_sparse_moe.experts.114.w3", "model.layers.22.block_sparse_moe.experts.115.w3", "model.layers.22.block_sparse_moe.experts.116.w3", "model.layers.22.block_sparse_moe.experts.117.w3", "model.layers.22.block_sparse_moe.experts.118.w3", "model.layers.22.block_sparse_moe.experts.119.w3", "model.layers.22.block_sparse_moe.experts.120.w3", "model.layers.22.block_sparse_moe.experts.121.w3", "model.layers.22.block_sparse_moe.experts.122.w3", "model.layers.22.block_sparse_moe.experts.123.w3", "model.layers.22.block_sparse_moe.experts.124.w3", "model.layers.22.block_sparse_moe.experts.125.w3", "model.layers.22.block_sparse_moe.experts.126.w3", "model.layers.22.block_sparse_moe.experts.127.w3", "model.layers.22.block_sparse_moe.experts.128.w3", "model.layers.22.block_sparse_moe.experts.129.w3", "model.layers.22.block_sparse_moe.experts.130.w3", "model.layers.22.block_sparse_moe.experts.131.w3", "model.layers.22.block_sparse_moe.experts.132.w3", "model.layers.22.block_sparse_moe.experts.133.w3", "model.layers.22.block_sparse_moe.experts.134.w3", "model.layers.22.block_sparse_moe.experts.135.w3", "model.layers.22.block_sparse_moe.experts.136.w3", "model.layers.22.block_sparse_moe.experts.137.w3", "model.layers.22.block_sparse_moe.experts.138.w3", "model.layers.22.block_sparse_moe.experts.139.w3", "model.layers.22.block_sparse_moe.experts.140.w3", "model.layers.22.block_sparse_moe.experts.141.w3", "model.layers.22.block_sparse_moe.experts.142.w3", "model.layers.22.block_sparse_moe.experts.143.w3", "model.layers.22.block_sparse_moe.experts.144.w3", "model.layers.22.block_sparse_moe.experts.145.w3", "model.layers.22.block_sparse_moe.experts.146.w3", "model.layers.22.block_sparse_moe.experts.147.w3", "model.layers.22.block_sparse_moe.experts.148.w3", "model.layers.22.block_sparse_moe.experts.149.w3", "model.layers.22.block_sparse_moe.experts.150.w3", "model.layers.22.block_sparse_moe.experts.151.w3", "model.layers.22.block_sparse_moe.experts.152.w3", "model.layers.22.block_sparse_moe.experts.153.w3", "model.layers.22.block_sparse_moe.experts.154.w3", "model.layers.22.block_sparse_moe.experts.155.w3", "model.layers.22.block_sparse_moe.experts.156.w3", "model.layers.22.block_sparse_moe.experts.157.w3", "model.layers.22.block_sparse_moe.experts.158.w3", "model.layers.22.block_sparse_moe.experts.159.w3", "model.layers.22.block_sparse_moe.experts.160.w3", "model.layers.22.block_sparse_moe.experts.161.w3", "model.layers.22.block_sparse_moe.experts.162.w3", "model.layers.22.block_sparse_moe.experts.163.w3", "model.layers.22.block_sparse_moe.experts.164.w3", "model.layers.22.block_sparse_moe.experts.165.w3", "model.layers.22.block_sparse_moe.experts.166.w3", "model.layers.22.block_sparse_moe.experts.167.w3", "model.layers.22.block_sparse_moe.experts.168.w3", "model.layers.22.block_sparse_moe.experts.169.w3", "model.layers.22.block_sparse_moe.experts.170.w3", "model.layers.22.block_sparse_moe.experts.171.w3", "model.layers.22.block_sparse_moe.experts.172.w3", "model.layers.22.block_sparse_moe.experts.173.w3", "model.layers.22.block_sparse_moe.experts.174.w3", "model.layers.22.block_sparse_moe.experts.175.w3", "model.layers.22.block_sparse_moe.experts.176.w3", "model.layers.22.block_sparse_moe.experts.177.w3", "model.layers.22.block_sparse_moe.experts.178.w3", "model.layers.22.block_sparse_moe.experts.179.w3", "model.layers.22.block_sparse_moe.experts.180.w3", "model.layers.22.block_sparse_moe.experts.181.w3", "model.layers.22.block_sparse_moe.experts.182.w3", "model.layers.22.block_sparse_moe.experts.183.w3", "model.layers.22.block_sparse_moe.experts.184.w3", "model.layers.22.block_sparse_moe.experts.185.w3", "model.layers.22.block_sparse_moe.experts.186.w3", "model.layers.22.block_sparse_moe.experts.187.w3", "model.layers.22.block_sparse_moe.experts.188.w3", "model.layers.22.block_sparse_moe.experts.189.w3", "model.layers.22.block_sparse_moe.experts.190.w3", "model.layers.22.block_sparse_moe.experts.191.w3", "model.layers.22.block_sparse_moe.experts.192.w3", "model.layers.22.block_sparse_moe.experts.193.w3", "model.layers.22.block_sparse_moe.experts.194.w3", "model.layers.22.block_sparse_moe.experts.195.w3", "model.layers.22.block_sparse_moe.experts.196.w3", "model.layers.22.block_sparse_moe.experts.197.w3", "model.layers.22.block_sparse_moe.experts.198.w3", "model.layers.22.block_sparse_moe.experts.199.w3", "model.layers.22.block_sparse_moe.experts.200.w3", "model.layers.22.block_sparse_moe.experts.201.w3", "model.layers.22.block_sparse_moe.experts.202.w3", "model.layers.22.block_sparse_moe.experts.203.w3", "model.layers.22.block_sparse_moe.experts.204.w3", "model.layers.22.block_sparse_moe.experts.205.w3", "model.layers.22.block_sparse_moe.experts.206.w3", "model.layers.22.block_sparse_moe.experts.207.w3", "model.layers.22.block_sparse_moe.experts.208.w3", "model.layers.22.block_sparse_moe.experts.209.w3", "model.layers.22.block_sparse_moe.experts.210.w3", "model.layers.22.block_sparse_moe.experts.211.w3", "model.layers.22.block_sparse_moe.experts.212.w3", "model.layers.22.block_sparse_moe.experts.213.w3", "model.layers.22.block_sparse_moe.experts.214.w3", "model.layers.22.block_sparse_moe.experts.215.w3", "model.layers.22.block_sparse_moe.experts.216.w3", "model.layers.22.block_sparse_moe.experts.217.w3", "model.layers.22.block_sparse_moe.experts.218.w3", "model.layers.22.block_sparse_moe.experts.219.w3", "model.layers.22.block_sparse_moe.experts.220.w3", "model.layers.22.block_sparse_moe.experts.221.w3", "model.layers.22.block_sparse_moe.experts.222.w3", "model.layers.22.block_sparse_moe.experts.223.w3", "model.layers.22.block_sparse_moe.experts.224.w3", "model.layers.22.block_sparse_moe.experts.225.w3", "model.layers.22.block_sparse_moe.experts.226.w3", "model.layers.22.block_sparse_moe.experts.227.w3", "model.layers.22.block_sparse_moe.experts.228.w3", "model.layers.22.block_sparse_moe.experts.229.w3", "model.layers.22.block_sparse_moe.experts.230.w3", "model.layers.22.block_sparse_moe.experts.231.w3", "model.layers.22.block_sparse_moe.experts.232.w3", "model.layers.22.block_sparse_moe.experts.233.w3", "model.layers.22.block_sparse_moe.experts.234.w3", "model.layers.22.block_sparse_moe.experts.235.w3", "model.layers.22.block_sparse_moe.experts.236.w3", "model.layers.22.block_sparse_moe.experts.237.w3", "model.layers.22.block_sparse_moe.experts.238.w3", "model.layers.22.block_sparse_moe.experts.239.w3", "model.layers.22.block_sparse_moe.experts.240.w3", "model.layers.22.block_sparse_moe.experts.241.w3", "model.layers.22.block_sparse_moe.experts.242.w3", "model.layers.22.block_sparse_moe.experts.243.w3", "model.layers.22.block_sparse_moe.experts.244.w3", "model.layers.22.block_sparse_moe.experts.245.w3", "model.layers.22.block_sparse_moe.experts.246.w3", "model.layers.22.block_sparse_moe.experts.247.w3", "model.layers.22.block_sparse_moe.experts.248.w3", "model.layers.22.block_sparse_moe.experts.249.w3", "model.layers.22.block_sparse_moe.experts.250.w3", "model.layers.22.block_sparse_moe.experts.251.w3", "model.layers.22.block_sparse_moe.experts.252.w3", "model.layers.22.block_sparse_moe.experts.253.w3", "model.layers.22.block_sparse_moe.experts.254.w3", "model.layers.22.block_sparse_moe.experts.255.w3", "model.layers.22.block_sparse_moe.experts.0.w2", "model.layers.22.block_sparse_moe.experts.1.w2", "model.layers.22.block_sparse_moe.experts.2.w2", "model.layers.22.block_sparse_moe.experts.3.w2", "model.layers.22.block_sparse_moe.experts.4.w2", "model.layers.22.block_sparse_moe.experts.5.w2", "model.layers.22.block_sparse_moe.experts.6.w2", "model.layers.22.block_sparse_moe.experts.7.w2", "model.layers.22.block_sparse_moe.experts.8.w2", "model.layers.22.block_sparse_moe.experts.9.w2", "model.layers.22.block_sparse_moe.experts.10.w2", "model.layers.22.block_sparse_moe.experts.11.w2", "model.layers.22.block_sparse_moe.experts.12.w2", "model.layers.22.block_sparse_moe.experts.13.w2", "model.layers.22.block_sparse_moe.experts.14.w2", "model.layers.22.block_sparse_moe.experts.15.w2", "model.layers.22.block_sparse_moe.experts.16.w2", "model.layers.22.block_sparse_moe.experts.17.w2", "model.layers.22.block_sparse_moe.experts.18.w2", "model.layers.22.block_sparse_moe.experts.19.w2", "model.layers.22.block_sparse_moe.experts.20.w2", "model.layers.22.block_sparse_moe.experts.21.w2", "model.layers.22.block_sparse_moe.experts.22.w2", "model.layers.22.block_sparse_moe.experts.23.w2", "model.layers.22.block_sparse_moe.experts.24.w2", "model.layers.22.block_sparse_moe.experts.25.w2", "model.layers.22.block_sparse_moe.experts.26.w2", "model.layers.22.block_sparse_moe.experts.27.w2", "model.layers.22.block_sparse_moe.experts.28.w2", "model.layers.22.block_sparse_moe.experts.29.w2", "model.layers.22.block_sparse_moe.experts.30.w2", "model.layers.22.block_sparse_moe.experts.31.w2", "model.layers.22.block_sparse_moe.experts.32.w2", "model.layers.22.block_sparse_moe.experts.33.w2", "model.layers.22.block_sparse_moe.experts.34.w2", "model.layers.22.block_sparse_moe.experts.35.w2", "model.layers.22.block_sparse_moe.experts.36.w2", "model.layers.22.block_sparse_moe.experts.37.w2", "model.layers.22.block_sparse_moe.experts.38.w2", "model.layers.22.block_sparse_moe.experts.39.w2", "model.layers.22.block_sparse_moe.experts.40.w2", "model.layers.22.block_sparse_moe.experts.41.w2", "model.layers.22.block_sparse_moe.experts.42.w2", "model.layers.22.block_sparse_moe.experts.43.w2", "model.layers.22.block_sparse_moe.experts.44.w2", "model.layers.22.block_sparse_moe.experts.45.w2", "model.layers.22.block_sparse_moe.experts.46.w2", "model.layers.22.block_sparse_moe.experts.47.w2", "model.layers.22.block_sparse_moe.experts.48.w2", "model.layers.22.block_sparse_moe.experts.49.w2", "model.layers.22.block_sparse_moe.experts.50.w2", "model.layers.22.block_sparse_moe.experts.51.w2", "model.layers.22.block_sparse_moe.experts.52.w2", "model.layers.22.block_sparse_moe.experts.53.w2", "model.layers.22.block_sparse_moe.experts.54.w2", "model.layers.22.block_sparse_moe.experts.55.w2", "model.layers.22.block_sparse_moe.experts.56.w2", "model.layers.22.block_sparse_moe.experts.57.w2", "model.layers.22.block_sparse_moe.experts.58.w2", "model.layers.22.block_sparse_moe.experts.59.w2", "model.layers.22.block_sparse_moe.experts.60.w2", "model.layers.22.block_sparse_moe.experts.61.w2", "model.layers.22.block_sparse_moe.experts.62.w2", "model.layers.22.block_sparse_moe.experts.63.w2", "model.layers.22.block_sparse_moe.experts.64.w2", "model.layers.22.block_sparse_moe.experts.65.w2", "model.layers.22.block_sparse_moe.experts.66.w2", "model.layers.22.block_sparse_moe.experts.67.w2", "model.layers.22.block_sparse_moe.experts.68.w2", "model.layers.22.block_sparse_moe.experts.69.w2", "model.layers.22.block_sparse_moe.experts.70.w2", "model.layers.22.block_sparse_moe.experts.71.w2", "model.layers.22.block_sparse_moe.experts.72.w2", "model.layers.22.block_sparse_moe.experts.73.w2", "model.layers.22.block_sparse_moe.experts.74.w2", "model.layers.22.block_sparse_moe.experts.75.w2", "model.layers.22.block_sparse_moe.experts.76.w2", "model.layers.22.block_sparse_moe.experts.77.w2", "model.layers.22.block_sparse_moe.experts.78.w2", "model.layers.22.block_sparse_moe.experts.79.w2", "model.layers.22.block_sparse_moe.experts.80.w2", "model.layers.22.block_sparse_moe.experts.81.w2", "model.layers.22.block_sparse_moe.experts.82.w2", "model.layers.22.block_sparse_moe.experts.83.w2", "model.layers.22.block_sparse_moe.experts.84.w2", "model.layers.22.block_sparse_moe.experts.85.w2", "model.layers.22.block_sparse_moe.experts.86.w2", "model.layers.22.block_sparse_moe.experts.87.w2", "model.layers.22.block_sparse_moe.experts.88.w2", "model.layers.22.block_sparse_moe.experts.89.w2", "model.layers.22.block_sparse_moe.experts.90.w2", "model.layers.22.block_sparse_moe.experts.91.w2", "model.layers.22.block_sparse_moe.experts.92.w2", "model.layers.22.block_sparse_moe.experts.93.w2", "model.layers.22.block_sparse_moe.experts.94.w2", "model.layers.22.block_sparse_moe.experts.95.w2", "model.layers.22.block_sparse_moe.experts.96.w2", "model.layers.22.block_sparse_moe.experts.97.w2", "model.layers.22.block_sparse_moe.experts.98.w2", "model.layers.22.block_sparse_moe.experts.99.w2", "model.layers.22.block_sparse_moe.experts.100.w2", "model.layers.22.block_sparse_moe.experts.101.w2", "model.layers.22.block_sparse_moe.experts.102.w2", "model.layers.22.block_sparse_moe.experts.103.w2", "model.layers.22.block_sparse_moe.experts.104.w2", "model.layers.22.block_sparse_moe.experts.105.w2", "model.layers.22.block_sparse_moe.experts.106.w2", "model.layers.22.block_sparse_moe.experts.107.w2", "model.layers.22.block_sparse_moe.experts.108.w2", "model.layers.22.block_sparse_moe.experts.109.w2", "model.layers.22.block_sparse_moe.experts.110.w2", "model.layers.22.block_sparse_moe.experts.111.w2", "model.layers.22.block_sparse_moe.experts.112.w2", "model.layers.22.block_sparse_moe.experts.113.w2", "model.layers.22.block_sparse_moe.experts.114.w2", "model.layers.22.block_sparse_moe.experts.115.w2", "model.layers.22.block_sparse_moe.experts.116.w2", "model.layers.22.block_sparse_moe.experts.117.w2", "model.layers.22.block_sparse_moe.experts.118.w2", "model.layers.22.block_sparse_moe.experts.119.w2", "model.layers.22.block_sparse_moe.experts.120.w2", "model.layers.22.block_sparse_moe.experts.121.w2", "model.layers.22.block_sparse_moe.experts.122.w2", "model.layers.22.block_sparse_moe.experts.123.w2", "model.layers.22.block_sparse_moe.experts.124.w2", "model.layers.22.block_sparse_moe.experts.125.w2", "model.layers.22.block_sparse_moe.experts.126.w2", "model.layers.22.block_sparse_moe.experts.127.w2", "model.layers.22.block_sparse_moe.experts.128.w2", "model.layers.22.block_sparse_moe.experts.129.w2", "model.layers.22.block_sparse_moe.experts.130.w2", "model.layers.22.block_sparse_moe.experts.131.w2", "model.layers.22.block_sparse_moe.experts.132.w2", "model.layers.22.block_sparse_moe.experts.133.w2", "model.layers.22.block_sparse_moe.experts.134.w2", "model.layers.22.block_sparse_moe.experts.135.w2", "model.layers.22.block_sparse_moe.experts.136.w2", "model.layers.22.block_sparse_moe.experts.137.w2", "model.layers.22.block_sparse_moe.experts.138.w2", "model.layers.22.block_sparse_moe.experts.139.w2", "model.layers.22.block_sparse_moe.experts.140.w2", "model.layers.22.block_sparse_moe.experts.141.w2", "model.layers.22.block_sparse_moe.experts.142.w2", "model.layers.22.block_sparse_moe.experts.143.w2", "model.layers.22.block_sparse_moe.experts.144.w2", "model.layers.22.block_sparse_moe.experts.145.w2", "model.layers.22.block_sparse_moe.experts.146.w2", "model.layers.22.block_sparse_moe.experts.147.w2", "model.layers.22.block_sparse_moe.experts.148.w2", "model.layers.22.block_sparse_moe.experts.149.w2", "model.layers.22.block_sparse_moe.experts.150.w2", "model.layers.22.block_sparse_moe.experts.151.w2", "model.layers.22.block_sparse_moe.experts.152.w2", "model.layers.22.block_sparse_moe.experts.153.w2", "model.layers.22.block_sparse_moe.experts.154.w2", "model.layers.22.block_sparse_moe.experts.155.w2", "model.layers.22.block_sparse_moe.experts.156.w2", "model.layers.22.block_sparse_moe.experts.157.w2", "model.layers.22.block_sparse_moe.experts.158.w2", "model.layers.22.block_sparse_moe.experts.159.w2", "model.layers.22.block_sparse_moe.experts.160.w2", "model.layers.22.block_sparse_moe.experts.161.w2", "model.layers.22.block_sparse_moe.experts.162.w2", "model.layers.22.block_sparse_moe.experts.163.w2", "model.layers.22.block_sparse_moe.experts.164.w2", "model.layers.22.block_sparse_moe.experts.165.w2", "model.layers.22.block_sparse_moe.experts.166.w2", "model.layers.22.block_sparse_moe.experts.167.w2", "model.layers.22.block_sparse_moe.experts.168.w2", "model.layers.22.block_sparse_moe.experts.169.w2", "model.layers.22.block_sparse_moe.experts.170.w2", "model.layers.22.block_sparse_moe.experts.171.w2", "model.layers.22.block_sparse_moe.experts.172.w2", "model.layers.22.block_sparse_moe.experts.173.w2", "model.layers.22.block_sparse_moe.experts.174.w2", "model.layers.22.block_sparse_moe.experts.175.w2", "model.layers.22.block_sparse_moe.experts.176.w2", "model.layers.22.block_sparse_moe.experts.177.w2", "model.layers.22.block_sparse_moe.experts.178.w2", "model.layers.22.block_sparse_moe.experts.179.w2", "model.layers.22.block_sparse_moe.experts.180.w2", "model.layers.22.block_sparse_moe.experts.181.w2", "model.layers.22.block_sparse_moe.experts.182.w2", "model.layers.22.block_sparse_moe.experts.183.w2", "model.layers.22.block_sparse_moe.experts.184.w2", "model.layers.22.block_sparse_moe.experts.185.w2", "model.layers.22.block_sparse_moe.experts.186.w2", "model.layers.22.block_sparse_moe.experts.187.w2", "model.layers.22.block_sparse_moe.experts.188.w2", "model.layers.22.block_sparse_moe.experts.189.w2", "model.layers.22.block_sparse_moe.experts.190.w2", "model.layers.22.block_sparse_moe.experts.191.w2", "model.layers.22.block_sparse_moe.experts.192.w2", "model.layers.22.block_sparse_moe.experts.193.w2", "model.layers.22.block_sparse_moe.experts.194.w2", "model.layers.22.block_sparse_moe.experts.195.w2", "model.layers.22.block_sparse_moe.experts.196.w2", "model.layers.22.block_sparse_moe.experts.197.w2", "model.layers.22.block_sparse_moe.experts.198.w2", "model.layers.22.block_sparse_moe.experts.199.w2", "model.layers.22.block_sparse_moe.experts.200.w2", "model.layers.22.block_sparse_moe.experts.201.w2", "model.layers.22.block_sparse_moe.experts.202.w2", "model.layers.22.block_sparse_moe.experts.203.w2", "model.layers.22.block_sparse_moe.experts.204.w2", "model.layers.22.block_sparse_moe.experts.205.w2", "model.layers.22.block_sparse_moe.experts.206.w2", "model.layers.22.block_sparse_moe.experts.207.w2", "model.layers.22.block_sparse_moe.experts.208.w2", "model.layers.22.block_sparse_moe.experts.209.w2", "model.layers.22.block_sparse_moe.experts.210.w2", "model.layers.22.block_sparse_moe.experts.211.w2", "model.layers.22.block_sparse_moe.experts.212.w2", "model.layers.22.block_sparse_moe.experts.213.w2", "model.layers.22.block_sparse_moe.experts.214.w2", "model.layers.22.block_sparse_moe.experts.215.w2", "model.layers.22.block_sparse_moe.experts.216.w2", "model.layers.22.block_sparse_moe.experts.217.w2", "model.layers.22.block_sparse_moe.experts.218.w2", "model.layers.22.block_sparse_moe.experts.219.w2", "model.layers.22.block_sparse_moe.experts.220.w2", "model.layers.22.block_sparse_moe.experts.221.w2", "model.layers.22.block_sparse_moe.experts.222.w2", "model.layers.22.block_sparse_moe.experts.223.w2", "model.layers.22.block_sparse_moe.experts.224.w2", "model.layers.22.block_sparse_moe.experts.225.w2", "model.layers.22.block_sparse_moe.experts.226.w2", "model.layers.22.block_sparse_moe.experts.227.w2", "model.layers.22.block_sparse_moe.experts.228.w2", "model.layers.22.block_sparse_moe.experts.229.w2", "model.layers.22.block_sparse_moe.experts.230.w2", "model.layers.22.block_sparse_moe.experts.231.w2", "model.layers.22.block_sparse_moe.experts.232.w2", "model.layers.22.block_sparse_moe.experts.233.w2", "model.layers.22.block_sparse_moe.experts.234.w2", "model.layers.22.block_sparse_moe.experts.235.w2", "model.layers.22.block_sparse_moe.experts.236.w2", "model.layers.22.block_sparse_moe.experts.237.w2", "model.layers.22.block_sparse_moe.experts.238.w2", "model.layers.22.block_sparse_moe.experts.239.w2", "model.layers.22.block_sparse_moe.experts.240.w2", "model.layers.22.block_sparse_moe.experts.241.w2", "model.layers.22.block_sparse_moe.experts.242.w2", "model.layers.22.block_sparse_moe.experts.243.w2", "model.layers.22.block_sparse_moe.experts.244.w2", "model.layers.22.block_sparse_moe.experts.245.w2", "model.layers.22.block_sparse_moe.experts.246.w2", "model.layers.22.block_sparse_moe.experts.247.w2", "model.layers.22.block_sparse_moe.experts.248.w2", "model.layers.22.block_sparse_moe.experts.249.w2", "model.layers.22.block_sparse_moe.experts.250.w2", "model.layers.22.block_sparse_moe.experts.251.w2", "model.layers.22.block_sparse_moe.experts.252.w2", "model.layers.22.block_sparse_moe.experts.253.w2", "model.layers.22.block_sparse_moe.experts.254.w2", "model.layers.22.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0013066662475466617, "dbits": 3623878656 } ] }, { "idx": 46, "layers": [ "model.layers.23.self_attn.q_proj", "model.layers.23.self_attn.k_proj", "model.layers.23.self_attn.v_proj", "model.layers.23.self_attn.o_proj" ], "candidates": [ { "dkld": -0.004970164969563484, "dbits": 44040192 } ] }, { "idx": 47, "layers": [ "model.layers.23.block_sparse_moe.experts.0.w1", "model.layers.23.block_sparse_moe.experts.1.w1", "model.layers.23.block_sparse_moe.experts.2.w1", "model.layers.23.block_sparse_moe.experts.3.w1", "model.layers.23.block_sparse_moe.experts.4.w1", "model.layers.23.block_sparse_moe.experts.5.w1", "model.layers.23.block_sparse_moe.experts.6.w1", "model.layers.23.block_sparse_moe.experts.7.w1", "model.layers.23.block_sparse_moe.experts.8.w1", "model.layers.23.block_sparse_moe.experts.9.w1", "model.layers.23.block_sparse_moe.experts.10.w1", "model.layers.23.block_sparse_moe.experts.11.w1", "model.layers.23.block_sparse_moe.experts.12.w1", "model.layers.23.block_sparse_moe.experts.13.w1", "model.layers.23.block_sparse_moe.experts.14.w1", "model.layers.23.block_sparse_moe.experts.15.w1", "model.layers.23.block_sparse_moe.experts.16.w1", "model.layers.23.block_sparse_moe.experts.17.w1", "model.layers.23.block_sparse_moe.experts.18.w1", "model.layers.23.block_sparse_moe.experts.19.w1", "model.layers.23.block_sparse_moe.experts.20.w1", "model.layers.23.block_sparse_moe.experts.21.w1", "model.layers.23.block_sparse_moe.experts.22.w1", "model.layers.23.block_sparse_moe.experts.23.w1", "model.layers.23.block_sparse_moe.experts.24.w1", "model.layers.23.block_sparse_moe.experts.25.w1", "model.layers.23.block_sparse_moe.experts.26.w1", "model.layers.23.block_sparse_moe.experts.27.w1", "model.layers.23.block_sparse_moe.experts.28.w1", "model.layers.23.block_sparse_moe.experts.29.w1", "model.layers.23.block_sparse_moe.experts.30.w1", "model.layers.23.block_sparse_moe.experts.31.w1", "model.layers.23.block_sparse_moe.experts.32.w1", "model.layers.23.block_sparse_moe.experts.33.w1", "model.layers.23.block_sparse_moe.experts.34.w1", "model.layers.23.block_sparse_moe.experts.35.w1", "model.layers.23.block_sparse_moe.experts.36.w1", "model.layers.23.block_sparse_moe.experts.37.w1", "model.layers.23.block_sparse_moe.experts.38.w1", "model.layers.23.block_sparse_moe.experts.39.w1", "model.layers.23.block_sparse_moe.experts.40.w1", "model.layers.23.block_sparse_moe.experts.41.w1", "model.layers.23.block_sparse_moe.experts.42.w1", "model.layers.23.block_sparse_moe.experts.43.w1", "model.layers.23.block_sparse_moe.experts.44.w1", "model.layers.23.block_sparse_moe.experts.45.w1", "model.layers.23.block_sparse_moe.experts.46.w1", "model.layers.23.block_sparse_moe.experts.47.w1", "model.layers.23.block_sparse_moe.experts.48.w1", "model.layers.23.block_sparse_moe.experts.49.w1", "model.layers.23.block_sparse_moe.experts.50.w1", "model.layers.23.block_sparse_moe.experts.51.w1", "model.layers.23.block_sparse_moe.experts.52.w1", "model.layers.23.block_sparse_moe.experts.53.w1", "model.layers.23.block_sparse_moe.experts.54.w1", "model.layers.23.block_sparse_moe.experts.55.w1", "model.layers.23.block_sparse_moe.experts.56.w1", "model.layers.23.block_sparse_moe.experts.57.w1", "model.layers.23.block_sparse_moe.experts.58.w1", "model.layers.23.block_sparse_moe.experts.59.w1", "model.layers.23.block_sparse_moe.experts.60.w1", "model.layers.23.block_sparse_moe.experts.61.w1", "model.layers.23.block_sparse_moe.experts.62.w1", "model.layers.23.block_sparse_moe.experts.63.w1", "model.layers.23.block_sparse_moe.experts.64.w1", "model.layers.23.block_sparse_moe.experts.65.w1", "model.layers.23.block_sparse_moe.experts.66.w1", "model.layers.23.block_sparse_moe.experts.67.w1", "model.layers.23.block_sparse_moe.experts.68.w1", "model.layers.23.block_sparse_moe.experts.69.w1", "model.layers.23.block_sparse_moe.experts.70.w1", "model.layers.23.block_sparse_moe.experts.71.w1", "model.layers.23.block_sparse_moe.experts.72.w1", "model.layers.23.block_sparse_moe.experts.73.w1", "model.layers.23.block_sparse_moe.experts.74.w1", "model.layers.23.block_sparse_moe.experts.75.w1", "model.layers.23.block_sparse_moe.experts.76.w1", "model.layers.23.block_sparse_moe.experts.77.w1", "model.layers.23.block_sparse_moe.experts.78.w1", "model.layers.23.block_sparse_moe.experts.79.w1", "model.layers.23.block_sparse_moe.experts.80.w1", "model.layers.23.block_sparse_moe.experts.81.w1", "model.layers.23.block_sparse_moe.experts.82.w1", "model.layers.23.block_sparse_moe.experts.83.w1", "model.layers.23.block_sparse_moe.experts.84.w1", "model.layers.23.block_sparse_moe.experts.85.w1", "model.layers.23.block_sparse_moe.experts.86.w1", "model.layers.23.block_sparse_moe.experts.87.w1", "model.layers.23.block_sparse_moe.experts.88.w1", "model.layers.23.block_sparse_moe.experts.89.w1", "model.layers.23.block_sparse_moe.experts.90.w1", "model.layers.23.block_sparse_moe.experts.91.w1", "model.layers.23.block_sparse_moe.experts.92.w1", "model.layers.23.block_sparse_moe.experts.93.w1", "model.layers.23.block_sparse_moe.experts.94.w1", "model.layers.23.block_sparse_moe.experts.95.w1", "model.layers.23.block_sparse_moe.experts.96.w1", "model.layers.23.block_sparse_moe.experts.97.w1", "model.layers.23.block_sparse_moe.experts.98.w1", "model.layers.23.block_sparse_moe.experts.99.w1", "model.layers.23.block_sparse_moe.experts.100.w1", "model.layers.23.block_sparse_moe.experts.101.w1", "model.layers.23.block_sparse_moe.experts.102.w1", "model.layers.23.block_sparse_moe.experts.103.w1", "model.layers.23.block_sparse_moe.experts.104.w1", "model.layers.23.block_sparse_moe.experts.105.w1", "model.layers.23.block_sparse_moe.experts.106.w1", "model.layers.23.block_sparse_moe.experts.107.w1", "model.layers.23.block_sparse_moe.experts.108.w1", "model.layers.23.block_sparse_moe.experts.109.w1", "model.layers.23.block_sparse_moe.experts.110.w1", "model.layers.23.block_sparse_moe.experts.111.w1", "model.layers.23.block_sparse_moe.experts.112.w1", "model.layers.23.block_sparse_moe.experts.113.w1", "model.layers.23.block_sparse_moe.experts.114.w1", "model.layers.23.block_sparse_moe.experts.115.w1", "model.layers.23.block_sparse_moe.experts.116.w1", "model.layers.23.block_sparse_moe.experts.117.w1", "model.layers.23.block_sparse_moe.experts.118.w1", "model.layers.23.block_sparse_moe.experts.119.w1", "model.layers.23.block_sparse_moe.experts.120.w1", "model.layers.23.block_sparse_moe.experts.121.w1", "model.layers.23.block_sparse_moe.experts.122.w1", "model.layers.23.block_sparse_moe.experts.123.w1", "model.layers.23.block_sparse_moe.experts.124.w1", "model.layers.23.block_sparse_moe.experts.125.w1", "model.layers.23.block_sparse_moe.experts.126.w1", "model.layers.23.block_sparse_moe.experts.127.w1", "model.layers.23.block_sparse_moe.experts.128.w1", "model.layers.23.block_sparse_moe.experts.129.w1", "model.layers.23.block_sparse_moe.experts.130.w1", "model.layers.23.block_sparse_moe.experts.131.w1", "model.layers.23.block_sparse_moe.experts.132.w1", "model.layers.23.block_sparse_moe.experts.133.w1", "model.layers.23.block_sparse_moe.experts.134.w1", "model.layers.23.block_sparse_moe.experts.135.w1", "model.layers.23.block_sparse_moe.experts.136.w1", "model.layers.23.block_sparse_moe.experts.137.w1", "model.layers.23.block_sparse_moe.experts.138.w1", "model.layers.23.block_sparse_moe.experts.139.w1", "model.layers.23.block_sparse_moe.experts.140.w1", "model.layers.23.block_sparse_moe.experts.141.w1", "model.layers.23.block_sparse_moe.experts.142.w1", "model.layers.23.block_sparse_moe.experts.143.w1", "model.layers.23.block_sparse_moe.experts.144.w1", "model.layers.23.block_sparse_moe.experts.145.w1", "model.layers.23.block_sparse_moe.experts.146.w1", "model.layers.23.block_sparse_moe.experts.147.w1", "model.layers.23.block_sparse_moe.experts.148.w1", "model.layers.23.block_sparse_moe.experts.149.w1", "model.layers.23.block_sparse_moe.experts.150.w1", "model.layers.23.block_sparse_moe.experts.151.w1", "model.layers.23.block_sparse_moe.experts.152.w1", "model.layers.23.block_sparse_moe.experts.153.w1", "model.layers.23.block_sparse_moe.experts.154.w1", "model.layers.23.block_sparse_moe.experts.155.w1", "model.layers.23.block_sparse_moe.experts.156.w1", "model.layers.23.block_sparse_moe.experts.157.w1", "model.layers.23.block_sparse_moe.experts.158.w1", "model.layers.23.block_sparse_moe.experts.159.w1", "model.layers.23.block_sparse_moe.experts.160.w1", "model.layers.23.block_sparse_moe.experts.161.w1", "model.layers.23.block_sparse_moe.experts.162.w1", "model.layers.23.block_sparse_moe.experts.163.w1", "model.layers.23.block_sparse_moe.experts.164.w1", "model.layers.23.block_sparse_moe.experts.165.w1", "model.layers.23.block_sparse_moe.experts.166.w1", "model.layers.23.block_sparse_moe.experts.167.w1", "model.layers.23.block_sparse_moe.experts.168.w1", "model.layers.23.block_sparse_moe.experts.169.w1", "model.layers.23.block_sparse_moe.experts.170.w1", "model.layers.23.block_sparse_moe.experts.171.w1", "model.layers.23.block_sparse_moe.experts.172.w1", "model.layers.23.block_sparse_moe.experts.173.w1", "model.layers.23.block_sparse_moe.experts.174.w1", "model.layers.23.block_sparse_moe.experts.175.w1", "model.layers.23.block_sparse_moe.experts.176.w1", "model.layers.23.block_sparse_moe.experts.177.w1", "model.layers.23.block_sparse_moe.experts.178.w1", "model.layers.23.block_sparse_moe.experts.179.w1", "model.layers.23.block_sparse_moe.experts.180.w1", "model.layers.23.block_sparse_moe.experts.181.w1", "model.layers.23.block_sparse_moe.experts.182.w1", "model.layers.23.block_sparse_moe.experts.183.w1", "model.layers.23.block_sparse_moe.experts.184.w1", "model.layers.23.block_sparse_moe.experts.185.w1", "model.layers.23.block_sparse_moe.experts.186.w1", "model.layers.23.block_sparse_moe.experts.187.w1", "model.layers.23.block_sparse_moe.experts.188.w1", "model.layers.23.block_sparse_moe.experts.189.w1", "model.layers.23.block_sparse_moe.experts.190.w1", "model.layers.23.block_sparse_moe.experts.191.w1", "model.layers.23.block_sparse_moe.experts.192.w1", "model.layers.23.block_sparse_moe.experts.193.w1", "model.layers.23.block_sparse_moe.experts.194.w1", "model.layers.23.block_sparse_moe.experts.195.w1", "model.layers.23.block_sparse_moe.experts.196.w1", "model.layers.23.block_sparse_moe.experts.197.w1", "model.layers.23.block_sparse_moe.experts.198.w1", "model.layers.23.block_sparse_moe.experts.199.w1", "model.layers.23.block_sparse_moe.experts.200.w1", "model.layers.23.block_sparse_moe.experts.201.w1", "model.layers.23.block_sparse_moe.experts.202.w1", "model.layers.23.block_sparse_moe.experts.203.w1", "model.layers.23.block_sparse_moe.experts.204.w1", "model.layers.23.block_sparse_moe.experts.205.w1", "model.layers.23.block_sparse_moe.experts.206.w1", "model.layers.23.block_sparse_moe.experts.207.w1", "model.layers.23.block_sparse_moe.experts.208.w1", "model.layers.23.block_sparse_moe.experts.209.w1", "model.layers.23.block_sparse_moe.experts.210.w1", "model.layers.23.block_sparse_moe.experts.211.w1", "model.layers.23.block_sparse_moe.experts.212.w1", "model.layers.23.block_sparse_moe.experts.213.w1", "model.layers.23.block_sparse_moe.experts.214.w1", "model.layers.23.block_sparse_moe.experts.215.w1", "model.layers.23.block_sparse_moe.experts.216.w1", "model.layers.23.block_sparse_moe.experts.217.w1", "model.layers.23.block_sparse_moe.experts.218.w1", "model.layers.23.block_sparse_moe.experts.219.w1", "model.layers.23.block_sparse_moe.experts.220.w1", "model.layers.23.block_sparse_moe.experts.221.w1", "model.layers.23.block_sparse_moe.experts.222.w1", "model.layers.23.block_sparse_moe.experts.223.w1", "model.layers.23.block_sparse_moe.experts.224.w1", "model.layers.23.block_sparse_moe.experts.225.w1", "model.layers.23.block_sparse_moe.experts.226.w1", "model.layers.23.block_sparse_moe.experts.227.w1", "model.layers.23.block_sparse_moe.experts.228.w1", "model.layers.23.block_sparse_moe.experts.229.w1", "model.layers.23.block_sparse_moe.experts.230.w1", "model.layers.23.block_sparse_moe.experts.231.w1", "model.layers.23.block_sparse_moe.experts.232.w1", "model.layers.23.block_sparse_moe.experts.233.w1", "model.layers.23.block_sparse_moe.experts.234.w1", "model.layers.23.block_sparse_moe.experts.235.w1", "model.layers.23.block_sparse_moe.experts.236.w1", "model.layers.23.block_sparse_moe.experts.237.w1", "model.layers.23.block_sparse_moe.experts.238.w1", "model.layers.23.block_sparse_moe.experts.239.w1", "model.layers.23.block_sparse_moe.experts.240.w1", "model.layers.23.block_sparse_moe.experts.241.w1", "model.layers.23.block_sparse_moe.experts.242.w1", "model.layers.23.block_sparse_moe.experts.243.w1", "model.layers.23.block_sparse_moe.experts.244.w1", "model.layers.23.block_sparse_moe.experts.245.w1", "model.layers.23.block_sparse_moe.experts.246.w1", "model.layers.23.block_sparse_moe.experts.247.w1", "model.layers.23.block_sparse_moe.experts.248.w1", "model.layers.23.block_sparse_moe.experts.249.w1", "model.layers.23.block_sparse_moe.experts.250.w1", "model.layers.23.block_sparse_moe.experts.251.w1", "model.layers.23.block_sparse_moe.experts.252.w1", "model.layers.23.block_sparse_moe.experts.253.w1", "model.layers.23.block_sparse_moe.experts.254.w1", "model.layers.23.block_sparse_moe.experts.255.w1", "model.layers.23.block_sparse_moe.experts.0.w3", "model.layers.23.block_sparse_moe.experts.1.w3", "model.layers.23.block_sparse_moe.experts.2.w3", "model.layers.23.block_sparse_moe.experts.3.w3", "model.layers.23.block_sparse_moe.experts.4.w3", "model.layers.23.block_sparse_moe.experts.5.w3", "model.layers.23.block_sparse_moe.experts.6.w3", "model.layers.23.block_sparse_moe.experts.7.w3", "model.layers.23.block_sparse_moe.experts.8.w3", "model.layers.23.block_sparse_moe.experts.9.w3", "model.layers.23.block_sparse_moe.experts.10.w3", "model.layers.23.block_sparse_moe.experts.11.w3", "model.layers.23.block_sparse_moe.experts.12.w3", "model.layers.23.block_sparse_moe.experts.13.w3", "model.layers.23.block_sparse_moe.experts.14.w3", "model.layers.23.block_sparse_moe.experts.15.w3", "model.layers.23.block_sparse_moe.experts.16.w3", "model.layers.23.block_sparse_moe.experts.17.w3", "model.layers.23.block_sparse_moe.experts.18.w3", "model.layers.23.block_sparse_moe.experts.19.w3", "model.layers.23.block_sparse_moe.experts.20.w3", "model.layers.23.block_sparse_moe.experts.21.w3", "model.layers.23.block_sparse_moe.experts.22.w3", "model.layers.23.block_sparse_moe.experts.23.w3", "model.layers.23.block_sparse_moe.experts.24.w3", "model.layers.23.block_sparse_moe.experts.25.w3", "model.layers.23.block_sparse_moe.experts.26.w3", "model.layers.23.block_sparse_moe.experts.27.w3", "model.layers.23.block_sparse_moe.experts.28.w3", "model.layers.23.block_sparse_moe.experts.29.w3", "model.layers.23.block_sparse_moe.experts.30.w3", "model.layers.23.block_sparse_moe.experts.31.w3", "model.layers.23.block_sparse_moe.experts.32.w3", "model.layers.23.block_sparse_moe.experts.33.w3", "model.layers.23.block_sparse_moe.experts.34.w3", "model.layers.23.block_sparse_moe.experts.35.w3", "model.layers.23.block_sparse_moe.experts.36.w3", "model.layers.23.block_sparse_moe.experts.37.w3", "model.layers.23.block_sparse_moe.experts.38.w3", "model.layers.23.block_sparse_moe.experts.39.w3", "model.layers.23.block_sparse_moe.experts.40.w3", "model.layers.23.block_sparse_moe.experts.41.w3", "model.layers.23.block_sparse_moe.experts.42.w3", "model.layers.23.block_sparse_moe.experts.43.w3", "model.layers.23.block_sparse_moe.experts.44.w3", "model.layers.23.block_sparse_moe.experts.45.w3", "model.layers.23.block_sparse_moe.experts.46.w3", "model.layers.23.block_sparse_moe.experts.47.w3", "model.layers.23.block_sparse_moe.experts.48.w3", "model.layers.23.block_sparse_moe.experts.49.w3", "model.layers.23.block_sparse_moe.experts.50.w3", "model.layers.23.block_sparse_moe.experts.51.w3", "model.layers.23.block_sparse_moe.experts.52.w3", "model.layers.23.block_sparse_moe.experts.53.w3", "model.layers.23.block_sparse_moe.experts.54.w3", "model.layers.23.block_sparse_moe.experts.55.w3", "model.layers.23.block_sparse_moe.experts.56.w3", "model.layers.23.block_sparse_moe.experts.57.w3", "model.layers.23.block_sparse_moe.experts.58.w3", "model.layers.23.block_sparse_moe.experts.59.w3", "model.layers.23.block_sparse_moe.experts.60.w3", "model.layers.23.block_sparse_moe.experts.61.w3", "model.layers.23.block_sparse_moe.experts.62.w3", "model.layers.23.block_sparse_moe.experts.63.w3", "model.layers.23.block_sparse_moe.experts.64.w3", "model.layers.23.block_sparse_moe.experts.65.w3", "model.layers.23.block_sparse_moe.experts.66.w3", "model.layers.23.block_sparse_moe.experts.67.w3", "model.layers.23.block_sparse_moe.experts.68.w3", "model.layers.23.block_sparse_moe.experts.69.w3", "model.layers.23.block_sparse_moe.experts.70.w3", "model.layers.23.block_sparse_moe.experts.71.w3", "model.layers.23.block_sparse_moe.experts.72.w3", "model.layers.23.block_sparse_moe.experts.73.w3", "model.layers.23.block_sparse_moe.experts.74.w3", "model.layers.23.block_sparse_moe.experts.75.w3", "model.layers.23.block_sparse_moe.experts.76.w3", "model.layers.23.block_sparse_moe.experts.77.w3", "model.layers.23.block_sparse_moe.experts.78.w3", "model.layers.23.block_sparse_moe.experts.79.w3", "model.layers.23.block_sparse_moe.experts.80.w3", "model.layers.23.block_sparse_moe.experts.81.w3", "model.layers.23.block_sparse_moe.experts.82.w3", "model.layers.23.block_sparse_moe.experts.83.w3", "model.layers.23.block_sparse_moe.experts.84.w3", "model.layers.23.block_sparse_moe.experts.85.w3", "model.layers.23.block_sparse_moe.experts.86.w3", "model.layers.23.block_sparse_moe.experts.87.w3", "model.layers.23.block_sparse_moe.experts.88.w3", "model.layers.23.block_sparse_moe.experts.89.w3", "model.layers.23.block_sparse_moe.experts.90.w3", "model.layers.23.block_sparse_moe.experts.91.w3", "model.layers.23.block_sparse_moe.experts.92.w3", "model.layers.23.block_sparse_moe.experts.93.w3", "model.layers.23.block_sparse_moe.experts.94.w3", "model.layers.23.block_sparse_moe.experts.95.w3", "model.layers.23.block_sparse_moe.experts.96.w3", "model.layers.23.block_sparse_moe.experts.97.w3", "model.layers.23.block_sparse_moe.experts.98.w3", "model.layers.23.block_sparse_moe.experts.99.w3", "model.layers.23.block_sparse_moe.experts.100.w3", "model.layers.23.block_sparse_moe.experts.101.w3", "model.layers.23.block_sparse_moe.experts.102.w3", "model.layers.23.block_sparse_moe.experts.103.w3", "model.layers.23.block_sparse_moe.experts.104.w3", "model.layers.23.block_sparse_moe.experts.105.w3", "model.layers.23.block_sparse_moe.experts.106.w3", "model.layers.23.block_sparse_moe.experts.107.w3", "model.layers.23.block_sparse_moe.experts.108.w3", "model.layers.23.block_sparse_moe.experts.109.w3", "model.layers.23.block_sparse_moe.experts.110.w3", "model.layers.23.block_sparse_moe.experts.111.w3", "model.layers.23.block_sparse_moe.experts.112.w3", "model.layers.23.block_sparse_moe.experts.113.w3", "model.layers.23.block_sparse_moe.experts.114.w3", "model.layers.23.block_sparse_moe.experts.115.w3", "model.layers.23.block_sparse_moe.experts.116.w3", "model.layers.23.block_sparse_moe.experts.117.w3", "model.layers.23.block_sparse_moe.experts.118.w3", "model.layers.23.block_sparse_moe.experts.119.w3", "model.layers.23.block_sparse_moe.experts.120.w3", "model.layers.23.block_sparse_moe.experts.121.w3", "model.layers.23.block_sparse_moe.experts.122.w3", "model.layers.23.block_sparse_moe.experts.123.w3", "model.layers.23.block_sparse_moe.experts.124.w3", "model.layers.23.block_sparse_moe.experts.125.w3", "model.layers.23.block_sparse_moe.experts.126.w3", "model.layers.23.block_sparse_moe.experts.127.w3", "model.layers.23.block_sparse_moe.experts.128.w3", "model.layers.23.block_sparse_moe.experts.129.w3", "model.layers.23.block_sparse_moe.experts.130.w3", "model.layers.23.block_sparse_moe.experts.131.w3", "model.layers.23.block_sparse_moe.experts.132.w3", "model.layers.23.block_sparse_moe.experts.133.w3", "model.layers.23.block_sparse_moe.experts.134.w3", "model.layers.23.block_sparse_moe.experts.135.w3", "model.layers.23.block_sparse_moe.experts.136.w3", "model.layers.23.block_sparse_moe.experts.137.w3", "model.layers.23.block_sparse_moe.experts.138.w3", "model.layers.23.block_sparse_moe.experts.139.w3", "model.layers.23.block_sparse_moe.experts.140.w3", "model.layers.23.block_sparse_moe.experts.141.w3", "model.layers.23.block_sparse_moe.experts.142.w3", "model.layers.23.block_sparse_moe.experts.143.w3", "model.layers.23.block_sparse_moe.experts.144.w3", "model.layers.23.block_sparse_moe.experts.145.w3", "model.layers.23.block_sparse_moe.experts.146.w3", "model.layers.23.block_sparse_moe.experts.147.w3", "model.layers.23.block_sparse_moe.experts.148.w3", "model.layers.23.block_sparse_moe.experts.149.w3", "model.layers.23.block_sparse_moe.experts.150.w3", "model.layers.23.block_sparse_moe.experts.151.w3", "model.layers.23.block_sparse_moe.experts.152.w3", "model.layers.23.block_sparse_moe.experts.153.w3", "model.layers.23.block_sparse_moe.experts.154.w3", "model.layers.23.block_sparse_moe.experts.155.w3", "model.layers.23.block_sparse_moe.experts.156.w3", "model.layers.23.block_sparse_moe.experts.157.w3", "model.layers.23.block_sparse_moe.experts.158.w3", "model.layers.23.block_sparse_moe.experts.159.w3", "model.layers.23.block_sparse_moe.experts.160.w3", "model.layers.23.block_sparse_moe.experts.161.w3", "model.layers.23.block_sparse_moe.experts.162.w3", "model.layers.23.block_sparse_moe.experts.163.w3", "model.layers.23.block_sparse_moe.experts.164.w3", "model.layers.23.block_sparse_moe.experts.165.w3", "model.layers.23.block_sparse_moe.experts.166.w3", "model.layers.23.block_sparse_moe.experts.167.w3", "model.layers.23.block_sparse_moe.experts.168.w3", "model.layers.23.block_sparse_moe.experts.169.w3", "model.layers.23.block_sparse_moe.experts.170.w3", "model.layers.23.block_sparse_moe.experts.171.w3", "model.layers.23.block_sparse_moe.experts.172.w3", "model.layers.23.block_sparse_moe.experts.173.w3", "model.layers.23.block_sparse_moe.experts.174.w3", "model.layers.23.block_sparse_moe.experts.175.w3", "model.layers.23.block_sparse_moe.experts.176.w3", "model.layers.23.block_sparse_moe.experts.177.w3", "model.layers.23.block_sparse_moe.experts.178.w3", "model.layers.23.block_sparse_moe.experts.179.w3", "model.layers.23.block_sparse_moe.experts.180.w3", "model.layers.23.block_sparse_moe.experts.181.w3", "model.layers.23.block_sparse_moe.experts.182.w3", "model.layers.23.block_sparse_moe.experts.183.w3", "model.layers.23.block_sparse_moe.experts.184.w3", "model.layers.23.block_sparse_moe.experts.185.w3", "model.layers.23.block_sparse_moe.experts.186.w3", "model.layers.23.block_sparse_moe.experts.187.w3", "model.layers.23.block_sparse_moe.experts.188.w3", "model.layers.23.block_sparse_moe.experts.189.w3", "model.layers.23.block_sparse_moe.experts.190.w3", "model.layers.23.block_sparse_moe.experts.191.w3", "model.layers.23.block_sparse_moe.experts.192.w3", "model.layers.23.block_sparse_moe.experts.193.w3", "model.layers.23.block_sparse_moe.experts.194.w3", "model.layers.23.block_sparse_moe.experts.195.w3", "model.layers.23.block_sparse_moe.experts.196.w3", "model.layers.23.block_sparse_moe.experts.197.w3", "model.layers.23.block_sparse_moe.experts.198.w3", "model.layers.23.block_sparse_moe.experts.199.w3", "model.layers.23.block_sparse_moe.experts.200.w3", "model.layers.23.block_sparse_moe.experts.201.w3", "model.layers.23.block_sparse_moe.experts.202.w3", "model.layers.23.block_sparse_moe.experts.203.w3", "model.layers.23.block_sparse_moe.experts.204.w3", "model.layers.23.block_sparse_moe.experts.205.w3", "model.layers.23.block_sparse_moe.experts.206.w3", "model.layers.23.block_sparse_moe.experts.207.w3", "model.layers.23.block_sparse_moe.experts.208.w3", "model.layers.23.block_sparse_moe.experts.209.w3", "model.layers.23.block_sparse_moe.experts.210.w3", "model.layers.23.block_sparse_moe.experts.211.w3", "model.layers.23.block_sparse_moe.experts.212.w3", "model.layers.23.block_sparse_moe.experts.213.w3", "model.layers.23.block_sparse_moe.experts.214.w3", "model.layers.23.block_sparse_moe.experts.215.w3", "model.layers.23.block_sparse_moe.experts.216.w3", "model.layers.23.block_sparse_moe.experts.217.w3", "model.layers.23.block_sparse_moe.experts.218.w3", "model.layers.23.block_sparse_moe.experts.219.w3", "model.layers.23.block_sparse_moe.experts.220.w3", "model.layers.23.block_sparse_moe.experts.221.w3", "model.layers.23.block_sparse_moe.experts.222.w3", "model.layers.23.block_sparse_moe.experts.223.w3", "model.layers.23.block_sparse_moe.experts.224.w3", "model.layers.23.block_sparse_moe.experts.225.w3", "model.layers.23.block_sparse_moe.experts.226.w3", "model.layers.23.block_sparse_moe.experts.227.w3", "model.layers.23.block_sparse_moe.experts.228.w3", "model.layers.23.block_sparse_moe.experts.229.w3", "model.layers.23.block_sparse_moe.experts.230.w3", "model.layers.23.block_sparse_moe.experts.231.w3", "model.layers.23.block_sparse_moe.experts.232.w3", "model.layers.23.block_sparse_moe.experts.233.w3", "model.layers.23.block_sparse_moe.experts.234.w3", "model.layers.23.block_sparse_moe.experts.235.w3", "model.layers.23.block_sparse_moe.experts.236.w3", "model.layers.23.block_sparse_moe.experts.237.w3", "model.layers.23.block_sparse_moe.experts.238.w3", "model.layers.23.block_sparse_moe.experts.239.w3", "model.layers.23.block_sparse_moe.experts.240.w3", "model.layers.23.block_sparse_moe.experts.241.w3", "model.layers.23.block_sparse_moe.experts.242.w3", "model.layers.23.block_sparse_moe.experts.243.w3", "model.layers.23.block_sparse_moe.experts.244.w3", "model.layers.23.block_sparse_moe.experts.245.w3", "model.layers.23.block_sparse_moe.experts.246.w3", "model.layers.23.block_sparse_moe.experts.247.w3", "model.layers.23.block_sparse_moe.experts.248.w3", "model.layers.23.block_sparse_moe.experts.249.w3", "model.layers.23.block_sparse_moe.experts.250.w3", "model.layers.23.block_sparse_moe.experts.251.w3", "model.layers.23.block_sparse_moe.experts.252.w3", "model.layers.23.block_sparse_moe.experts.253.w3", "model.layers.23.block_sparse_moe.experts.254.w3", "model.layers.23.block_sparse_moe.experts.255.w3", "model.layers.23.block_sparse_moe.experts.0.w2", "model.layers.23.block_sparse_moe.experts.1.w2", "model.layers.23.block_sparse_moe.experts.2.w2", "model.layers.23.block_sparse_moe.experts.3.w2", "model.layers.23.block_sparse_moe.experts.4.w2", "model.layers.23.block_sparse_moe.experts.5.w2", "model.layers.23.block_sparse_moe.experts.6.w2", "model.layers.23.block_sparse_moe.experts.7.w2", "model.layers.23.block_sparse_moe.experts.8.w2", "model.layers.23.block_sparse_moe.experts.9.w2", "model.layers.23.block_sparse_moe.experts.10.w2", "model.layers.23.block_sparse_moe.experts.11.w2", "model.layers.23.block_sparse_moe.experts.12.w2", "model.layers.23.block_sparse_moe.experts.13.w2", "model.layers.23.block_sparse_moe.experts.14.w2", "model.layers.23.block_sparse_moe.experts.15.w2", "model.layers.23.block_sparse_moe.experts.16.w2", "model.layers.23.block_sparse_moe.experts.17.w2", "model.layers.23.block_sparse_moe.experts.18.w2", "model.layers.23.block_sparse_moe.experts.19.w2", "model.layers.23.block_sparse_moe.experts.20.w2", "model.layers.23.block_sparse_moe.experts.21.w2", "model.layers.23.block_sparse_moe.experts.22.w2", "model.layers.23.block_sparse_moe.experts.23.w2", "model.layers.23.block_sparse_moe.experts.24.w2", "model.layers.23.block_sparse_moe.experts.25.w2", "model.layers.23.block_sparse_moe.experts.26.w2", "model.layers.23.block_sparse_moe.experts.27.w2", "model.layers.23.block_sparse_moe.experts.28.w2", "model.layers.23.block_sparse_moe.experts.29.w2", "model.layers.23.block_sparse_moe.experts.30.w2", "model.layers.23.block_sparse_moe.experts.31.w2", "model.layers.23.block_sparse_moe.experts.32.w2", "model.layers.23.block_sparse_moe.experts.33.w2", "model.layers.23.block_sparse_moe.experts.34.w2", "model.layers.23.block_sparse_moe.experts.35.w2", "model.layers.23.block_sparse_moe.experts.36.w2", "model.layers.23.block_sparse_moe.experts.37.w2", "model.layers.23.block_sparse_moe.experts.38.w2", "model.layers.23.block_sparse_moe.experts.39.w2", "model.layers.23.block_sparse_moe.experts.40.w2", "model.layers.23.block_sparse_moe.experts.41.w2", "model.layers.23.block_sparse_moe.experts.42.w2", "model.layers.23.block_sparse_moe.experts.43.w2", "model.layers.23.block_sparse_moe.experts.44.w2", "model.layers.23.block_sparse_moe.experts.45.w2", "model.layers.23.block_sparse_moe.experts.46.w2", "model.layers.23.block_sparse_moe.experts.47.w2", "model.layers.23.block_sparse_moe.experts.48.w2", "model.layers.23.block_sparse_moe.experts.49.w2", "model.layers.23.block_sparse_moe.experts.50.w2", "model.layers.23.block_sparse_moe.experts.51.w2", "model.layers.23.block_sparse_moe.experts.52.w2", "model.layers.23.block_sparse_moe.experts.53.w2", "model.layers.23.block_sparse_moe.experts.54.w2", "model.layers.23.block_sparse_moe.experts.55.w2", "model.layers.23.block_sparse_moe.experts.56.w2", "model.layers.23.block_sparse_moe.experts.57.w2", "model.layers.23.block_sparse_moe.experts.58.w2", "model.layers.23.block_sparse_moe.experts.59.w2", "model.layers.23.block_sparse_moe.experts.60.w2", "model.layers.23.block_sparse_moe.experts.61.w2", "model.layers.23.block_sparse_moe.experts.62.w2", "model.layers.23.block_sparse_moe.experts.63.w2", "model.layers.23.block_sparse_moe.experts.64.w2", "model.layers.23.block_sparse_moe.experts.65.w2", "model.layers.23.block_sparse_moe.experts.66.w2", "model.layers.23.block_sparse_moe.experts.67.w2", "model.layers.23.block_sparse_moe.experts.68.w2", "model.layers.23.block_sparse_moe.experts.69.w2", "model.layers.23.block_sparse_moe.experts.70.w2", "model.layers.23.block_sparse_moe.experts.71.w2", "model.layers.23.block_sparse_moe.experts.72.w2", "model.layers.23.block_sparse_moe.experts.73.w2", "model.layers.23.block_sparse_moe.experts.74.w2", "model.layers.23.block_sparse_moe.experts.75.w2", "model.layers.23.block_sparse_moe.experts.76.w2", "model.layers.23.block_sparse_moe.experts.77.w2", "model.layers.23.block_sparse_moe.experts.78.w2", "model.layers.23.block_sparse_moe.experts.79.w2", "model.layers.23.block_sparse_moe.experts.80.w2", "model.layers.23.block_sparse_moe.experts.81.w2", "model.layers.23.block_sparse_moe.experts.82.w2", "model.layers.23.block_sparse_moe.experts.83.w2", "model.layers.23.block_sparse_moe.experts.84.w2", "model.layers.23.block_sparse_moe.experts.85.w2", "model.layers.23.block_sparse_moe.experts.86.w2", "model.layers.23.block_sparse_moe.experts.87.w2", "model.layers.23.block_sparse_moe.experts.88.w2", "model.layers.23.block_sparse_moe.experts.89.w2", "model.layers.23.block_sparse_moe.experts.90.w2", "model.layers.23.block_sparse_moe.experts.91.w2", "model.layers.23.block_sparse_moe.experts.92.w2", "model.layers.23.block_sparse_moe.experts.93.w2", "model.layers.23.block_sparse_moe.experts.94.w2", "model.layers.23.block_sparse_moe.experts.95.w2", "model.layers.23.block_sparse_moe.experts.96.w2", "model.layers.23.block_sparse_moe.experts.97.w2", "model.layers.23.block_sparse_moe.experts.98.w2", "model.layers.23.block_sparse_moe.experts.99.w2", "model.layers.23.block_sparse_moe.experts.100.w2", "model.layers.23.block_sparse_moe.experts.101.w2", "model.layers.23.block_sparse_moe.experts.102.w2", "model.layers.23.block_sparse_moe.experts.103.w2", "model.layers.23.block_sparse_moe.experts.104.w2", "model.layers.23.block_sparse_moe.experts.105.w2", "model.layers.23.block_sparse_moe.experts.106.w2", "model.layers.23.block_sparse_moe.experts.107.w2", "model.layers.23.block_sparse_moe.experts.108.w2", "model.layers.23.block_sparse_moe.experts.109.w2", "model.layers.23.block_sparse_moe.experts.110.w2", "model.layers.23.block_sparse_moe.experts.111.w2", "model.layers.23.block_sparse_moe.experts.112.w2", "model.layers.23.block_sparse_moe.experts.113.w2", "model.layers.23.block_sparse_moe.experts.114.w2", "model.layers.23.block_sparse_moe.experts.115.w2", "model.layers.23.block_sparse_moe.experts.116.w2", "model.layers.23.block_sparse_moe.experts.117.w2", "model.layers.23.block_sparse_moe.experts.118.w2", "model.layers.23.block_sparse_moe.experts.119.w2", "model.layers.23.block_sparse_moe.experts.120.w2", "model.layers.23.block_sparse_moe.experts.121.w2", "model.layers.23.block_sparse_moe.experts.122.w2", "model.layers.23.block_sparse_moe.experts.123.w2", "model.layers.23.block_sparse_moe.experts.124.w2", "model.layers.23.block_sparse_moe.experts.125.w2", "model.layers.23.block_sparse_moe.experts.126.w2", "model.layers.23.block_sparse_moe.experts.127.w2", "model.layers.23.block_sparse_moe.experts.128.w2", "model.layers.23.block_sparse_moe.experts.129.w2", "model.layers.23.block_sparse_moe.experts.130.w2", "model.layers.23.block_sparse_moe.experts.131.w2", "model.layers.23.block_sparse_moe.experts.132.w2", "model.layers.23.block_sparse_moe.experts.133.w2", "model.layers.23.block_sparse_moe.experts.134.w2", "model.layers.23.block_sparse_moe.experts.135.w2", "model.layers.23.block_sparse_moe.experts.136.w2", "model.layers.23.block_sparse_moe.experts.137.w2", "model.layers.23.block_sparse_moe.experts.138.w2", "model.layers.23.block_sparse_moe.experts.139.w2", "model.layers.23.block_sparse_moe.experts.140.w2", "model.layers.23.block_sparse_moe.experts.141.w2", "model.layers.23.block_sparse_moe.experts.142.w2", "model.layers.23.block_sparse_moe.experts.143.w2", "model.layers.23.block_sparse_moe.experts.144.w2", "model.layers.23.block_sparse_moe.experts.145.w2", "model.layers.23.block_sparse_moe.experts.146.w2", "model.layers.23.block_sparse_moe.experts.147.w2", "model.layers.23.block_sparse_moe.experts.148.w2", "model.layers.23.block_sparse_moe.experts.149.w2", "model.layers.23.block_sparse_moe.experts.150.w2", "model.layers.23.block_sparse_moe.experts.151.w2", "model.layers.23.block_sparse_moe.experts.152.w2", "model.layers.23.block_sparse_moe.experts.153.w2", "model.layers.23.block_sparse_moe.experts.154.w2", "model.layers.23.block_sparse_moe.experts.155.w2", "model.layers.23.block_sparse_moe.experts.156.w2", "model.layers.23.block_sparse_moe.experts.157.w2", "model.layers.23.block_sparse_moe.experts.158.w2", "model.layers.23.block_sparse_moe.experts.159.w2", "model.layers.23.block_sparse_moe.experts.160.w2", "model.layers.23.block_sparse_moe.experts.161.w2", "model.layers.23.block_sparse_moe.experts.162.w2", "model.layers.23.block_sparse_moe.experts.163.w2", "model.layers.23.block_sparse_moe.experts.164.w2", "model.layers.23.block_sparse_moe.experts.165.w2", "model.layers.23.block_sparse_moe.experts.166.w2", "model.layers.23.block_sparse_moe.experts.167.w2", "model.layers.23.block_sparse_moe.experts.168.w2", "model.layers.23.block_sparse_moe.experts.169.w2", "model.layers.23.block_sparse_moe.experts.170.w2", "model.layers.23.block_sparse_moe.experts.171.w2", "model.layers.23.block_sparse_moe.experts.172.w2", "model.layers.23.block_sparse_moe.experts.173.w2", "model.layers.23.block_sparse_moe.experts.174.w2", "model.layers.23.block_sparse_moe.experts.175.w2", "model.layers.23.block_sparse_moe.experts.176.w2", "model.layers.23.block_sparse_moe.experts.177.w2", "model.layers.23.block_sparse_moe.experts.178.w2", "model.layers.23.block_sparse_moe.experts.179.w2", "model.layers.23.block_sparse_moe.experts.180.w2", "model.layers.23.block_sparse_moe.experts.181.w2", "model.layers.23.block_sparse_moe.experts.182.w2", "model.layers.23.block_sparse_moe.experts.183.w2", "model.layers.23.block_sparse_moe.experts.184.w2", "model.layers.23.block_sparse_moe.experts.185.w2", "model.layers.23.block_sparse_moe.experts.186.w2", "model.layers.23.block_sparse_moe.experts.187.w2", "model.layers.23.block_sparse_moe.experts.188.w2", "model.layers.23.block_sparse_moe.experts.189.w2", "model.layers.23.block_sparse_moe.experts.190.w2", "model.layers.23.block_sparse_moe.experts.191.w2", "model.layers.23.block_sparse_moe.experts.192.w2", "model.layers.23.block_sparse_moe.experts.193.w2", "model.layers.23.block_sparse_moe.experts.194.w2", "model.layers.23.block_sparse_moe.experts.195.w2", "model.layers.23.block_sparse_moe.experts.196.w2", "model.layers.23.block_sparse_moe.experts.197.w2", "model.layers.23.block_sparse_moe.experts.198.w2", "model.layers.23.block_sparse_moe.experts.199.w2", "model.layers.23.block_sparse_moe.experts.200.w2", "model.layers.23.block_sparse_moe.experts.201.w2", "model.layers.23.block_sparse_moe.experts.202.w2", "model.layers.23.block_sparse_moe.experts.203.w2", "model.layers.23.block_sparse_moe.experts.204.w2", "model.layers.23.block_sparse_moe.experts.205.w2", "model.layers.23.block_sparse_moe.experts.206.w2", "model.layers.23.block_sparse_moe.experts.207.w2", "model.layers.23.block_sparse_moe.experts.208.w2", "model.layers.23.block_sparse_moe.experts.209.w2", "model.layers.23.block_sparse_moe.experts.210.w2", "model.layers.23.block_sparse_moe.experts.211.w2", "model.layers.23.block_sparse_moe.experts.212.w2", "model.layers.23.block_sparse_moe.experts.213.w2", "model.layers.23.block_sparse_moe.experts.214.w2", "model.layers.23.block_sparse_moe.experts.215.w2", "model.layers.23.block_sparse_moe.experts.216.w2", "model.layers.23.block_sparse_moe.experts.217.w2", "model.layers.23.block_sparse_moe.experts.218.w2", "model.layers.23.block_sparse_moe.experts.219.w2", "model.layers.23.block_sparse_moe.experts.220.w2", "model.layers.23.block_sparse_moe.experts.221.w2", "model.layers.23.block_sparse_moe.experts.222.w2", "model.layers.23.block_sparse_moe.experts.223.w2", "model.layers.23.block_sparse_moe.experts.224.w2", "model.layers.23.block_sparse_moe.experts.225.w2", "model.layers.23.block_sparse_moe.experts.226.w2", "model.layers.23.block_sparse_moe.experts.227.w2", "model.layers.23.block_sparse_moe.experts.228.w2", "model.layers.23.block_sparse_moe.experts.229.w2", "model.layers.23.block_sparse_moe.experts.230.w2", "model.layers.23.block_sparse_moe.experts.231.w2", "model.layers.23.block_sparse_moe.experts.232.w2", "model.layers.23.block_sparse_moe.experts.233.w2", "model.layers.23.block_sparse_moe.experts.234.w2", "model.layers.23.block_sparse_moe.experts.235.w2", "model.layers.23.block_sparse_moe.experts.236.w2", "model.layers.23.block_sparse_moe.experts.237.w2", "model.layers.23.block_sparse_moe.experts.238.w2", "model.layers.23.block_sparse_moe.experts.239.w2", "model.layers.23.block_sparse_moe.experts.240.w2", "model.layers.23.block_sparse_moe.experts.241.w2", "model.layers.23.block_sparse_moe.experts.242.w2", "model.layers.23.block_sparse_moe.experts.243.w2", "model.layers.23.block_sparse_moe.experts.244.w2", "model.layers.23.block_sparse_moe.experts.245.w2", "model.layers.23.block_sparse_moe.experts.246.w2", "model.layers.23.block_sparse_moe.experts.247.w2", "model.layers.23.block_sparse_moe.experts.248.w2", "model.layers.23.block_sparse_moe.experts.249.w2", "model.layers.23.block_sparse_moe.experts.250.w2", "model.layers.23.block_sparse_moe.experts.251.w2", "model.layers.23.block_sparse_moe.experts.252.w2", "model.layers.23.block_sparse_moe.experts.253.w2", "model.layers.23.block_sparse_moe.experts.254.w2", "model.layers.23.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0010076535865664593, "dbits": 3623878656 } ] }, { "idx": 48, "layers": [ "model.layers.24.self_attn.q_proj", "model.layers.24.self_attn.k_proj", "model.layers.24.self_attn.v_proj", "model.layers.24.self_attn.o_proj" ], "candidates": [ { "dkld": 0.008848878927528891, "dbits": 44040192 } ] }, { "idx": 49, "layers": [ "model.layers.24.block_sparse_moe.experts.0.w1", "model.layers.24.block_sparse_moe.experts.1.w1", "model.layers.24.block_sparse_moe.experts.2.w1", "model.layers.24.block_sparse_moe.experts.3.w1", "model.layers.24.block_sparse_moe.experts.4.w1", "model.layers.24.block_sparse_moe.experts.5.w1", "model.layers.24.block_sparse_moe.experts.6.w1", "model.layers.24.block_sparse_moe.experts.7.w1", "model.layers.24.block_sparse_moe.experts.8.w1", "model.layers.24.block_sparse_moe.experts.9.w1", "model.layers.24.block_sparse_moe.experts.10.w1", "model.layers.24.block_sparse_moe.experts.11.w1", "model.layers.24.block_sparse_moe.experts.12.w1", "model.layers.24.block_sparse_moe.experts.13.w1", "model.layers.24.block_sparse_moe.experts.14.w1", "model.layers.24.block_sparse_moe.experts.15.w1", "model.layers.24.block_sparse_moe.experts.16.w1", "model.layers.24.block_sparse_moe.experts.17.w1", "model.layers.24.block_sparse_moe.experts.18.w1", "model.layers.24.block_sparse_moe.experts.19.w1", "model.layers.24.block_sparse_moe.experts.20.w1", "model.layers.24.block_sparse_moe.experts.21.w1", "model.layers.24.block_sparse_moe.experts.22.w1", "model.layers.24.block_sparse_moe.experts.23.w1", "model.layers.24.block_sparse_moe.experts.24.w1", "model.layers.24.block_sparse_moe.experts.25.w1", "model.layers.24.block_sparse_moe.experts.26.w1", "model.layers.24.block_sparse_moe.experts.27.w1", "model.layers.24.block_sparse_moe.experts.28.w1", "model.layers.24.block_sparse_moe.experts.29.w1", "model.layers.24.block_sparse_moe.experts.30.w1", "model.layers.24.block_sparse_moe.experts.31.w1", "model.layers.24.block_sparse_moe.experts.32.w1", "model.layers.24.block_sparse_moe.experts.33.w1", "model.layers.24.block_sparse_moe.experts.34.w1", "model.layers.24.block_sparse_moe.experts.35.w1", "model.layers.24.block_sparse_moe.experts.36.w1", "model.layers.24.block_sparse_moe.experts.37.w1", "model.layers.24.block_sparse_moe.experts.38.w1", "model.layers.24.block_sparse_moe.experts.39.w1", "model.layers.24.block_sparse_moe.experts.40.w1", "model.layers.24.block_sparse_moe.experts.41.w1", "model.layers.24.block_sparse_moe.experts.42.w1", "model.layers.24.block_sparse_moe.experts.43.w1", "model.layers.24.block_sparse_moe.experts.44.w1", "model.layers.24.block_sparse_moe.experts.45.w1", "model.layers.24.block_sparse_moe.experts.46.w1", "model.layers.24.block_sparse_moe.experts.47.w1", "model.layers.24.block_sparse_moe.experts.48.w1", "model.layers.24.block_sparse_moe.experts.49.w1", "model.layers.24.block_sparse_moe.experts.50.w1", "model.layers.24.block_sparse_moe.experts.51.w1", "model.layers.24.block_sparse_moe.experts.52.w1", "model.layers.24.block_sparse_moe.experts.53.w1", "model.layers.24.block_sparse_moe.experts.54.w1", "model.layers.24.block_sparse_moe.experts.55.w1", "model.layers.24.block_sparse_moe.experts.56.w1", "model.layers.24.block_sparse_moe.experts.57.w1", "model.layers.24.block_sparse_moe.experts.58.w1", "model.layers.24.block_sparse_moe.experts.59.w1", "model.layers.24.block_sparse_moe.experts.60.w1", "model.layers.24.block_sparse_moe.experts.61.w1", "model.layers.24.block_sparse_moe.experts.62.w1", "model.layers.24.block_sparse_moe.experts.63.w1", "model.layers.24.block_sparse_moe.experts.64.w1", "model.layers.24.block_sparse_moe.experts.65.w1", "model.layers.24.block_sparse_moe.experts.66.w1", "model.layers.24.block_sparse_moe.experts.67.w1", "model.layers.24.block_sparse_moe.experts.68.w1", "model.layers.24.block_sparse_moe.experts.69.w1", "model.layers.24.block_sparse_moe.experts.70.w1", "model.layers.24.block_sparse_moe.experts.71.w1", "model.layers.24.block_sparse_moe.experts.72.w1", "model.layers.24.block_sparse_moe.experts.73.w1", "model.layers.24.block_sparse_moe.experts.74.w1", "model.layers.24.block_sparse_moe.experts.75.w1", "model.layers.24.block_sparse_moe.experts.76.w1", "model.layers.24.block_sparse_moe.experts.77.w1", "model.layers.24.block_sparse_moe.experts.78.w1", "model.layers.24.block_sparse_moe.experts.79.w1", "model.layers.24.block_sparse_moe.experts.80.w1", "model.layers.24.block_sparse_moe.experts.81.w1", "model.layers.24.block_sparse_moe.experts.82.w1", "model.layers.24.block_sparse_moe.experts.83.w1", "model.layers.24.block_sparse_moe.experts.84.w1", "model.layers.24.block_sparse_moe.experts.85.w1", "model.layers.24.block_sparse_moe.experts.86.w1", "model.layers.24.block_sparse_moe.experts.87.w1", "model.layers.24.block_sparse_moe.experts.88.w1", "model.layers.24.block_sparse_moe.experts.89.w1", "model.layers.24.block_sparse_moe.experts.90.w1", "model.layers.24.block_sparse_moe.experts.91.w1", "model.layers.24.block_sparse_moe.experts.92.w1", "model.layers.24.block_sparse_moe.experts.93.w1", "model.layers.24.block_sparse_moe.experts.94.w1", "model.layers.24.block_sparse_moe.experts.95.w1", "model.layers.24.block_sparse_moe.experts.96.w1", "model.layers.24.block_sparse_moe.experts.97.w1", "model.layers.24.block_sparse_moe.experts.98.w1", "model.layers.24.block_sparse_moe.experts.99.w1", "model.layers.24.block_sparse_moe.experts.100.w1", "model.layers.24.block_sparse_moe.experts.101.w1", "model.layers.24.block_sparse_moe.experts.102.w1", "model.layers.24.block_sparse_moe.experts.103.w1", "model.layers.24.block_sparse_moe.experts.104.w1", "model.layers.24.block_sparse_moe.experts.105.w1", "model.layers.24.block_sparse_moe.experts.106.w1", "model.layers.24.block_sparse_moe.experts.107.w1", "model.layers.24.block_sparse_moe.experts.108.w1", "model.layers.24.block_sparse_moe.experts.109.w1", "model.layers.24.block_sparse_moe.experts.110.w1", "model.layers.24.block_sparse_moe.experts.111.w1", "model.layers.24.block_sparse_moe.experts.112.w1", "model.layers.24.block_sparse_moe.experts.113.w1", "model.layers.24.block_sparse_moe.experts.114.w1", "model.layers.24.block_sparse_moe.experts.115.w1", "model.layers.24.block_sparse_moe.experts.116.w1", "model.layers.24.block_sparse_moe.experts.117.w1", "model.layers.24.block_sparse_moe.experts.118.w1", "model.layers.24.block_sparse_moe.experts.119.w1", "model.layers.24.block_sparse_moe.experts.120.w1", "model.layers.24.block_sparse_moe.experts.121.w1", "model.layers.24.block_sparse_moe.experts.122.w1", "model.layers.24.block_sparse_moe.experts.123.w1", "model.layers.24.block_sparse_moe.experts.124.w1", "model.layers.24.block_sparse_moe.experts.125.w1", "model.layers.24.block_sparse_moe.experts.126.w1", "model.layers.24.block_sparse_moe.experts.127.w1", "model.layers.24.block_sparse_moe.experts.128.w1", "model.layers.24.block_sparse_moe.experts.129.w1", "model.layers.24.block_sparse_moe.experts.130.w1", "model.layers.24.block_sparse_moe.experts.131.w1", "model.layers.24.block_sparse_moe.experts.132.w1", "model.layers.24.block_sparse_moe.experts.133.w1", "model.layers.24.block_sparse_moe.experts.134.w1", "model.layers.24.block_sparse_moe.experts.135.w1", "model.layers.24.block_sparse_moe.experts.136.w1", "model.layers.24.block_sparse_moe.experts.137.w1", "model.layers.24.block_sparse_moe.experts.138.w1", "model.layers.24.block_sparse_moe.experts.139.w1", "model.layers.24.block_sparse_moe.experts.140.w1", "model.layers.24.block_sparse_moe.experts.141.w1", "model.layers.24.block_sparse_moe.experts.142.w1", "model.layers.24.block_sparse_moe.experts.143.w1", "model.layers.24.block_sparse_moe.experts.144.w1", "model.layers.24.block_sparse_moe.experts.145.w1", "model.layers.24.block_sparse_moe.experts.146.w1", "model.layers.24.block_sparse_moe.experts.147.w1", "model.layers.24.block_sparse_moe.experts.148.w1", "model.layers.24.block_sparse_moe.experts.149.w1", "model.layers.24.block_sparse_moe.experts.150.w1", "model.layers.24.block_sparse_moe.experts.151.w1", "model.layers.24.block_sparse_moe.experts.152.w1", "model.layers.24.block_sparse_moe.experts.153.w1", "model.layers.24.block_sparse_moe.experts.154.w1", "model.layers.24.block_sparse_moe.experts.155.w1", "model.layers.24.block_sparse_moe.experts.156.w1", "model.layers.24.block_sparse_moe.experts.157.w1", "model.layers.24.block_sparse_moe.experts.158.w1", "model.layers.24.block_sparse_moe.experts.159.w1", "model.layers.24.block_sparse_moe.experts.160.w1", "model.layers.24.block_sparse_moe.experts.161.w1", "model.layers.24.block_sparse_moe.experts.162.w1", "model.layers.24.block_sparse_moe.experts.163.w1", "model.layers.24.block_sparse_moe.experts.164.w1", "model.layers.24.block_sparse_moe.experts.165.w1", "model.layers.24.block_sparse_moe.experts.166.w1", "model.layers.24.block_sparse_moe.experts.167.w1", "model.layers.24.block_sparse_moe.experts.168.w1", "model.layers.24.block_sparse_moe.experts.169.w1", "model.layers.24.block_sparse_moe.experts.170.w1", "model.layers.24.block_sparse_moe.experts.171.w1", "model.layers.24.block_sparse_moe.experts.172.w1", "model.layers.24.block_sparse_moe.experts.173.w1", "model.layers.24.block_sparse_moe.experts.174.w1", "model.layers.24.block_sparse_moe.experts.175.w1", "model.layers.24.block_sparse_moe.experts.176.w1", "model.layers.24.block_sparse_moe.experts.177.w1", "model.layers.24.block_sparse_moe.experts.178.w1", "model.layers.24.block_sparse_moe.experts.179.w1", "model.layers.24.block_sparse_moe.experts.180.w1", "model.layers.24.block_sparse_moe.experts.181.w1", "model.layers.24.block_sparse_moe.experts.182.w1", "model.layers.24.block_sparse_moe.experts.183.w1", "model.layers.24.block_sparse_moe.experts.184.w1", "model.layers.24.block_sparse_moe.experts.185.w1", "model.layers.24.block_sparse_moe.experts.186.w1", "model.layers.24.block_sparse_moe.experts.187.w1", "model.layers.24.block_sparse_moe.experts.188.w1", "model.layers.24.block_sparse_moe.experts.189.w1", "model.layers.24.block_sparse_moe.experts.190.w1", "model.layers.24.block_sparse_moe.experts.191.w1", "model.layers.24.block_sparse_moe.experts.192.w1", "model.layers.24.block_sparse_moe.experts.193.w1", "model.layers.24.block_sparse_moe.experts.194.w1", "model.layers.24.block_sparse_moe.experts.195.w1", "model.layers.24.block_sparse_moe.experts.196.w1", "model.layers.24.block_sparse_moe.experts.197.w1", "model.layers.24.block_sparse_moe.experts.198.w1", "model.layers.24.block_sparse_moe.experts.199.w1", "model.layers.24.block_sparse_moe.experts.200.w1", "model.layers.24.block_sparse_moe.experts.201.w1", "model.layers.24.block_sparse_moe.experts.202.w1", "model.layers.24.block_sparse_moe.experts.203.w1", "model.layers.24.block_sparse_moe.experts.204.w1", "model.layers.24.block_sparse_moe.experts.205.w1", "model.layers.24.block_sparse_moe.experts.206.w1", "model.layers.24.block_sparse_moe.experts.207.w1", "model.layers.24.block_sparse_moe.experts.208.w1", "model.layers.24.block_sparse_moe.experts.209.w1", "model.layers.24.block_sparse_moe.experts.210.w1", "model.layers.24.block_sparse_moe.experts.211.w1", "model.layers.24.block_sparse_moe.experts.212.w1", "model.layers.24.block_sparse_moe.experts.213.w1", "model.layers.24.block_sparse_moe.experts.214.w1", "model.layers.24.block_sparse_moe.experts.215.w1", "model.layers.24.block_sparse_moe.experts.216.w1", "model.layers.24.block_sparse_moe.experts.217.w1", "model.layers.24.block_sparse_moe.experts.218.w1", "model.layers.24.block_sparse_moe.experts.219.w1", "model.layers.24.block_sparse_moe.experts.220.w1", "model.layers.24.block_sparse_moe.experts.221.w1", "model.layers.24.block_sparse_moe.experts.222.w1", "model.layers.24.block_sparse_moe.experts.223.w1", "model.layers.24.block_sparse_moe.experts.224.w1", "model.layers.24.block_sparse_moe.experts.225.w1", "model.layers.24.block_sparse_moe.experts.226.w1", "model.layers.24.block_sparse_moe.experts.227.w1", "model.layers.24.block_sparse_moe.experts.228.w1", "model.layers.24.block_sparse_moe.experts.229.w1", "model.layers.24.block_sparse_moe.experts.230.w1", "model.layers.24.block_sparse_moe.experts.231.w1", "model.layers.24.block_sparse_moe.experts.232.w1", "model.layers.24.block_sparse_moe.experts.233.w1", "model.layers.24.block_sparse_moe.experts.234.w1", "model.layers.24.block_sparse_moe.experts.235.w1", "model.layers.24.block_sparse_moe.experts.236.w1", "model.layers.24.block_sparse_moe.experts.237.w1", "model.layers.24.block_sparse_moe.experts.238.w1", "model.layers.24.block_sparse_moe.experts.239.w1", "model.layers.24.block_sparse_moe.experts.240.w1", "model.layers.24.block_sparse_moe.experts.241.w1", "model.layers.24.block_sparse_moe.experts.242.w1", "model.layers.24.block_sparse_moe.experts.243.w1", "model.layers.24.block_sparse_moe.experts.244.w1", "model.layers.24.block_sparse_moe.experts.245.w1", "model.layers.24.block_sparse_moe.experts.246.w1", "model.layers.24.block_sparse_moe.experts.247.w1", "model.layers.24.block_sparse_moe.experts.248.w1", "model.layers.24.block_sparse_moe.experts.249.w1", "model.layers.24.block_sparse_moe.experts.250.w1", "model.layers.24.block_sparse_moe.experts.251.w1", "model.layers.24.block_sparse_moe.experts.252.w1", "model.layers.24.block_sparse_moe.experts.253.w1", "model.layers.24.block_sparse_moe.experts.254.w1", "model.layers.24.block_sparse_moe.experts.255.w1", "model.layers.24.block_sparse_moe.experts.0.w3", "model.layers.24.block_sparse_moe.experts.1.w3", "model.layers.24.block_sparse_moe.experts.2.w3", "model.layers.24.block_sparse_moe.experts.3.w3", "model.layers.24.block_sparse_moe.experts.4.w3", "model.layers.24.block_sparse_moe.experts.5.w3", "model.layers.24.block_sparse_moe.experts.6.w3", "model.layers.24.block_sparse_moe.experts.7.w3", "model.layers.24.block_sparse_moe.experts.8.w3", "model.layers.24.block_sparse_moe.experts.9.w3", "model.layers.24.block_sparse_moe.experts.10.w3", "model.layers.24.block_sparse_moe.experts.11.w3", "model.layers.24.block_sparse_moe.experts.12.w3", "model.layers.24.block_sparse_moe.experts.13.w3", "model.layers.24.block_sparse_moe.experts.14.w3", "model.layers.24.block_sparse_moe.experts.15.w3", "model.layers.24.block_sparse_moe.experts.16.w3", "model.layers.24.block_sparse_moe.experts.17.w3", "model.layers.24.block_sparse_moe.experts.18.w3", "model.layers.24.block_sparse_moe.experts.19.w3", "model.layers.24.block_sparse_moe.experts.20.w3", "model.layers.24.block_sparse_moe.experts.21.w3", "model.layers.24.block_sparse_moe.experts.22.w3", "model.layers.24.block_sparse_moe.experts.23.w3", "model.layers.24.block_sparse_moe.experts.24.w3", "model.layers.24.block_sparse_moe.experts.25.w3", "model.layers.24.block_sparse_moe.experts.26.w3", "model.layers.24.block_sparse_moe.experts.27.w3", "model.layers.24.block_sparse_moe.experts.28.w3", "model.layers.24.block_sparse_moe.experts.29.w3", "model.layers.24.block_sparse_moe.experts.30.w3", "model.layers.24.block_sparse_moe.experts.31.w3", "model.layers.24.block_sparse_moe.experts.32.w3", "model.layers.24.block_sparse_moe.experts.33.w3", "model.layers.24.block_sparse_moe.experts.34.w3", "model.layers.24.block_sparse_moe.experts.35.w3", "model.layers.24.block_sparse_moe.experts.36.w3", "model.layers.24.block_sparse_moe.experts.37.w3", "model.layers.24.block_sparse_moe.experts.38.w3", "model.layers.24.block_sparse_moe.experts.39.w3", "model.layers.24.block_sparse_moe.experts.40.w3", "model.layers.24.block_sparse_moe.experts.41.w3", "model.layers.24.block_sparse_moe.experts.42.w3", "model.layers.24.block_sparse_moe.experts.43.w3", "model.layers.24.block_sparse_moe.experts.44.w3", "model.layers.24.block_sparse_moe.experts.45.w3", "model.layers.24.block_sparse_moe.experts.46.w3", "model.layers.24.block_sparse_moe.experts.47.w3", "model.layers.24.block_sparse_moe.experts.48.w3", "model.layers.24.block_sparse_moe.experts.49.w3", "model.layers.24.block_sparse_moe.experts.50.w3", "model.layers.24.block_sparse_moe.experts.51.w3", "model.layers.24.block_sparse_moe.experts.52.w3", "model.layers.24.block_sparse_moe.experts.53.w3", "model.layers.24.block_sparse_moe.experts.54.w3", "model.layers.24.block_sparse_moe.experts.55.w3", "model.layers.24.block_sparse_moe.experts.56.w3", "model.layers.24.block_sparse_moe.experts.57.w3", "model.layers.24.block_sparse_moe.experts.58.w3", "model.layers.24.block_sparse_moe.experts.59.w3", "model.layers.24.block_sparse_moe.experts.60.w3", "model.layers.24.block_sparse_moe.experts.61.w3", "model.layers.24.block_sparse_moe.experts.62.w3", "model.layers.24.block_sparse_moe.experts.63.w3", "model.layers.24.block_sparse_moe.experts.64.w3", "model.layers.24.block_sparse_moe.experts.65.w3", "model.layers.24.block_sparse_moe.experts.66.w3", "model.layers.24.block_sparse_moe.experts.67.w3", "model.layers.24.block_sparse_moe.experts.68.w3", "model.layers.24.block_sparse_moe.experts.69.w3", "model.layers.24.block_sparse_moe.experts.70.w3", "model.layers.24.block_sparse_moe.experts.71.w3", "model.layers.24.block_sparse_moe.experts.72.w3", "model.layers.24.block_sparse_moe.experts.73.w3", "model.layers.24.block_sparse_moe.experts.74.w3", "model.layers.24.block_sparse_moe.experts.75.w3", "model.layers.24.block_sparse_moe.experts.76.w3", "model.layers.24.block_sparse_moe.experts.77.w3", "model.layers.24.block_sparse_moe.experts.78.w3", "model.layers.24.block_sparse_moe.experts.79.w3", "model.layers.24.block_sparse_moe.experts.80.w3", "model.layers.24.block_sparse_moe.experts.81.w3", "model.layers.24.block_sparse_moe.experts.82.w3", "model.layers.24.block_sparse_moe.experts.83.w3", "model.layers.24.block_sparse_moe.experts.84.w3", "model.layers.24.block_sparse_moe.experts.85.w3", "model.layers.24.block_sparse_moe.experts.86.w3", "model.layers.24.block_sparse_moe.experts.87.w3", "model.layers.24.block_sparse_moe.experts.88.w3", "model.layers.24.block_sparse_moe.experts.89.w3", "model.layers.24.block_sparse_moe.experts.90.w3", "model.layers.24.block_sparse_moe.experts.91.w3", "model.layers.24.block_sparse_moe.experts.92.w3", "model.layers.24.block_sparse_moe.experts.93.w3", "model.layers.24.block_sparse_moe.experts.94.w3", "model.layers.24.block_sparse_moe.experts.95.w3", "model.layers.24.block_sparse_moe.experts.96.w3", "model.layers.24.block_sparse_moe.experts.97.w3", "model.layers.24.block_sparse_moe.experts.98.w3", "model.layers.24.block_sparse_moe.experts.99.w3", "model.layers.24.block_sparse_moe.experts.100.w3", "model.layers.24.block_sparse_moe.experts.101.w3", "model.layers.24.block_sparse_moe.experts.102.w3", "model.layers.24.block_sparse_moe.experts.103.w3", "model.layers.24.block_sparse_moe.experts.104.w3", "model.layers.24.block_sparse_moe.experts.105.w3", "model.layers.24.block_sparse_moe.experts.106.w3", "model.layers.24.block_sparse_moe.experts.107.w3", "model.layers.24.block_sparse_moe.experts.108.w3", "model.layers.24.block_sparse_moe.experts.109.w3", "model.layers.24.block_sparse_moe.experts.110.w3", "model.layers.24.block_sparse_moe.experts.111.w3", "model.layers.24.block_sparse_moe.experts.112.w3", "model.layers.24.block_sparse_moe.experts.113.w3", "model.layers.24.block_sparse_moe.experts.114.w3", "model.layers.24.block_sparse_moe.experts.115.w3", "model.layers.24.block_sparse_moe.experts.116.w3", "model.layers.24.block_sparse_moe.experts.117.w3", "model.layers.24.block_sparse_moe.experts.118.w3", "model.layers.24.block_sparse_moe.experts.119.w3", "model.layers.24.block_sparse_moe.experts.120.w3", "model.layers.24.block_sparse_moe.experts.121.w3", "model.layers.24.block_sparse_moe.experts.122.w3", "model.layers.24.block_sparse_moe.experts.123.w3", "model.layers.24.block_sparse_moe.experts.124.w3", "model.layers.24.block_sparse_moe.experts.125.w3", "model.layers.24.block_sparse_moe.experts.126.w3", "model.layers.24.block_sparse_moe.experts.127.w3", "model.layers.24.block_sparse_moe.experts.128.w3", "model.layers.24.block_sparse_moe.experts.129.w3", "model.layers.24.block_sparse_moe.experts.130.w3", "model.layers.24.block_sparse_moe.experts.131.w3", "model.layers.24.block_sparse_moe.experts.132.w3", "model.layers.24.block_sparse_moe.experts.133.w3", "model.layers.24.block_sparse_moe.experts.134.w3", "model.layers.24.block_sparse_moe.experts.135.w3", "model.layers.24.block_sparse_moe.experts.136.w3", "model.layers.24.block_sparse_moe.experts.137.w3", "model.layers.24.block_sparse_moe.experts.138.w3", "model.layers.24.block_sparse_moe.experts.139.w3", "model.layers.24.block_sparse_moe.experts.140.w3", "model.layers.24.block_sparse_moe.experts.141.w3", "model.layers.24.block_sparse_moe.experts.142.w3", "model.layers.24.block_sparse_moe.experts.143.w3", "model.layers.24.block_sparse_moe.experts.144.w3", "model.layers.24.block_sparse_moe.experts.145.w3", "model.layers.24.block_sparse_moe.experts.146.w3", "model.layers.24.block_sparse_moe.experts.147.w3", "model.layers.24.block_sparse_moe.experts.148.w3", "model.layers.24.block_sparse_moe.experts.149.w3", "model.layers.24.block_sparse_moe.experts.150.w3", "model.layers.24.block_sparse_moe.experts.151.w3", "model.layers.24.block_sparse_moe.experts.152.w3", "model.layers.24.block_sparse_moe.experts.153.w3", "model.layers.24.block_sparse_moe.experts.154.w3", "model.layers.24.block_sparse_moe.experts.155.w3", "model.layers.24.block_sparse_moe.experts.156.w3", "model.layers.24.block_sparse_moe.experts.157.w3", "model.layers.24.block_sparse_moe.experts.158.w3", "model.layers.24.block_sparse_moe.experts.159.w3", "model.layers.24.block_sparse_moe.experts.160.w3", "model.layers.24.block_sparse_moe.experts.161.w3", "model.layers.24.block_sparse_moe.experts.162.w3", "model.layers.24.block_sparse_moe.experts.163.w3", "model.layers.24.block_sparse_moe.experts.164.w3", "model.layers.24.block_sparse_moe.experts.165.w3", "model.layers.24.block_sparse_moe.experts.166.w3", "model.layers.24.block_sparse_moe.experts.167.w3", "model.layers.24.block_sparse_moe.experts.168.w3", "model.layers.24.block_sparse_moe.experts.169.w3", "model.layers.24.block_sparse_moe.experts.170.w3", "model.layers.24.block_sparse_moe.experts.171.w3", "model.layers.24.block_sparse_moe.experts.172.w3", "model.layers.24.block_sparse_moe.experts.173.w3", "model.layers.24.block_sparse_moe.experts.174.w3", "model.layers.24.block_sparse_moe.experts.175.w3", "model.layers.24.block_sparse_moe.experts.176.w3", "model.layers.24.block_sparse_moe.experts.177.w3", "model.layers.24.block_sparse_moe.experts.178.w3", "model.layers.24.block_sparse_moe.experts.179.w3", "model.layers.24.block_sparse_moe.experts.180.w3", "model.layers.24.block_sparse_moe.experts.181.w3", "model.layers.24.block_sparse_moe.experts.182.w3", "model.layers.24.block_sparse_moe.experts.183.w3", "model.layers.24.block_sparse_moe.experts.184.w3", "model.layers.24.block_sparse_moe.experts.185.w3", "model.layers.24.block_sparse_moe.experts.186.w3", "model.layers.24.block_sparse_moe.experts.187.w3", "model.layers.24.block_sparse_moe.experts.188.w3", "model.layers.24.block_sparse_moe.experts.189.w3", "model.layers.24.block_sparse_moe.experts.190.w3", "model.layers.24.block_sparse_moe.experts.191.w3", "model.layers.24.block_sparse_moe.experts.192.w3", "model.layers.24.block_sparse_moe.experts.193.w3", "model.layers.24.block_sparse_moe.experts.194.w3", "model.layers.24.block_sparse_moe.experts.195.w3", "model.layers.24.block_sparse_moe.experts.196.w3", "model.layers.24.block_sparse_moe.experts.197.w3", "model.layers.24.block_sparse_moe.experts.198.w3", "model.layers.24.block_sparse_moe.experts.199.w3", "model.layers.24.block_sparse_moe.experts.200.w3", "model.layers.24.block_sparse_moe.experts.201.w3", "model.layers.24.block_sparse_moe.experts.202.w3", "model.layers.24.block_sparse_moe.experts.203.w3", "model.layers.24.block_sparse_moe.experts.204.w3", "model.layers.24.block_sparse_moe.experts.205.w3", "model.layers.24.block_sparse_moe.experts.206.w3", "model.layers.24.block_sparse_moe.experts.207.w3", "model.layers.24.block_sparse_moe.experts.208.w3", "model.layers.24.block_sparse_moe.experts.209.w3", "model.layers.24.block_sparse_moe.experts.210.w3", "model.layers.24.block_sparse_moe.experts.211.w3", "model.layers.24.block_sparse_moe.experts.212.w3", "model.layers.24.block_sparse_moe.experts.213.w3", "model.layers.24.block_sparse_moe.experts.214.w3", "model.layers.24.block_sparse_moe.experts.215.w3", "model.layers.24.block_sparse_moe.experts.216.w3", "model.layers.24.block_sparse_moe.experts.217.w3", "model.layers.24.block_sparse_moe.experts.218.w3", "model.layers.24.block_sparse_moe.experts.219.w3", "model.layers.24.block_sparse_moe.experts.220.w3", "model.layers.24.block_sparse_moe.experts.221.w3", "model.layers.24.block_sparse_moe.experts.222.w3", "model.layers.24.block_sparse_moe.experts.223.w3", "model.layers.24.block_sparse_moe.experts.224.w3", "model.layers.24.block_sparse_moe.experts.225.w3", "model.layers.24.block_sparse_moe.experts.226.w3", "model.layers.24.block_sparse_moe.experts.227.w3", "model.layers.24.block_sparse_moe.experts.228.w3", "model.layers.24.block_sparse_moe.experts.229.w3", "model.layers.24.block_sparse_moe.experts.230.w3", "model.layers.24.block_sparse_moe.experts.231.w3", "model.layers.24.block_sparse_moe.experts.232.w3", "model.layers.24.block_sparse_moe.experts.233.w3", "model.layers.24.block_sparse_moe.experts.234.w3", "model.layers.24.block_sparse_moe.experts.235.w3", "model.layers.24.block_sparse_moe.experts.236.w3", "model.layers.24.block_sparse_moe.experts.237.w3", "model.layers.24.block_sparse_moe.experts.238.w3", "model.layers.24.block_sparse_moe.experts.239.w3", "model.layers.24.block_sparse_moe.experts.240.w3", "model.layers.24.block_sparse_moe.experts.241.w3", "model.layers.24.block_sparse_moe.experts.242.w3", "model.layers.24.block_sparse_moe.experts.243.w3", "model.layers.24.block_sparse_moe.experts.244.w3", "model.layers.24.block_sparse_moe.experts.245.w3", "model.layers.24.block_sparse_moe.experts.246.w3", "model.layers.24.block_sparse_moe.experts.247.w3", "model.layers.24.block_sparse_moe.experts.248.w3", "model.layers.24.block_sparse_moe.experts.249.w3", "model.layers.24.block_sparse_moe.experts.250.w3", "model.layers.24.block_sparse_moe.experts.251.w3", "model.layers.24.block_sparse_moe.experts.252.w3", "model.layers.24.block_sparse_moe.experts.253.w3", "model.layers.24.block_sparse_moe.experts.254.w3", "model.layers.24.block_sparse_moe.experts.255.w3", "model.layers.24.block_sparse_moe.experts.0.w2", "model.layers.24.block_sparse_moe.experts.1.w2", "model.layers.24.block_sparse_moe.experts.2.w2", "model.layers.24.block_sparse_moe.experts.3.w2", "model.layers.24.block_sparse_moe.experts.4.w2", "model.layers.24.block_sparse_moe.experts.5.w2", "model.layers.24.block_sparse_moe.experts.6.w2", "model.layers.24.block_sparse_moe.experts.7.w2", "model.layers.24.block_sparse_moe.experts.8.w2", "model.layers.24.block_sparse_moe.experts.9.w2", "model.layers.24.block_sparse_moe.experts.10.w2", "model.layers.24.block_sparse_moe.experts.11.w2", "model.layers.24.block_sparse_moe.experts.12.w2", "model.layers.24.block_sparse_moe.experts.13.w2", "model.layers.24.block_sparse_moe.experts.14.w2", "model.layers.24.block_sparse_moe.experts.15.w2", "model.layers.24.block_sparse_moe.experts.16.w2", "model.layers.24.block_sparse_moe.experts.17.w2", "model.layers.24.block_sparse_moe.experts.18.w2", "model.layers.24.block_sparse_moe.experts.19.w2", "model.layers.24.block_sparse_moe.experts.20.w2", "model.layers.24.block_sparse_moe.experts.21.w2", "model.layers.24.block_sparse_moe.experts.22.w2", "model.layers.24.block_sparse_moe.experts.23.w2", "model.layers.24.block_sparse_moe.experts.24.w2", "model.layers.24.block_sparse_moe.experts.25.w2", "model.layers.24.block_sparse_moe.experts.26.w2", "model.layers.24.block_sparse_moe.experts.27.w2", "model.layers.24.block_sparse_moe.experts.28.w2", "model.layers.24.block_sparse_moe.experts.29.w2", "model.layers.24.block_sparse_moe.experts.30.w2", "model.layers.24.block_sparse_moe.experts.31.w2", "model.layers.24.block_sparse_moe.experts.32.w2", "model.layers.24.block_sparse_moe.experts.33.w2", "model.layers.24.block_sparse_moe.experts.34.w2", "model.layers.24.block_sparse_moe.experts.35.w2", "model.layers.24.block_sparse_moe.experts.36.w2", "model.layers.24.block_sparse_moe.experts.37.w2", "model.layers.24.block_sparse_moe.experts.38.w2", "model.layers.24.block_sparse_moe.experts.39.w2", "model.layers.24.block_sparse_moe.experts.40.w2", "model.layers.24.block_sparse_moe.experts.41.w2", "model.layers.24.block_sparse_moe.experts.42.w2", "model.layers.24.block_sparse_moe.experts.43.w2", "model.layers.24.block_sparse_moe.experts.44.w2", "model.layers.24.block_sparse_moe.experts.45.w2", "model.layers.24.block_sparse_moe.experts.46.w2", "model.layers.24.block_sparse_moe.experts.47.w2", "model.layers.24.block_sparse_moe.experts.48.w2", "model.layers.24.block_sparse_moe.experts.49.w2", "model.layers.24.block_sparse_moe.experts.50.w2", "model.layers.24.block_sparse_moe.experts.51.w2", "model.layers.24.block_sparse_moe.experts.52.w2", "model.layers.24.block_sparse_moe.experts.53.w2", "model.layers.24.block_sparse_moe.experts.54.w2", "model.layers.24.block_sparse_moe.experts.55.w2", "model.layers.24.block_sparse_moe.experts.56.w2", "model.layers.24.block_sparse_moe.experts.57.w2", "model.layers.24.block_sparse_moe.experts.58.w2", "model.layers.24.block_sparse_moe.experts.59.w2", "model.layers.24.block_sparse_moe.experts.60.w2", "model.layers.24.block_sparse_moe.experts.61.w2", "model.layers.24.block_sparse_moe.experts.62.w2", "model.layers.24.block_sparse_moe.experts.63.w2", "model.layers.24.block_sparse_moe.experts.64.w2", "model.layers.24.block_sparse_moe.experts.65.w2", "model.layers.24.block_sparse_moe.experts.66.w2", "model.layers.24.block_sparse_moe.experts.67.w2", "model.layers.24.block_sparse_moe.experts.68.w2", "model.layers.24.block_sparse_moe.experts.69.w2", "model.layers.24.block_sparse_moe.experts.70.w2", "model.layers.24.block_sparse_moe.experts.71.w2", "model.layers.24.block_sparse_moe.experts.72.w2", "model.layers.24.block_sparse_moe.experts.73.w2", "model.layers.24.block_sparse_moe.experts.74.w2", "model.layers.24.block_sparse_moe.experts.75.w2", "model.layers.24.block_sparse_moe.experts.76.w2", "model.layers.24.block_sparse_moe.experts.77.w2", "model.layers.24.block_sparse_moe.experts.78.w2", "model.layers.24.block_sparse_moe.experts.79.w2", "model.layers.24.block_sparse_moe.experts.80.w2", "model.layers.24.block_sparse_moe.experts.81.w2", "model.layers.24.block_sparse_moe.experts.82.w2", "model.layers.24.block_sparse_moe.experts.83.w2", "model.layers.24.block_sparse_moe.experts.84.w2", "model.layers.24.block_sparse_moe.experts.85.w2", "model.layers.24.block_sparse_moe.experts.86.w2", "model.layers.24.block_sparse_moe.experts.87.w2", "model.layers.24.block_sparse_moe.experts.88.w2", "model.layers.24.block_sparse_moe.experts.89.w2", "model.layers.24.block_sparse_moe.experts.90.w2", "model.layers.24.block_sparse_moe.experts.91.w2", "model.layers.24.block_sparse_moe.experts.92.w2", "model.layers.24.block_sparse_moe.experts.93.w2", "model.layers.24.block_sparse_moe.experts.94.w2", "model.layers.24.block_sparse_moe.experts.95.w2", "model.layers.24.block_sparse_moe.experts.96.w2", "model.layers.24.block_sparse_moe.experts.97.w2", "model.layers.24.block_sparse_moe.experts.98.w2", "model.layers.24.block_sparse_moe.experts.99.w2", "model.layers.24.block_sparse_moe.experts.100.w2", "model.layers.24.block_sparse_moe.experts.101.w2", "model.layers.24.block_sparse_moe.experts.102.w2", "model.layers.24.block_sparse_moe.experts.103.w2", "model.layers.24.block_sparse_moe.experts.104.w2", "model.layers.24.block_sparse_moe.experts.105.w2", "model.layers.24.block_sparse_moe.experts.106.w2", "model.layers.24.block_sparse_moe.experts.107.w2", "model.layers.24.block_sparse_moe.experts.108.w2", "model.layers.24.block_sparse_moe.experts.109.w2", "model.layers.24.block_sparse_moe.experts.110.w2", "model.layers.24.block_sparse_moe.experts.111.w2", "model.layers.24.block_sparse_moe.experts.112.w2", "model.layers.24.block_sparse_moe.experts.113.w2", "model.layers.24.block_sparse_moe.experts.114.w2", "model.layers.24.block_sparse_moe.experts.115.w2", "model.layers.24.block_sparse_moe.experts.116.w2", "model.layers.24.block_sparse_moe.experts.117.w2", "model.layers.24.block_sparse_moe.experts.118.w2", "model.layers.24.block_sparse_moe.experts.119.w2", "model.layers.24.block_sparse_moe.experts.120.w2", "model.layers.24.block_sparse_moe.experts.121.w2", "model.layers.24.block_sparse_moe.experts.122.w2", "model.layers.24.block_sparse_moe.experts.123.w2", "model.layers.24.block_sparse_moe.experts.124.w2", "model.layers.24.block_sparse_moe.experts.125.w2", "model.layers.24.block_sparse_moe.experts.126.w2", "model.layers.24.block_sparse_moe.experts.127.w2", "model.layers.24.block_sparse_moe.experts.128.w2", "model.layers.24.block_sparse_moe.experts.129.w2", "model.layers.24.block_sparse_moe.experts.130.w2", "model.layers.24.block_sparse_moe.experts.131.w2", "model.layers.24.block_sparse_moe.experts.132.w2", "model.layers.24.block_sparse_moe.experts.133.w2", "model.layers.24.block_sparse_moe.experts.134.w2", "model.layers.24.block_sparse_moe.experts.135.w2", "model.layers.24.block_sparse_moe.experts.136.w2", "model.layers.24.block_sparse_moe.experts.137.w2", "model.layers.24.block_sparse_moe.experts.138.w2", "model.layers.24.block_sparse_moe.experts.139.w2", "model.layers.24.block_sparse_moe.experts.140.w2", "model.layers.24.block_sparse_moe.experts.141.w2", "model.layers.24.block_sparse_moe.experts.142.w2", "model.layers.24.block_sparse_moe.experts.143.w2", "model.layers.24.block_sparse_moe.experts.144.w2", "model.layers.24.block_sparse_moe.experts.145.w2", "model.layers.24.block_sparse_moe.experts.146.w2", "model.layers.24.block_sparse_moe.experts.147.w2", "model.layers.24.block_sparse_moe.experts.148.w2", "model.layers.24.block_sparse_moe.experts.149.w2", "model.layers.24.block_sparse_moe.experts.150.w2", "model.layers.24.block_sparse_moe.experts.151.w2", "model.layers.24.block_sparse_moe.experts.152.w2", "model.layers.24.block_sparse_moe.experts.153.w2", "model.layers.24.block_sparse_moe.experts.154.w2", "model.layers.24.block_sparse_moe.experts.155.w2", "model.layers.24.block_sparse_moe.experts.156.w2", "model.layers.24.block_sparse_moe.experts.157.w2", "model.layers.24.block_sparse_moe.experts.158.w2", "model.layers.24.block_sparse_moe.experts.159.w2", "model.layers.24.block_sparse_moe.experts.160.w2", "model.layers.24.block_sparse_moe.experts.161.w2", "model.layers.24.block_sparse_moe.experts.162.w2", "model.layers.24.block_sparse_moe.experts.163.w2", "model.layers.24.block_sparse_moe.experts.164.w2", "model.layers.24.block_sparse_moe.experts.165.w2", "model.layers.24.block_sparse_moe.experts.166.w2", "model.layers.24.block_sparse_moe.experts.167.w2", "model.layers.24.block_sparse_moe.experts.168.w2", "model.layers.24.block_sparse_moe.experts.169.w2", "model.layers.24.block_sparse_moe.experts.170.w2", "model.layers.24.block_sparse_moe.experts.171.w2", "model.layers.24.block_sparse_moe.experts.172.w2", "model.layers.24.block_sparse_moe.experts.173.w2", "model.layers.24.block_sparse_moe.experts.174.w2", "model.layers.24.block_sparse_moe.experts.175.w2", "model.layers.24.block_sparse_moe.experts.176.w2", "model.layers.24.block_sparse_moe.experts.177.w2", "model.layers.24.block_sparse_moe.experts.178.w2", "model.layers.24.block_sparse_moe.experts.179.w2", "model.layers.24.block_sparse_moe.experts.180.w2", "model.layers.24.block_sparse_moe.experts.181.w2", "model.layers.24.block_sparse_moe.experts.182.w2", "model.layers.24.block_sparse_moe.experts.183.w2", "model.layers.24.block_sparse_moe.experts.184.w2", "model.layers.24.block_sparse_moe.experts.185.w2", "model.layers.24.block_sparse_moe.experts.186.w2", "model.layers.24.block_sparse_moe.experts.187.w2", "model.layers.24.block_sparse_moe.experts.188.w2", "model.layers.24.block_sparse_moe.experts.189.w2", "model.layers.24.block_sparse_moe.experts.190.w2", "model.layers.24.block_sparse_moe.experts.191.w2", "model.layers.24.block_sparse_moe.experts.192.w2", "model.layers.24.block_sparse_moe.experts.193.w2", "model.layers.24.block_sparse_moe.experts.194.w2", "model.layers.24.block_sparse_moe.experts.195.w2", "model.layers.24.block_sparse_moe.experts.196.w2", "model.layers.24.block_sparse_moe.experts.197.w2", "model.layers.24.block_sparse_moe.experts.198.w2", "model.layers.24.block_sparse_moe.experts.199.w2", "model.layers.24.block_sparse_moe.experts.200.w2", "model.layers.24.block_sparse_moe.experts.201.w2", "model.layers.24.block_sparse_moe.experts.202.w2", "model.layers.24.block_sparse_moe.experts.203.w2", "model.layers.24.block_sparse_moe.experts.204.w2", "model.layers.24.block_sparse_moe.experts.205.w2", "model.layers.24.block_sparse_moe.experts.206.w2", "model.layers.24.block_sparse_moe.experts.207.w2", "model.layers.24.block_sparse_moe.experts.208.w2", "model.layers.24.block_sparse_moe.experts.209.w2", "model.layers.24.block_sparse_moe.experts.210.w2", "model.layers.24.block_sparse_moe.experts.211.w2", "model.layers.24.block_sparse_moe.experts.212.w2", "model.layers.24.block_sparse_moe.experts.213.w2", "model.layers.24.block_sparse_moe.experts.214.w2", "model.layers.24.block_sparse_moe.experts.215.w2", "model.layers.24.block_sparse_moe.experts.216.w2", "model.layers.24.block_sparse_moe.experts.217.w2", "model.layers.24.block_sparse_moe.experts.218.w2", "model.layers.24.block_sparse_moe.experts.219.w2", "model.layers.24.block_sparse_moe.experts.220.w2", "model.layers.24.block_sparse_moe.experts.221.w2", "model.layers.24.block_sparse_moe.experts.222.w2", "model.layers.24.block_sparse_moe.experts.223.w2", "model.layers.24.block_sparse_moe.experts.224.w2", "model.layers.24.block_sparse_moe.experts.225.w2", "model.layers.24.block_sparse_moe.experts.226.w2", "model.layers.24.block_sparse_moe.experts.227.w2", "model.layers.24.block_sparse_moe.experts.228.w2", "model.layers.24.block_sparse_moe.experts.229.w2", "model.layers.24.block_sparse_moe.experts.230.w2", "model.layers.24.block_sparse_moe.experts.231.w2", "model.layers.24.block_sparse_moe.experts.232.w2", "model.layers.24.block_sparse_moe.experts.233.w2", "model.layers.24.block_sparse_moe.experts.234.w2", "model.layers.24.block_sparse_moe.experts.235.w2", "model.layers.24.block_sparse_moe.experts.236.w2", "model.layers.24.block_sparse_moe.experts.237.w2", "model.layers.24.block_sparse_moe.experts.238.w2", "model.layers.24.block_sparse_moe.experts.239.w2", "model.layers.24.block_sparse_moe.experts.240.w2", "model.layers.24.block_sparse_moe.experts.241.w2", "model.layers.24.block_sparse_moe.experts.242.w2", "model.layers.24.block_sparse_moe.experts.243.w2", "model.layers.24.block_sparse_moe.experts.244.w2", "model.layers.24.block_sparse_moe.experts.245.w2", "model.layers.24.block_sparse_moe.experts.246.w2", "model.layers.24.block_sparse_moe.experts.247.w2", "model.layers.24.block_sparse_moe.experts.248.w2", "model.layers.24.block_sparse_moe.experts.249.w2", "model.layers.24.block_sparse_moe.experts.250.w2", "model.layers.24.block_sparse_moe.experts.251.w2", "model.layers.24.block_sparse_moe.experts.252.w2", "model.layers.24.block_sparse_moe.experts.253.w2", "model.layers.24.block_sparse_moe.experts.254.w2", "model.layers.24.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0008822225034236797, "dbits": 3623878656 } ] }, { "idx": 50, "layers": [ "model.layers.25.self_attn.q_proj", "model.layers.25.self_attn.k_proj", "model.layers.25.self_attn.v_proj", "model.layers.25.self_attn.o_proj" ], "candidates": [ { "dkld": -0.010199673473834991, "dbits": 44040192 } ] }, { "idx": 51, "layers": [ "model.layers.25.block_sparse_moe.experts.0.w1", "model.layers.25.block_sparse_moe.experts.1.w1", "model.layers.25.block_sparse_moe.experts.2.w1", "model.layers.25.block_sparse_moe.experts.3.w1", "model.layers.25.block_sparse_moe.experts.4.w1", "model.layers.25.block_sparse_moe.experts.5.w1", "model.layers.25.block_sparse_moe.experts.6.w1", "model.layers.25.block_sparse_moe.experts.7.w1", "model.layers.25.block_sparse_moe.experts.8.w1", "model.layers.25.block_sparse_moe.experts.9.w1", "model.layers.25.block_sparse_moe.experts.10.w1", "model.layers.25.block_sparse_moe.experts.11.w1", "model.layers.25.block_sparse_moe.experts.12.w1", "model.layers.25.block_sparse_moe.experts.13.w1", "model.layers.25.block_sparse_moe.experts.14.w1", "model.layers.25.block_sparse_moe.experts.15.w1", "model.layers.25.block_sparse_moe.experts.16.w1", "model.layers.25.block_sparse_moe.experts.17.w1", "model.layers.25.block_sparse_moe.experts.18.w1", "model.layers.25.block_sparse_moe.experts.19.w1", "model.layers.25.block_sparse_moe.experts.20.w1", "model.layers.25.block_sparse_moe.experts.21.w1", "model.layers.25.block_sparse_moe.experts.22.w1", "model.layers.25.block_sparse_moe.experts.23.w1", "model.layers.25.block_sparse_moe.experts.24.w1", "model.layers.25.block_sparse_moe.experts.25.w1", "model.layers.25.block_sparse_moe.experts.26.w1", "model.layers.25.block_sparse_moe.experts.27.w1", "model.layers.25.block_sparse_moe.experts.28.w1", "model.layers.25.block_sparse_moe.experts.29.w1", "model.layers.25.block_sparse_moe.experts.30.w1", "model.layers.25.block_sparse_moe.experts.31.w1", "model.layers.25.block_sparse_moe.experts.32.w1", "model.layers.25.block_sparse_moe.experts.33.w1", "model.layers.25.block_sparse_moe.experts.34.w1", "model.layers.25.block_sparse_moe.experts.35.w1", "model.layers.25.block_sparse_moe.experts.36.w1", "model.layers.25.block_sparse_moe.experts.37.w1", "model.layers.25.block_sparse_moe.experts.38.w1", "model.layers.25.block_sparse_moe.experts.39.w1", "model.layers.25.block_sparse_moe.experts.40.w1", "model.layers.25.block_sparse_moe.experts.41.w1", "model.layers.25.block_sparse_moe.experts.42.w1", "model.layers.25.block_sparse_moe.experts.43.w1", "model.layers.25.block_sparse_moe.experts.44.w1", "model.layers.25.block_sparse_moe.experts.45.w1", "model.layers.25.block_sparse_moe.experts.46.w1", "model.layers.25.block_sparse_moe.experts.47.w1", "model.layers.25.block_sparse_moe.experts.48.w1", "model.layers.25.block_sparse_moe.experts.49.w1", "model.layers.25.block_sparse_moe.experts.50.w1", "model.layers.25.block_sparse_moe.experts.51.w1", "model.layers.25.block_sparse_moe.experts.52.w1", "model.layers.25.block_sparse_moe.experts.53.w1", "model.layers.25.block_sparse_moe.experts.54.w1", "model.layers.25.block_sparse_moe.experts.55.w1", "model.layers.25.block_sparse_moe.experts.56.w1", "model.layers.25.block_sparse_moe.experts.57.w1", "model.layers.25.block_sparse_moe.experts.58.w1", "model.layers.25.block_sparse_moe.experts.59.w1", "model.layers.25.block_sparse_moe.experts.60.w1", "model.layers.25.block_sparse_moe.experts.61.w1", "model.layers.25.block_sparse_moe.experts.62.w1", "model.layers.25.block_sparse_moe.experts.63.w1", "model.layers.25.block_sparse_moe.experts.64.w1", "model.layers.25.block_sparse_moe.experts.65.w1", "model.layers.25.block_sparse_moe.experts.66.w1", "model.layers.25.block_sparse_moe.experts.67.w1", "model.layers.25.block_sparse_moe.experts.68.w1", "model.layers.25.block_sparse_moe.experts.69.w1", "model.layers.25.block_sparse_moe.experts.70.w1", "model.layers.25.block_sparse_moe.experts.71.w1", "model.layers.25.block_sparse_moe.experts.72.w1", "model.layers.25.block_sparse_moe.experts.73.w1", "model.layers.25.block_sparse_moe.experts.74.w1", "model.layers.25.block_sparse_moe.experts.75.w1", "model.layers.25.block_sparse_moe.experts.76.w1", "model.layers.25.block_sparse_moe.experts.77.w1", "model.layers.25.block_sparse_moe.experts.78.w1", "model.layers.25.block_sparse_moe.experts.79.w1", "model.layers.25.block_sparse_moe.experts.80.w1", "model.layers.25.block_sparse_moe.experts.81.w1", "model.layers.25.block_sparse_moe.experts.82.w1", "model.layers.25.block_sparse_moe.experts.83.w1", "model.layers.25.block_sparse_moe.experts.84.w1", "model.layers.25.block_sparse_moe.experts.85.w1", "model.layers.25.block_sparse_moe.experts.86.w1", "model.layers.25.block_sparse_moe.experts.87.w1", "model.layers.25.block_sparse_moe.experts.88.w1", "model.layers.25.block_sparse_moe.experts.89.w1", "model.layers.25.block_sparse_moe.experts.90.w1", "model.layers.25.block_sparse_moe.experts.91.w1", "model.layers.25.block_sparse_moe.experts.92.w1", "model.layers.25.block_sparse_moe.experts.93.w1", "model.layers.25.block_sparse_moe.experts.94.w1", "model.layers.25.block_sparse_moe.experts.95.w1", "model.layers.25.block_sparse_moe.experts.96.w1", "model.layers.25.block_sparse_moe.experts.97.w1", "model.layers.25.block_sparse_moe.experts.98.w1", "model.layers.25.block_sparse_moe.experts.99.w1", "model.layers.25.block_sparse_moe.experts.100.w1", "model.layers.25.block_sparse_moe.experts.101.w1", "model.layers.25.block_sparse_moe.experts.102.w1", "model.layers.25.block_sparse_moe.experts.103.w1", "model.layers.25.block_sparse_moe.experts.104.w1", "model.layers.25.block_sparse_moe.experts.105.w1", "model.layers.25.block_sparse_moe.experts.106.w1", "model.layers.25.block_sparse_moe.experts.107.w1", "model.layers.25.block_sparse_moe.experts.108.w1", "model.layers.25.block_sparse_moe.experts.109.w1", "model.layers.25.block_sparse_moe.experts.110.w1", "model.layers.25.block_sparse_moe.experts.111.w1", "model.layers.25.block_sparse_moe.experts.112.w1", "model.layers.25.block_sparse_moe.experts.113.w1", "model.layers.25.block_sparse_moe.experts.114.w1", "model.layers.25.block_sparse_moe.experts.115.w1", "model.layers.25.block_sparse_moe.experts.116.w1", "model.layers.25.block_sparse_moe.experts.117.w1", "model.layers.25.block_sparse_moe.experts.118.w1", "model.layers.25.block_sparse_moe.experts.119.w1", "model.layers.25.block_sparse_moe.experts.120.w1", "model.layers.25.block_sparse_moe.experts.121.w1", "model.layers.25.block_sparse_moe.experts.122.w1", "model.layers.25.block_sparse_moe.experts.123.w1", "model.layers.25.block_sparse_moe.experts.124.w1", "model.layers.25.block_sparse_moe.experts.125.w1", "model.layers.25.block_sparse_moe.experts.126.w1", "model.layers.25.block_sparse_moe.experts.127.w1", "model.layers.25.block_sparse_moe.experts.128.w1", "model.layers.25.block_sparse_moe.experts.129.w1", "model.layers.25.block_sparse_moe.experts.130.w1", "model.layers.25.block_sparse_moe.experts.131.w1", "model.layers.25.block_sparse_moe.experts.132.w1", "model.layers.25.block_sparse_moe.experts.133.w1", "model.layers.25.block_sparse_moe.experts.134.w1", "model.layers.25.block_sparse_moe.experts.135.w1", "model.layers.25.block_sparse_moe.experts.136.w1", "model.layers.25.block_sparse_moe.experts.137.w1", "model.layers.25.block_sparse_moe.experts.138.w1", "model.layers.25.block_sparse_moe.experts.139.w1", "model.layers.25.block_sparse_moe.experts.140.w1", "model.layers.25.block_sparse_moe.experts.141.w1", "model.layers.25.block_sparse_moe.experts.142.w1", "model.layers.25.block_sparse_moe.experts.143.w1", "model.layers.25.block_sparse_moe.experts.144.w1", "model.layers.25.block_sparse_moe.experts.145.w1", "model.layers.25.block_sparse_moe.experts.146.w1", "model.layers.25.block_sparse_moe.experts.147.w1", "model.layers.25.block_sparse_moe.experts.148.w1", "model.layers.25.block_sparse_moe.experts.149.w1", "model.layers.25.block_sparse_moe.experts.150.w1", "model.layers.25.block_sparse_moe.experts.151.w1", "model.layers.25.block_sparse_moe.experts.152.w1", "model.layers.25.block_sparse_moe.experts.153.w1", "model.layers.25.block_sparse_moe.experts.154.w1", "model.layers.25.block_sparse_moe.experts.155.w1", "model.layers.25.block_sparse_moe.experts.156.w1", "model.layers.25.block_sparse_moe.experts.157.w1", "model.layers.25.block_sparse_moe.experts.158.w1", "model.layers.25.block_sparse_moe.experts.159.w1", "model.layers.25.block_sparse_moe.experts.160.w1", "model.layers.25.block_sparse_moe.experts.161.w1", "model.layers.25.block_sparse_moe.experts.162.w1", "model.layers.25.block_sparse_moe.experts.163.w1", "model.layers.25.block_sparse_moe.experts.164.w1", "model.layers.25.block_sparse_moe.experts.165.w1", "model.layers.25.block_sparse_moe.experts.166.w1", "model.layers.25.block_sparse_moe.experts.167.w1", "model.layers.25.block_sparse_moe.experts.168.w1", "model.layers.25.block_sparse_moe.experts.169.w1", "model.layers.25.block_sparse_moe.experts.170.w1", "model.layers.25.block_sparse_moe.experts.171.w1", "model.layers.25.block_sparse_moe.experts.172.w1", "model.layers.25.block_sparse_moe.experts.173.w1", "model.layers.25.block_sparse_moe.experts.174.w1", "model.layers.25.block_sparse_moe.experts.175.w1", "model.layers.25.block_sparse_moe.experts.176.w1", "model.layers.25.block_sparse_moe.experts.177.w1", "model.layers.25.block_sparse_moe.experts.178.w1", "model.layers.25.block_sparse_moe.experts.179.w1", "model.layers.25.block_sparse_moe.experts.180.w1", "model.layers.25.block_sparse_moe.experts.181.w1", "model.layers.25.block_sparse_moe.experts.182.w1", "model.layers.25.block_sparse_moe.experts.183.w1", "model.layers.25.block_sparse_moe.experts.184.w1", "model.layers.25.block_sparse_moe.experts.185.w1", "model.layers.25.block_sparse_moe.experts.186.w1", "model.layers.25.block_sparse_moe.experts.187.w1", "model.layers.25.block_sparse_moe.experts.188.w1", "model.layers.25.block_sparse_moe.experts.189.w1", "model.layers.25.block_sparse_moe.experts.190.w1", "model.layers.25.block_sparse_moe.experts.191.w1", "model.layers.25.block_sparse_moe.experts.192.w1", "model.layers.25.block_sparse_moe.experts.193.w1", "model.layers.25.block_sparse_moe.experts.194.w1", "model.layers.25.block_sparse_moe.experts.195.w1", "model.layers.25.block_sparse_moe.experts.196.w1", "model.layers.25.block_sparse_moe.experts.197.w1", "model.layers.25.block_sparse_moe.experts.198.w1", "model.layers.25.block_sparse_moe.experts.199.w1", "model.layers.25.block_sparse_moe.experts.200.w1", "model.layers.25.block_sparse_moe.experts.201.w1", "model.layers.25.block_sparse_moe.experts.202.w1", "model.layers.25.block_sparse_moe.experts.203.w1", "model.layers.25.block_sparse_moe.experts.204.w1", "model.layers.25.block_sparse_moe.experts.205.w1", "model.layers.25.block_sparse_moe.experts.206.w1", "model.layers.25.block_sparse_moe.experts.207.w1", "model.layers.25.block_sparse_moe.experts.208.w1", "model.layers.25.block_sparse_moe.experts.209.w1", "model.layers.25.block_sparse_moe.experts.210.w1", "model.layers.25.block_sparse_moe.experts.211.w1", "model.layers.25.block_sparse_moe.experts.212.w1", "model.layers.25.block_sparse_moe.experts.213.w1", "model.layers.25.block_sparse_moe.experts.214.w1", "model.layers.25.block_sparse_moe.experts.215.w1", "model.layers.25.block_sparse_moe.experts.216.w1", "model.layers.25.block_sparse_moe.experts.217.w1", "model.layers.25.block_sparse_moe.experts.218.w1", "model.layers.25.block_sparse_moe.experts.219.w1", "model.layers.25.block_sparse_moe.experts.220.w1", "model.layers.25.block_sparse_moe.experts.221.w1", "model.layers.25.block_sparse_moe.experts.222.w1", "model.layers.25.block_sparse_moe.experts.223.w1", "model.layers.25.block_sparse_moe.experts.224.w1", "model.layers.25.block_sparse_moe.experts.225.w1", "model.layers.25.block_sparse_moe.experts.226.w1", "model.layers.25.block_sparse_moe.experts.227.w1", "model.layers.25.block_sparse_moe.experts.228.w1", "model.layers.25.block_sparse_moe.experts.229.w1", "model.layers.25.block_sparse_moe.experts.230.w1", "model.layers.25.block_sparse_moe.experts.231.w1", "model.layers.25.block_sparse_moe.experts.232.w1", "model.layers.25.block_sparse_moe.experts.233.w1", "model.layers.25.block_sparse_moe.experts.234.w1", "model.layers.25.block_sparse_moe.experts.235.w1", "model.layers.25.block_sparse_moe.experts.236.w1", "model.layers.25.block_sparse_moe.experts.237.w1", "model.layers.25.block_sparse_moe.experts.238.w1", "model.layers.25.block_sparse_moe.experts.239.w1", "model.layers.25.block_sparse_moe.experts.240.w1", "model.layers.25.block_sparse_moe.experts.241.w1", "model.layers.25.block_sparse_moe.experts.242.w1", "model.layers.25.block_sparse_moe.experts.243.w1", "model.layers.25.block_sparse_moe.experts.244.w1", "model.layers.25.block_sparse_moe.experts.245.w1", "model.layers.25.block_sparse_moe.experts.246.w1", "model.layers.25.block_sparse_moe.experts.247.w1", "model.layers.25.block_sparse_moe.experts.248.w1", "model.layers.25.block_sparse_moe.experts.249.w1", "model.layers.25.block_sparse_moe.experts.250.w1", "model.layers.25.block_sparse_moe.experts.251.w1", "model.layers.25.block_sparse_moe.experts.252.w1", "model.layers.25.block_sparse_moe.experts.253.w1", "model.layers.25.block_sparse_moe.experts.254.w1", "model.layers.25.block_sparse_moe.experts.255.w1", "model.layers.25.block_sparse_moe.experts.0.w3", "model.layers.25.block_sparse_moe.experts.1.w3", "model.layers.25.block_sparse_moe.experts.2.w3", "model.layers.25.block_sparse_moe.experts.3.w3", "model.layers.25.block_sparse_moe.experts.4.w3", "model.layers.25.block_sparse_moe.experts.5.w3", "model.layers.25.block_sparse_moe.experts.6.w3", "model.layers.25.block_sparse_moe.experts.7.w3", "model.layers.25.block_sparse_moe.experts.8.w3", "model.layers.25.block_sparse_moe.experts.9.w3", "model.layers.25.block_sparse_moe.experts.10.w3", "model.layers.25.block_sparse_moe.experts.11.w3", "model.layers.25.block_sparse_moe.experts.12.w3", "model.layers.25.block_sparse_moe.experts.13.w3", "model.layers.25.block_sparse_moe.experts.14.w3", "model.layers.25.block_sparse_moe.experts.15.w3", "model.layers.25.block_sparse_moe.experts.16.w3", "model.layers.25.block_sparse_moe.experts.17.w3", "model.layers.25.block_sparse_moe.experts.18.w3", "model.layers.25.block_sparse_moe.experts.19.w3", "model.layers.25.block_sparse_moe.experts.20.w3", "model.layers.25.block_sparse_moe.experts.21.w3", "model.layers.25.block_sparse_moe.experts.22.w3", "model.layers.25.block_sparse_moe.experts.23.w3", "model.layers.25.block_sparse_moe.experts.24.w3", "model.layers.25.block_sparse_moe.experts.25.w3", "model.layers.25.block_sparse_moe.experts.26.w3", "model.layers.25.block_sparse_moe.experts.27.w3", "model.layers.25.block_sparse_moe.experts.28.w3", "model.layers.25.block_sparse_moe.experts.29.w3", "model.layers.25.block_sparse_moe.experts.30.w3", "model.layers.25.block_sparse_moe.experts.31.w3", "model.layers.25.block_sparse_moe.experts.32.w3", "model.layers.25.block_sparse_moe.experts.33.w3", "model.layers.25.block_sparse_moe.experts.34.w3", "model.layers.25.block_sparse_moe.experts.35.w3", "model.layers.25.block_sparse_moe.experts.36.w3", "model.layers.25.block_sparse_moe.experts.37.w3", "model.layers.25.block_sparse_moe.experts.38.w3", "model.layers.25.block_sparse_moe.experts.39.w3", "model.layers.25.block_sparse_moe.experts.40.w3", "model.layers.25.block_sparse_moe.experts.41.w3", "model.layers.25.block_sparse_moe.experts.42.w3", "model.layers.25.block_sparse_moe.experts.43.w3", "model.layers.25.block_sparse_moe.experts.44.w3", "model.layers.25.block_sparse_moe.experts.45.w3", "model.layers.25.block_sparse_moe.experts.46.w3", "model.layers.25.block_sparse_moe.experts.47.w3", "model.layers.25.block_sparse_moe.experts.48.w3", "model.layers.25.block_sparse_moe.experts.49.w3", "model.layers.25.block_sparse_moe.experts.50.w3", "model.layers.25.block_sparse_moe.experts.51.w3", "model.layers.25.block_sparse_moe.experts.52.w3", "model.layers.25.block_sparse_moe.experts.53.w3", "model.layers.25.block_sparse_moe.experts.54.w3", "model.layers.25.block_sparse_moe.experts.55.w3", "model.layers.25.block_sparse_moe.experts.56.w3", "model.layers.25.block_sparse_moe.experts.57.w3", "model.layers.25.block_sparse_moe.experts.58.w3", "model.layers.25.block_sparse_moe.experts.59.w3", "model.layers.25.block_sparse_moe.experts.60.w3", "model.layers.25.block_sparse_moe.experts.61.w3", "model.layers.25.block_sparse_moe.experts.62.w3", "model.layers.25.block_sparse_moe.experts.63.w3", "model.layers.25.block_sparse_moe.experts.64.w3", "model.layers.25.block_sparse_moe.experts.65.w3", "model.layers.25.block_sparse_moe.experts.66.w3", "model.layers.25.block_sparse_moe.experts.67.w3", "model.layers.25.block_sparse_moe.experts.68.w3", "model.layers.25.block_sparse_moe.experts.69.w3", "model.layers.25.block_sparse_moe.experts.70.w3", "model.layers.25.block_sparse_moe.experts.71.w3", "model.layers.25.block_sparse_moe.experts.72.w3", "model.layers.25.block_sparse_moe.experts.73.w3", "model.layers.25.block_sparse_moe.experts.74.w3", "model.layers.25.block_sparse_moe.experts.75.w3", "model.layers.25.block_sparse_moe.experts.76.w3", "model.layers.25.block_sparse_moe.experts.77.w3", "model.layers.25.block_sparse_moe.experts.78.w3", "model.layers.25.block_sparse_moe.experts.79.w3", "model.layers.25.block_sparse_moe.experts.80.w3", "model.layers.25.block_sparse_moe.experts.81.w3", "model.layers.25.block_sparse_moe.experts.82.w3", "model.layers.25.block_sparse_moe.experts.83.w3", "model.layers.25.block_sparse_moe.experts.84.w3", "model.layers.25.block_sparse_moe.experts.85.w3", "model.layers.25.block_sparse_moe.experts.86.w3", "model.layers.25.block_sparse_moe.experts.87.w3", "model.layers.25.block_sparse_moe.experts.88.w3", "model.layers.25.block_sparse_moe.experts.89.w3", "model.layers.25.block_sparse_moe.experts.90.w3", "model.layers.25.block_sparse_moe.experts.91.w3", "model.layers.25.block_sparse_moe.experts.92.w3", "model.layers.25.block_sparse_moe.experts.93.w3", "model.layers.25.block_sparse_moe.experts.94.w3", "model.layers.25.block_sparse_moe.experts.95.w3", "model.layers.25.block_sparse_moe.experts.96.w3", "model.layers.25.block_sparse_moe.experts.97.w3", "model.layers.25.block_sparse_moe.experts.98.w3", "model.layers.25.block_sparse_moe.experts.99.w3", "model.layers.25.block_sparse_moe.experts.100.w3", "model.layers.25.block_sparse_moe.experts.101.w3", "model.layers.25.block_sparse_moe.experts.102.w3", "model.layers.25.block_sparse_moe.experts.103.w3", "model.layers.25.block_sparse_moe.experts.104.w3", "model.layers.25.block_sparse_moe.experts.105.w3", "model.layers.25.block_sparse_moe.experts.106.w3", "model.layers.25.block_sparse_moe.experts.107.w3", "model.layers.25.block_sparse_moe.experts.108.w3", "model.layers.25.block_sparse_moe.experts.109.w3", "model.layers.25.block_sparse_moe.experts.110.w3", "model.layers.25.block_sparse_moe.experts.111.w3", "model.layers.25.block_sparse_moe.experts.112.w3", "model.layers.25.block_sparse_moe.experts.113.w3", "model.layers.25.block_sparse_moe.experts.114.w3", "model.layers.25.block_sparse_moe.experts.115.w3", "model.layers.25.block_sparse_moe.experts.116.w3", "model.layers.25.block_sparse_moe.experts.117.w3", "model.layers.25.block_sparse_moe.experts.118.w3", "model.layers.25.block_sparse_moe.experts.119.w3", "model.layers.25.block_sparse_moe.experts.120.w3", "model.layers.25.block_sparse_moe.experts.121.w3", "model.layers.25.block_sparse_moe.experts.122.w3", "model.layers.25.block_sparse_moe.experts.123.w3", "model.layers.25.block_sparse_moe.experts.124.w3", "model.layers.25.block_sparse_moe.experts.125.w3", "model.layers.25.block_sparse_moe.experts.126.w3", "model.layers.25.block_sparse_moe.experts.127.w3", "model.layers.25.block_sparse_moe.experts.128.w3", "model.layers.25.block_sparse_moe.experts.129.w3", "model.layers.25.block_sparse_moe.experts.130.w3", "model.layers.25.block_sparse_moe.experts.131.w3", "model.layers.25.block_sparse_moe.experts.132.w3", "model.layers.25.block_sparse_moe.experts.133.w3", "model.layers.25.block_sparse_moe.experts.134.w3", "model.layers.25.block_sparse_moe.experts.135.w3", "model.layers.25.block_sparse_moe.experts.136.w3", "model.layers.25.block_sparse_moe.experts.137.w3", "model.layers.25.block_sparse_moe.experts.138.w3", "model.layers.25.block_sparse_moe.experts.139.w3", "model.layers.25.block_sparse_moe.experts.140.w3", "model.layers.25.block_sparse_moe.experts.141.w3", "model.layers.25.block_sparse_moe.experts.142.w3", "model.layers.25.block_sparse_moe.experts.143.w3", "model.layers.25.block_sparse_moe.experts.144.w3", "model.layers.25.block_sparse_moe.experts.145.w3", "model.layers.25.block_sparse_moe.experts.146.w3", "model.layers.25.block_sparse_moe.experts.147.w3", "model.layers.25.block_sparse_moe.experts.148.w3", "model.layers.25.block_sparse_moe.experts.149.w3", "model.layers.25.block_sparse_moe.experts.150.w3", "model.layers.25.block_sparse_moe.experts.151.w3", "model.layers.25.block_sparse_moe.experts.152.w3", "model.layers.25.block_sparse_moe.experts.153.w3", "model.layers.25.block_sparse_moe.experts.154.w3", "model.layers.25.block_sparse_moe.experts.155.w3", "model.layers.25.block_sparse_moe.experts.156.w3", "model.layers.25.block_sparse_moe.experts.157.w3", "model.layers.25.block_sparse_moe.experts.158.w3", "model.layers.25.block_sparse_moe.experts.159.w3", "model.layers.25.block_sparse_moe.experts.160.w3", "model.layers.25.block_sparse_moe.experts.161.w3", "model.layers.25.block_sparse_moe.experts.162.w3", "model.layers.25.block_sparse_moe.experts.163.w3", "model.layers.25.block_sparse_moe.experts.164.w3", "model.layers.25.block_sparse_moe.experts.165.w3", "model.layers.25.block_sparse_moe.experts.166.w3", "model.layers.25.block_sparse_moe.experts.167.w3", "model.layers.25.block_sparse_moe.experts.168.w3", "model.layers.25.block_sparse_moe.experts.169.w3", "model.layers.25.block_sparse_moe.experts.170.w3", "model.layers.25.block_sparse_moe.experts.171.w3", "model.layers.25.block_sparse_moe.experts.172.w3", "model.layers.25.block_sparse_moe.experts.173.w3", "model.layers.25.block_sparse_moe.experts.174.w3", "model.layers.25.block_sparse_moe.experts.175.w3", "model.layers.25.block_sparse_moe.experts.176.w3", "model.layers.25.block_sparse_moe.experts.177.w3", "model.layers.25.block_sparse_moe.experts.178.w3", "model.layers.25.block_sparse_moe.experts.179.w3", "model.layers.25.block_sparse_moe.experts.180.w3", "model.layers.25.block_sparse_moe.experts.181.w3", "model.layers.25.block_sparse_moe.experts.182.w3", "model.layers.25.block_sparse_moe.experts.183.w3", "model.layers.25.block_sparse_moe.experts.184.w3", "model.layers.25.block_sparse_moe.experts.185.w3", "model.layers.25.block_sparse_moe.experts.186.w3", "model.layers.25.block_sparse_moe.experts.187.w3", "model.layers.25.block_sparse_moe.experts.188.w3", "model.layers.25.block_sparse_moe.experts.189.w3", "model.layers.25.block_sparse_moe.experts.190.w3", "model.layers.25.block_sparse_moe.experts.191.w3", "model.layers.25.block_sparse_moe.experts.192.w3", "model.layers.25.block_sparse_moe.experts.193.w3", "model.layers.25.block_sparse_moe.experts.194.w3", "model.layers.25.block_sparse_moe.experts.195.w3", "model.layers.25.block_sparse_moe.experts.196.w3", "model.layers.25.block_sparse_moe.experts.197.w3", "model.layers.25.block_sparse_moe.experts.198.w3", "model.layers.25.block_sparse_moe.experts.199.w3", "model.layers.25.block_sparse_moe.experts.200.w3", "model.layers.25.block_sparse_moe.experts.201.w3", "model.layers.25.block_sparse_moe.experts.202.w3", "model.layers.25.block_sparse_moe.experts.203.w3", "model.layers.25.block_sparse_moe.experts.204.w3", "model.layers.25.block_sparse_moe.experts.205.w3", "model.layers.25.block_sparse_moe.experts.206.w3", "model.layers.25.block_sparse_moe.experts.207.w3", "model.layers.25.block_sparse_moe.experts.208.w3", "model.layers.25.block_sparse_moe.experts.209.w3", "model.layers.25.block_sparse_moe.experts.210.w3", "model.layers.25.block_sparse_moe.experts.211.w3", "model.layers.25.block_sparse_moe.experts.212.w3", "model.layers.25.block_sparse_moe.experts.213.w3", "model.layers.25.block_sparse_moe.experts.214.w3", "model.layers.25.block_sparse_moe.experts.215.w3", "model.layers.25.block_sparse_moe.experts.216.w3", "model.layers.25.block_sparse_moe.experts.217.w3", "model.layers.25.block_sparse_moe.experts.218.w3", "model.layers.25.block_sparse_moe.experts.219.w3", "model.layers.25.block_sparse_moe.experts.220.w3", "model.layers.25.block_sparse_moe.experts.221.w3", "model.layers.25.block_sparse_moe.experts.222.w3", "model.layers.25.block_sparse_moe.experts.223.w3", "model.layers.25.block_sparse_moe.experts.224.w3", "model.layers.25.block_sparse_moe.experts.225.w3", "model.layers.25.block_sparse_moe.experts.226.w3", "model.layers.25.block_sparse_moe.experts.227.w3", "model.layers.25.block_sparse_moe.experts.228.w3", "model.layers.25.block_sparse_moe.experts.229.w3", "model.layers.25.block_sparse_moe.experts.230.w3", "model.layers.25.block_sparse_moe.experts.231.w3", "model.layers.25.block_sparse_moe.experts.232.w3", "model.layers.25.block_sparse_moe.experts.233.w3", "model.layers.25.block_sparse_moe.experts.234.w3", "model.layers.25.block_sparse_moe.experts.235.w3", "model.layers.25.block_sparse_moe.experts.236.w3", "model.layers.25.block_sparse_moe.experts.237.w3", "model.layers.25.block_sparse_moe.experts.238.w3", "model.layers.25.block_sparse_moe.experts.239.w3", "model.layers.25.block_sparse_moe.experts.240.w3", "model.layers.25.block_sparse_moe.experts.241.w3", "model.layers.25.block_sparse_moe.experts.242.w3", "model.layers.25.block_sparse_moe.experts.243.w3", "model.layers.25.block_sparse_moe.experts.244.w3", "model.layers.25.block_sparse_moe.experts.245.w3", "model.layers.25.block_sparse_moe.experts.246.w3", "model.layers.25.block_sparse_moe.experts.247.w3", "model.layers.25.block_sparse_moe.experts.248.w3", "model.layers.25.block_sparse_moe.experts.249.w3", "model.layers.25.block_sparse_moe.experts.250.w3", "model.layers.25.block_sparse_moe.experts.251.w3", "model.layers.25.block_sparse_moe.experts.252.w3", "model.layers.25.block_sparse_moe.experts.253.w3", "model.layers.25.block_sparse_moe.experts.254.w3", "model.layers.25.block_sparse_moe.experts.255.w3", "model.layers.25.block_sparse_moe.experts.0.w2", "model.layers.25.block_sparse_moe.experts.1.w2", "model.layers.25.block_sparse_moe.experts.2.w2", "model.layers.25.block_sparse_moe.experts.3.w2", "model.layers.25.block_sparse_moe.experts.4.w2", "model.layers.25.block_sparse_moe.experts.5.w2", "model.layers.25.block_sparse_moe.experts.6.w2", "model.layers.25.block_sparse_moe.experts.7.w2", "model.layers.25.block_sparse_moe.experts.8.w2", "model.layers.25.block_sparse_moe.experts.9.w2", "model.layers.25.block_sparse_moe.experts.10.w2", "model.layers.25.block_sparse_moe.experts.11.w2", "model.layers.25.block_sparse_moe.experts.12.w2", "model.layers.25.block_sparse_moe.experts.13.w2", "model.layers.25.block_sparse_moe.experts.14.w2", "model.layers.25.block_sparse_moe.experts.15.w2", "model.layers.25.block_sparse_moe.experts.16.w2", "model.layers.25.block_sparse_moe.experts.17.w2", "model.layers.25.block_sparse_moe.experts.18.w2", "model.layers.25.block_sparse_moe.experts.19.w2", "model.layers.25.block_sparse_moe.experts.20.w2", "model.layers.25.block_sparse_moe.experts.21.w2", "model.layers.25.block_sparse_moe.experts.22.w2", "model.layers.25.block_sparse_moe.experts.23.w2", "model.layers.25.block_sparse_moe.experts.24.w2", "model.layers.25.block_sparse_moe.experts.25.w2", "model.layers.25.block_sparse_moe.experts.26.w2", "model.layers.25.block_sparse_moe.experts.27.w2", "model.layers.25.block_sparse_moe.experts.28.w2", "model.layers.25.block_sparse_moe.experts.29.w2", "model.layers.25.block_sparse_moe.experts.30.w2", "model.layers.25.block_sparse_moe.experts.31.w2", "model.layers.25.block_sparse_moe.experts.32.w2", "model.layers.25.block_sparse_moe.experts.33.w2", "model.layers.25.block_sparse_moe.experts.34.w2", "model.layers.25.block_sparse_moe.experts.35.w2", "model.layers.25.block_sparse_moe.experts.36.w2", "model.layers.25.block_sparse_moe.experts.37.w2", "model.layers.25.block_sparse_moe.experts.38.w2", "model.layers.25.block_sparse_moe.experts.39.w2", "model.layers.25.block_sparse_moe.experts.40.w2", "model.layers.25.block_sparse_moe.experts.41.w2", "model.layers.25.block_sparse_moe.experts.42.w2", "model.layers.25.block_sparse_moe.experts.43.w2", "model.layers.25.block_sparse_moe.experts.44.w2", "model.layers.25.block_sparse_moe.experts.45.w2", "model.layers.25.block_sparse_moe.experts.46.w2", "model.layers.25.block_sparse_moe.experts.47.w2", "model.layers.25.block_sparse_moe.experts.48.w2", "model.layers.25.block_sparse_moe.experts.49.w2", "model.layers.25.block_sparse_moe.experts.50.w2", "model.layers.25.block_sparse_moe.experts.51.w2", "model.layers.25.block_sparse_moe.experts.52.w2", "model.layers.25.block_sparse_moe.experts.53.w2", "model.layers.25.block_sparse_moe.experts.54.w2", "model.layers.25.block_sparse_moe.experts.55.w2", "model.layers.25.block_sparse_moe.experts.56.w2", "model.layers.25.block_sparse_moe.experts.57.w2", "model.layers.25.block_sparse_moe.experts.58.w2", "model.layers.25.block_sparse_moe.experts.59.w2", "model.layers.25.block_sparse_moe.experts.60.w2", "model.layers.25.block_sparse_moe.experts.61.w2", "model.layers.25.block_sparse_moe.experts.62.w2", "model.layers.25.block_sparse_moe.experts.63.w2", "model.layers.25.block_sparse_moe.experts.64.w2", "model.layers.25.block_sparse_moe.experts.65.w2", "model.layers.25.block_sparse_moe.experts.66.w2", "model.layers.25.block_sparse_moe.experts.67.w2", "model.layers.25.block_sparse_moe.experts.68.w2", "model.layers.25.block_sparse_moe.experts.69.w2", "model.layers.25.block_sparse_moe.experts.70.w2", "model.layers.25.block_sparse_moe.experts.71.w2", "model.layers.25.block_sparse_moe.experts.72.w2", "model.layers.25.block_sparse_moe.experts.73.w2", "model.layers.25.block_sparse_moe.experts.74.w2", "model.layers.25.block_sparse_moe.experts.75.w2", "model.layers.25.block_sparse_moe.experts.76.w2", "model.layers.25.block_sparse_moe.experts.77.w2", "model.layers.25.block_sparse_moe.experts.78.w2", "model.layers.25.block_sparse_moe.experts.79.w2", "model.layers.25.block_sparse_moe.experts.80.w2", "model.layers.25.block_sparse_moe.experts.81.w2", "model.layers.25.block_sparse_moe.experts.82.w2", "model.layers.25.block_sparse_moe.experts.83.w2", "model.layers.25.block_sparse_moe.experts.84.w2", "model.layers.25.block_sparse_moe.experts.85.w2", "model.layers.25.block_sparse_moe.experts.86.w2", "model.layers.25.block_sparse_moe.experts.87.w2", "model.layers.25.block_sparse_moe.experts.88.w2", "model.layers.25.block_sparse_moe.experts.89.w2", "model.layers.25.block_sparse_moe.experts.90.w2", "model.layers.25.block_sparse_moe.experts.91.w2", "model.layers.25.block_sparse_moe.experts.92.w2", "model.layers.25.block_sparse_moe.experts.93.w2", "model.layers.25.block_sparse_moe.experts.94.w2", "model.layers.25.block_sparse_moe.experts.95.w2", "model.layers.25.block_sparse_moe.experts.96.w2", "model.layers.25.block_sparse_moe.experts.97.w2", "model.layers.25.block_sparse_moe.experts.98.w2", "model.layers.25.block_sparse_moe.experts.99.w2", "model.layers.25.block_sparse_moe.experts.100.w2", "model.layers.25.block_sparse_moe.experts.101.w2", "model.layers.25.block_sparse_moe.experts.102.w2", "model.layers.25.block_sparse_moe.experts.103.w2", "model.layers.25.block_sparse_moe.experts.104.w2", "model.layers.25.block_sparse_moe.experts.105.w2", "model.layers.25.block_sparse_moe.experts.106.w2", "model.layers.25.block_sparse_moe.experts.107.w2", "model.layers.25.block_sparse_moe.experts.108.w2", "model.layers.25.block_sparse_moe.experts.109.w2", "model.layers.25.block_sparse_moe.experts.110.w2", "model.layers.25.block_sparse_moe.experts.111.w2", "model.layers.25.block_sparse_moe.experts.112.w2", "model.layers.25.block_sparse_moe.experts.113.w2", "model.layers.25.block_sparse_moe.experts.114.w2", "model.layers.25.block_sparse_moe.experts.115.w2", "model.layers.25.block_sparse_moe.experts.116.w2", "model.layers.25.block_sparse_moe.experts.117.w2", "model.layers.25.block_sparse_moe.experts.118.w2", "model.layers.25.block_sparse_moe.experts.119.w2", "model.layers.25.block_sparse_moe.experts.120.w2", "model.layers.25.block_sparse_moe.experts.121.w2", "model.layers.25.block_sparse_moe.experts.122.w2", "model.layers.25.block_sparse_moe.experts.123.w2", "model.layers.25.block_sparse_moe.experts.124.w2", "model.layers.25.block_sparse_moe.experts.125.w2", "model.layers.25.block_sparse_moe.experts.126.w2", "model.layers.25.block_sparse_moe.experts.127.w2", "model.layers.25.block_sparse_moe.experts.128.w2", "model.layers.25.block_sparse_moe.experts.129.w2", "model.layers.25.block_sparse_moe.experts.130.w2", "model.layers.25.block_sparse_moe.experts.131.w2", "model.layers.25.block_sparse_moe.experts.132.w2", "model.layers.25.block_sparse_moe.experts.133.w2", "model.layers.25.block_sparse_moe.experts.134.w2", "model.layers.25.block_sparse_moe.experts.135.w2", "model.layers.25.block_sparse_moe.experts.136.w2", "model.layers.25.block_sparse_moe.experts.137.w2", "model.layers.25.block_sparse_moe.experts.138.w2", "model.layers.25.block_sparse_moe.experts.139.w2", "model.layers.25.block_sparse_moe.experts.140.w2", "model.layers.25.block_sparse_moe.experts.141.w2", "model.layers.25.block_sparse_moe.experts.142.w2", "model.layers.25.block_sparse_moe.experts.143.w2", "model.layers.25.block_sparse_moe.experts.144.w2", "model.layers.25.block_sparse_moe.experts.145.w2", "model.layers.25.block_sparse_moe.experts.146.w2", "model.layers.25.block_sparse_moe.experts.147.w2", "model.layers.25.block_sparse_moe.experts.148.w2", "model.layers.25.block_sparse_moe.experts.149.w2", "model.layers.25.block_sparse_moe.experts.150.w2", "model.layers.25.block_sparse_moe.experts.151.w2", "model.layers.25.block_sparse_moe.experts.152.w2", "model.layers.25.block_sparse_moe.experts.153.w2", "model.layers.25.block_sparse_moe.experts.154.w2", "model.layers.25.block_sparse_moe.experts.155.w2", "model.layers.25.block_sparse_moe.experts.156.w2", "model.layers.25.block_sparse_moe.experts.157.w2", "model.layers.25.block_sparse_moe.experts.158.w2", "model.layers.25.block_sparse_moe.experts.159.w2", "model.layers.25.block_sparse_moe.experts.160.w2", "model.layers.25.block_sparse_moe.experts.161.w2", "model.layers.25.block_sparse_moe.experts.162.w2", "model.layers.25.block_sparse_moe.experts.163.w2", "model.layers.25.block_sparse_moe.experts.164.w2", "model.layers.25.block_sparse_moe.experts.165.w2", "model.layers.25.block_sparse_moe.experts.166.w2", "model.layers.25.block_sparse_moe.experts.167.w2", "model.layers.25.block_sparse_moe.experts.168.w2", "model.layers.25.block_sparse_moe.experts.169.w2", "model.layers.25.block_sparse_moe.experts.170.w2", "model.layers.25.block_sparse_moe.experts.171.w2", "model.layers.25.block_sparse_moe.experts.172.w2", "model.layers.25.block_sparse_moe.experts.173.w2", "model.layers.25.block_sparse_moe.experts.174.w2", "model.layers.25.block_sparse_moe.experts.175.w2", "model.layers.25.block_sparse_moe.experts.176.w2", "model.layers.25.block_sparse_moe.experts.177.w2", "model.layers.25.block_sparse_moe.experts.178.w2", "model.layers.25.block_sparse_moe.experts.179.w2", "model.layers.25.block_sparse_moe.experts.180.w2", "model.layers.25.block_sparse_moe.experts.181.w2", "model.layers.25.block_sparse_moe.experts.182.w2", "model.layers.25.block_sparse_moe.experts.183.w2", "model.layers.25.block_sparse_moe.experts.184.w2", "model.layers.25.block_sparse_moe.experts.185.w2", "model.layers.25.block_sparse_moe.experts.186.w2", "model.layers.25.block_sparse_moe.experts.187.w2", "model.layers.25.block_sparse_moe.experts.188.w2", "model.layers.25.block_sparse_moe.experts.189.w2", "model.layers.25.block_sparse_moe.experts.190.w2", "model.layers.25.block_sparse_moe.experts.191.w2", "model.layers.25.block_sparse_moe.experts.192.w2", "model.layers.25.block_sparse_moe.experts.193.w2", "model.layers.25.block_sparse_moe.experts.194.w2", "model.layers.25.block_sparse_moe.experts.195.w2", "model.layers.25.block_sparse_moe.experts.196.w2", "model.layers.25.block_sparse_moe.experts.197.w2", "model.layers.25.block_sparse_moe.experts.198.w2", "model.layers.25.block_sparse_moe.experts.199.w2", "model.layers.25.block_sparse_moe.experts.200.w2", "model.layers.25.block_sparse_moe.experts.201.w2", "model.layers.25.block_sparse_moe.experts.202.w2", "model.layers.25.block_sparse_moe.experts.203.w2", "model.layers.25.block_sparse_moe.experts.204.w2", "model.layers.25.block_sparse_moe.experts.205.w2", "model.layers.25.block_sparse_moe.experts.206.w2", "model.layers.25.block_sparse_moe.experts.207.w2", "model.layers.25.block_sparse_moe.experts.208.w2", "model.layers.25.block_sparse_moe.experts.209.w2", "model.layers.25.block_sparse_moe.experts.210.w2", "model.layers.25.block_sparse_moe.experts.211.w2", "model.layers.25.block_sparse_moe.experts.212.w2", "model.layers.25.block_sparse_moe.experts.213.w2", "model.layers.25.block_sparse_moe.experts.214.w2", "model.layers.25.block_sparse_moe.experts.215.w2", "model.layers.25.block_sparse_moe.experts.216.w2", "model.layers.25.block_sparse_moe.experts.217.w2", "model.layers.25.block_sparse_moe.experts.218.w2", "model.layers.25.block_sparse_moe.experts.219.w2", "model.layers.25.block_sparse_moe.experts.220.w2", "model.layers.25.block_sparse_moe.experts.221.w2", "model.layers.25.block_sparse_moe.experts.222.w2", "model.layers.25.block_sparse_moe.experts.223.w2", "model.layers.25.block_sparse_moe.experts.224.w2", "model.layers.25.block_sparse_moe.experts.225.w2", "model.layers.25.block_sparse_moe.experts.226.w2", "model.layers.25.block_sparse_moe.experts.227.w2", "model.layers.25.block_sparse_moe.experts.228.w2", "model.layers.25.block_sparse_moe.experts.229.w2", "model.layers.25.block_sparse_moe.experts.230.w2", "model.layers.25.block_sparse_moe.experts.231.w2", "model.layers.25.block_sparse_moe.experts.232.w2", "model.layers.25.block_sparse_moe.experts.233.w2", "model.layers.25.block_sparse_moe.experts.234.w2", "model.layers.25.block_sparse_moe.experts.235.w2", "model.layers.25.block_sparse_moe.experts.236.w2", "model.layers.25.block_sparse_moe.experts.237.w2", "model.layers.25.block_sparse_moe.experts.238.w2", "model.layers.25.block_sparse_moe.experts.239.w2", "model.layers.25.block_sparse_moe.experts.240.w2", "model.layers.25.block_sparse_moe.experts.241.w2", "model.layers.25.block_sparse_moe.experts.242.w2", "model.layers.25.block_sparse_moe.experts.243.w2", "model.layers.25.block_sparse_moe.experts.244.w2", "model.layers.25.block_sparse_moe.experts.245.w2", "model.layers.25.block_sparse_moe.experts.246.w2", "model.layers.25.block_sparse_moe.experts.247.w2", "model.layers.25.block_sparse_moe.experts.248.w2", "model.layers.25.block_sparse_moe.experts.249.w2", "model.layers.25.block_sparse_moe.experts.250.w2", "model.layers.25.block_sparse_moe.experts.251.w2", "model.layers.25.block_sparse_moe.experts.252.w2", "model.layers.25.block_sparse_moe.experts.253.w2", "model.layers.25.block_sparse_moe.experts.254.w2", "model.layers.25.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0006850117817521206, "dbits": 3623878656 } ] }, { "idx": 52, "layers": [ "model.layers.26.self_attn.q_proj", "model.layers.26.self_attn.k_proj", "model.layers.26.self_attn.v_proj", "model.layers.26.self_attn.o_proj" ], "candidates": [ { "dkld": 0.004159562662243876, "dbits": 44040192 } ] }, { "idx": 53, "layers": [ "model.layers.26.block_sparse_moe.experts.0.w1", "model.layers.26.block_sparse_moe.experts.1.w1", "model.layers.26.block_sparse_moe.experts.2.w1", "model.layers.26.block_sparse_moe.experts.3.w1", "model.layers.26.block_sparse_moe.experts.4.w1", "model.layers.26.block_sparse_moe.experts.5.w1", "model.layers.26.block_sparse_moe.experts.6.w1", "model.layers.26.block_sparse_moe.experts.7.w1", "model.layers.26.block_sparse_moe.experts.8.w1", "model.layers.26.block_sparse_moe.experts.9.w1", "model.layers.26.block_sparse_moe.experts.10.w1", "model.layers.26.block_sparse_moe.experts.11.w1", "model.layers.26.block_sparse_moe.experts.12.w1", "model.layers.26.block_sparse_moe.experts.13.w1", "model.layers.26.block_sparse_moe.experts.14.w1", "model.layers.26.block_sparse_moe.experts.15.w1", "model.layers.26.block_sparse_moe.experts.16.w1", "model.layers.26.block_sparse_moe.experts.17.w1", "model.layers.26.block_sparse_moe.experts.18.w1", "model.layers.26.block_sparse_moe.experts.19.w1", "model.layers.26.block_sparse_moe.experts.20.w1", "model.layers.26.block_sparse_moe.experts.21.w1", "model.layers.26.block_sparse_moe.experts.22.w1", "model.layers.26.block_sparse_moe.experts.23.w1", "model.layers.26.block_sparse_moe.experts.24.w1", "model.layers.26.block_sparse_moe.experts.25.w1", "model.layers.26.block_sparse_moe.experts.26.w1", "model.layers.26.block_sparse_moe.experts.27.w1", "model.layers.26.block_sparse_moe.experts.28.w1", "model.layers.26.block_sparse_moe.experts.29.w1", "model.layers.26.block_sparse_moe.experts.30.w1", "model.layers.26.block_sparse_moe.experts.31.w1", "model.layers.26.block_sparse_moe.experts.32.w1", "model.layers.26.block_sparse_moe.experts.33.w1", "model.layers.26.block_sparse_moe.experts.34.w1", "model.layers.26.block_sparse_moe.experts.35.w1", "model.layers.26.block_sparse_moe.experts.36.w1", "model.layers.26.block_sparse_moe.experts.37.w1", "model.layers.26.block_sparse_moe.experts.38.w1", "model.layers.26.block_sparse_moe.experts.39.w1", "model.layers.26.block_sparse_moe.experts.40.w1", "model.layers.26.block_sparse_moe.experts.41.w1", "model.layers.26.block_sparse_moe.experts.42.w1", "model.layers.26.block_sparse_moe.experts.43.w1", "model.layers.26.block_sparse_moe.experts.44.w1", "model.layers.26.block_sparse_moe.experts.45.w1", "model.layers.26.block_sparse_moe.experts.46.w1", "model.layers.26.block_sparse_moe.experts.47.w1", "model.layers.26.block_sparse_moe.experts.48.w1", "model.layers.26.block_sparse_moe.experts.49.w1", "model.layers.26.block_sparse_moe.experts.50.w1", "model.layers.26.block_sparse_moe.experts.51.w1", "model.layers.26.block_sparse_moe.experts.52.w1", "model.layers.26.block_sparse_moe.experts.53.w1", "model.layers.26.block_sparse_moe.experts.54.w1", "model.layers.26.block_sparse_moe.experts.55.w1", "model.layers.26.block_sparse_moe.experts.56.w1", "model.layers.26.block_sparse_moe.experts.57.w1", "model.layers.26.block_sparse_moe.experts.58.w1", "model.layers.26.block_sparse_moe.experts.59.w1", "model.layers.26.block_sparse_moe.experts.60.w1", "model.layers.26.block_sparse_moe.experts.61.w1", "model.layers.26.block_sparse_moe.experts.62.w1", "model.layers.26.block_sparse_moe.experts.63.w1", "model.layers.26.block_sparse_moe.experts.64.w1", "model.layers.26.block_sparse_moe.experts.65.w1", "model.layers.26.block_sparse_moe.experts.66.w1", "model.layers.26.block_sparse_moe.experts.67.w1", "model.layers.26.block_sparse_moe.experts.68.w1", "model.layers.26.block_sparse_moe.experts.69.w1", "model.layers.26.block_sparse_moe.experts.70.w1", "model.layers.26.block_sparse_moe.experts.71.w1", "model.layers.26.block_sparse_moe.experts.72.w1", "model.layers.26.block_sparse_moe.experts.73.w1", "model.layers.26.block_sparse_moe.experts.74.w1", "model.layers.26.block_sparse_moe.experts.75.w1", "model.layers.26.block_sparse_moe.experts.76.w1", "model.layers.26.block_sparse_moe.experts.77.w1", "model.layers.26.block_sparse_moe.experts.78.w1", "model.layers.26.block_sparse_moe.experts.79.w1", "model.layers.26.block_sparse_moe.experts.80.w1", "model.layers.26.block_sparse_moe.experts.81.w1", "model.layers.26.block_sparse_moe.experts.82.w1", "model.layers.26.block_sparse_moe.experts.83.w1", "model.layers.26.block_sparse_moe.experts.84.w1", "model.layers.26.block_sparse_moe.experts.85.w1", "model.layers.26.block_sparse_moe.experts.86.w1", "model.layers.26.block_sparse_moe.experts.87.w1", "model.layers.26.block_sparse_moe.experts.88.w1", "model.layers.26.block_sparse_moe.experts.89.w1", "model.layers.26.block_sparse_moe.experts.90.w1", "model.layers.26.block_sparse_moe.experts.91.w1", "model.layers.26.block_sparse_moe.experts.92.w1", "model.layers.26.block_sparse_moe.experts.93.w1", "model.layers.26.block_sparse_moe.experts.94.w1", "model.layers.26.block_sparse_moe.experts.95.w1", "model.layers.26.block_sparse_moe.experts.96.w1", "model.layers.26.block_sparse_moe.experts.97.w1", "model.layers.26.block_sparse_moe.experts.98.w1", "model.layers.26.block_sparse_moe.experts.99.w1", "model.layers.26.block_sparse_moe.experts.100.w1", "model.layers.26.block_sparse_moe.experts.101.w1", "model.layers.26.block_sparse_moe.experts.102.w1", "model.layers.26.block_sparse_moe.experts.103.w1", "model.layers.26.block_sparse_moe.experts.104.w1", "model.layers.26.block_sparse_moe.experts.105.w1", "model.layers.26.block_sparse_moe.experts.106.w1", "model.layers.26.block_sparse_moe.experts.107.w1", "model.layers.26.block_sparse_moe.experts.108.w1", "model.layers.26.block_sparse_moe.experts.109.w1", "model.layers.26.block_sparse_moe.experts.110.w1", "model.layers.26.block_sparse_moe.experts.111.w1", "model.layers.26.block_sparse_moe.experts.112.w1", "model.layers.26.block_sparse_moe.experts.113.w1", "model.layers.26.block_sparse_moe.experts.114.w1", "model.layers.26.block_sparse_moe.experts.115.w1", "model.layers.26.block_sparse_moe.experts.116.w1", "model.layers.26.block_sparse_moe.experts.117.w1", "model.layers.26.block_sparse_moe.experts.118.w1", "model.layers.26.block_sparse_moe.experts.119.w1", "model.layers.26.block_sparse_moe.experts.120.w1", "model.layers.26.block_sparse_moe.experts.121.w1", "model.layers.26.block_sparse_moe.experts.122.w1", "model.layers.26.block_sparse_moe.experts.123.w1", "model.layers.26.block_sparse_moe.experts.124.w1", "model.layers.26.block_sparse_moe.experts.125.w1", "model.layers.26.block_sparse_moe.experts.126.w1", "model.layers.26.block_sparse_moe.experts.127.w1", "model.layers.26.block_sparse_moe.experts.128.w1", "model.layers.26.block_sparse_moe.experts.129.w1", "model.layers.26.block_sparse_moe.experts.130.w1", "model.layers.26.block_sparse_moe.experts.131.w1", "model.layers.26.block_sparse_moe.experts.132.w1", "model.layers.26.block_sparse_moe.experts.133.w1", "model.layers.26.block_sparse_moe.experts.134.w1", "model.layers.26.block_sparse_moe.experts.135.w1", "model.layers.26.block_sparse_moe.experts.136.w1", "model.layers.26.block_sparse_moe.experts.137.w1", "model.layers.26.block_sparse_moe.experts.138.w1", "model.layers.26.block_sparse_moe.experts.139.w1", "model.layers.26.block_sparse_moe.experts.140.w1", "model.layers.26.block_sparse_moe.experts.141.w1", "model.layers.26.block_sparse_moe.experts.142.w1", "model.layers.26.block_sparse_moe.experts.143.w1", "model.layers.26.block_sparse_moe.experts.144.w1", "model.layers.26.block_sparse_moe.experts.145.w1", "model.layers.26.block_sparse_moe.experts.146.w1", "model.layers.26.block_sparse_moe.experts.147.w1", "model.layers.26.block_sparse_moe.experts.148.w1", "model.layers.26.block_sparse_moe.experts.149.w1", "model.layers.26.block_sparse_moe.experts.150.w1", "model.layers.26.block_sparse_moe.experts.151.w1", "model.layers.26.block_sparse_moe.experts.152.w1", "model.layers.26.block_sparse_moe.experts.153.w1", "model.layers.26.block_sparse_moe.experts.154.w1", "model.layers.26.block_sparse_moe.experts.155.w1", "model.layers.26.block_sparse_moe.experts.156.w1", "model.layers.26.block_sparse_moe.experts.157.w1", "model.layers.26.block_sparse_moe.experts.158.w1", "model.layers.26.block_sparse_moe.experts.159.w1", "model.layers.26.block_sparse_moe.experts.160.w1", "model.layers.26.block_sparse_moe.experts.161.w1", "model.layers.26.block_sparse_moe.experts.162.w1", "model.layers.26.block_sparse_moe.experts.163.w1", "model.layers.26.block_sparse_moe.experts.164.w1", "model.layers.26.block_sparse_moe.experts.165.w1", "model.layers.26.block_sparse_moe.experts.166.w1", "model.layers.26.block_sparse_moe.experts.167.w1", "model.layers.26.block_sparse_moe.experts.168.w1", "model.layers.26.block_sparse_moe.experts.169.w1", "model.layers.26.block_sparse_moe.experts.170.w1", "model.layers.26.block_sparse_moe.experts.171.w1", "model.layers.26.block_sparse_moe.experts.172.w1", "model.layers.26.block_sparse_moe.experts.173.w1", "model.layers.26.block_sparse_moe.experts.174.w1", "model.layers.26.block_sparse_moe.experts.175.w1", "model.layers.26.block_sparse_moe.experts.176.w1", "model.layers.26.block_sparse_moe.experts.177.w1", "model.layers.26.block_sparse_moe.experts.178.w1", "model.layers.26.block_sparse_moe.experts.179.w1", "model.layers.26.block_sparse_moe.experts.180.w1", "model.layers.26.block_sparse_moe.experts.181.w1", "model.layers.26.block_sparse_moe.experts.182.w1", "model.layers.26.block_sparse_moe.experts.183.w1", "model.layers.26.block_sparse_moe.experts.184.w1", "model.layers.26.block_sparse_moe.experts.185.w1", "model.layers.26.block_sparse_moe.experts.186.w1", "model.layers.26.block_sparse_moe.experts.187.w1", "model.layers.26.block_sparse_moe.experts.188.w1", "model.layers.26.block_sparse_moe.experts.189.w1", "model.layers.26.block_sparse_moe.experts.190.w1", "model.layers.26.block_sparse_moe.experts.191.w1", "model.layers.26.block_sparse_moe.experts.192.w1", "model.layers.26.block_sparse_moe.experts.193.w1", "model.layers.26.block_sparse_moe.experts.194.w1", "model.layers.26.block_sparse_moe.experts.195.w1", "model.layers.26.block_sparse_moe.experts.196.w1", "model.layers.26.block_sparse_moe.experts.197.w1", "model.layers.26.block_sparse_moe.experts.198.w1", "model.layers.26.block_sparse_moe.experts.199.w1", "model.layers.26.block_sparse_moe.experts.200.w1", "model.layers.26.block_sparse_moe.experts.201.w1", "model.layers.26.block_sparse_moe.experts.202.w1", "model.layers.26.block_sparse_moe.experts.203.w1", "model.layers.26.block_sparse_moe.experts.204.w1", "model.layers.26.block_sparse_moe.experts.205.w1", "model.layers.26.block_sparse_moe.experts.206.w1", "model.layers.26.block_sparse_moe.experts.207.w1", "model.layers.26.block_sparse_moe.experts.208.w1", "model.layers.26.block_sparse_moe.experts.209.w1", "model.layers.26.block_sparse_moe.experts.210.w1", "model.layers.26.block_sparse_moe.experts.211.w1", "model.layers.26.block_sparse_moe.experts.212.w1", "model.layers.26.block_sparse_moe.experts.213.w1", "model.layers.26.block_sparse_moe.experts.214.w1", "model.layers.26.block_sparse_moe.experts.215.w1", "model.layers.26.block_sparse_moe.experts.216.w1", "model.layers.26.block_sparse_moe.experts.217.w1", "model.layers.26.block_sparse_moe.experts.218.w1", "model.layers.26.block_sparse_moe.experts.219.w1", "model.layers.26.block_sparse_moe.experts.220.w1", "model.layers.26.block_sparse_moe.experts.221.w1", "model.layers.26.block_sparse_moe.experts.222.w1", "model.layers.26.block_sparse_moe.experts.223.w1", "model.layers.26.block_sparse_moe.experts.224.w1", "model.layers.26.block_sparse_moe.experts.225.w1", "model.layers.26.block_sparse_moe.experts.226.w1", "model.layers.26.block_sparse_moe.experts.227.w1", "model.layers.26.block_sparse_moe.experts.228.w1", "model.layers.26.block_sparse_moe.experts.229.w1", "model.layers.26.block_sparse_moe.experts.230.w1", "model.layers.26.block_sparse_moe.experts.231.w1", "model.layers.26.block_sparse_moe.experts.232.w1", "model.layers.26.block_sparse_moe.experts.233.w1", "model.layers.26.block_sparse_moe.experts.234.w1", "model.layers.26.block_sparse_moe.experts.235.w1", "model.layers.26.block_sparse_moe.experts.236.w1", "model.layers.26.block_sparse_moe.experts.237.w1", "model.layers.26.block_sparse_moe.experts.238.w1", "model.layers.26.block_sparse_moe.experts.239.w1", "model.layers.26.block_sparse_moe.experts.240.w1", "model.layers.26.block_sparse_moe.experts.241.w1", "model.layers.26.block_sparse_moe.experts.242.w1", "model.layers.26.block_sparse_moe.experts.243.w1", "model.layers.26.block_sparse_moe.experts.244.w1", "model.layers.26.block_sparse_moe.experts.245.w1", "model.layers.26.block_sparse_moe.experts.246.w1", "model.layers.26.block_sparse_moe.experts.247.w1", "model.layers.26.block_sparse_moe.experts.248.w1", "model.layers.26.block_sparse_moe.experts.249.w1", "model.layers.26.block_sparse_moe.experts.250.w1", "model.layers.26.block_sparse_moe.experts.251.w1", "model.layers.26.block_sparse_moe.experts.252.w1", "model.layers.26.block_sparse_moe.experts.253.w1", "model.layers.26.block_sparse_moe.experts.254.w1", "model.layers.26.block_sparse_moe.experts.255.w1", "model.layers.26.block_sparse_moe.experts.0.w3", "model.layers.26.block_sparse_moe.experts.1.w3", "model.layers.26.block_sparse_moe.experts.2.w3", "model.layers.26.block_sparse_moe.experts.3.w3", "model.layers.26.block_sparse_moe.experts.4.w3", "model.layers.26.block_sparse_moe.experts.5.w3", "model.layers.26.block_sparse_moe.experts.6.w3", "model.layers.26.block_sparse_moe.experts.7.w3", "model.layers.26.block_sparse_moe.experts.8.w3", "model.layers.26.block_sparse_moe.experts.9.w3", "model.layers.26.block_sparse_moe.experts.10.w3", "model.layers.26.block_sparse_moe.experts.11.w3", "model.layers.26.block_sparse_moe.experts.12.w3", "model.layers.26.block_sparse_moe.experts.13.w3", "model.layers.26.block_sparse_moe.experts.14.w3", "model.layers.26.block_sparse_moe.experts.15.w3", "model.layers.26.block_sparse_moe.experts.16.w3", "model.layers.26.block_sparse_moe.experts.17.w3", "model.layers.26.block_sparse_moe.experts.18.w3", "model.layers.26.block_sparse_moe.experts.19.w3", "model.layers.26.block_sparse_moe.experts.20.w3", "model.layers.26.block_sparse_moe.experts.21.w3", "model.layers.26.block_sparse_moe.experts.22.w3", "model.layers.26.block_sparse_moe.experts.23.w3", "model.layers.26.block_sparse_moe.experts.24.w3", "model.layers.26.block_sparse_moe.experts.25.w3", "model.layers.26.block_sparse_moe.experts.26.w3", "model.layers.26.block_sparse_moe.experts.27.w3", "model.layers.26.block_sparse_moe.experts.28.w3", "model.layers.26.block_sparse_moe.experts.29.w3", "model.layers.26.block_sparse_moe.experts.30.w3", "model.layers.26.block_sparse_moe.experts.31.w3", "model.layers.26.block_sparse_moe.experts.32.w3", "model.layers.26.block_sparse_moe.experts.33.w3", "model.layers.26.block_sparse_moe.experts.34.w3", "model.layers.26.block_sparse_moe.experts.35.w3", "model.layers.26.block_sparse_moe.experts.36.w3", "model.layers.26.block_sparse_moe.experts.37.w3", "model.layers.26.block_sparse_moe.experts.38.w3", "model.layers.26.block_sparse_moe.experts.39.w3", "model.layers.26.block_sparse_moe.experts.40.w3", "model.layers.26.block_sparse_moe.experts.41.w3", "model.layers.26.block_sparse_moe.experts.42.w3", "model.layers.26.block_sparse_moe.experts.43.w3", "model.layers.26.block_sparse_moe.experts.44.w3", "model.layers.26.block_sparse_moe.experts.45.w3", "model.layers.26.block_sparse_moe.experts.46.w3", "model.layers.26.block_sparse_moe.experts.47.w3", "model.layers.26.block_sparse_moe.experts.48.w3", "model.layers.26.block_sparse_moe.experts.49.w3", "model.layers.26.block_sparse_moe.experts.50.w3", "model.layers.26.block_sparse_moe.experts.51.w3", "model.layers.26.block_sparse_moe.experts.52.w3", "model.layers.26.block_sparse_moe.experts.53.w3", "model.layers.26.block_sparse_moe.experts.54.w3", "model.layers.26.block_sparse_moe.experts.55.w3", "model.layers.26.block_sparse_moe.experts.56.w3", "model.layers.26.block_sparse_moe.experts.57.w3", "model.layers.26.block_sparse_moe.experts.58.w3", "model.layers.26.block_sparse_moe.experts.59.w3", "model.layers.26.block_sparse_moe.experts.60.w3", "model.layers.26.block_sparse_moe.experts.61.w3", "model.layers.26.block_sparse_moe.experts.62.w3", "model.layers.26.block_sparse_moe.experts.63.w3", "model.layers.26.block_sparse_moe.experts.64.w3", "model.layers.26.block_sparse_moe.experts.65.w3", "model.layers.26.block_sparse_moe.experts.66.w3", "model.layers.26.block_sparse_moe.experts.67.w3", "model.layers.26.block_sparse_moe.experts.68.w3", "model.layers.26.block_sparse_moe.experts.69.w3", "model.layers.26.block_sparse_moe.experts.70.w3", "model.layers.26.block_sparse_moe.experts.71.w3", "model.layers.26.block_sparse_moe.experts.72.w3", "model.layers.26.block_sparse_moe.experts.73.w3", "model.layers.26.block_sparse_moe.experts.74.w3", "model.layers.26.block_sparse_moe.experts.75.w3", "model.layers.26.block_sparse_moe.experts.76.w3", "model.layers.26.block_sparse_moe.experts.77.w3", "model.layers.26.block_sparse_moe.experts.78.w3", "model.layers.26.block_sparse_moe.experts.79.w3", "model.layers.26.block_sparse_moe.experts.80.w3", "model.layers.26.block_sparse_moe.experts.81.w3", "model.layers.26.block_sparse_moe.experts.82.w3", "model.layers.26.block_sparse_moe.experts.83.w3", "model.layers.26.block_sparse_moe.experts.84.w3", "model.layers.26.block_sparse_moe.experts.85.w3", "model.layers.26.block_sparse_moe.experts.86.w3", "model.layers.26.block_sparse_moe.experts.87.w3", "model.layers.26.block_sparse_moe.experts.88.w3", "model.layers.26.block_sparse_moe.experts.89.w3", "model.layers.26.block_sparse_moe.experts.90.w3", "model.layers.26.block_sparse_moe.experts.91.w3", "model.layers.26.block_sparse_moe.experts.92.w3", "model.layers.26.block_sparse_moe.experts.93.w3", "model.layers.26.block_sparse_moe.experts.94.w3", "model.layers.26.block_sparse_moe.experts.95.w3", "model.layers.26.block_sparse_moe.experts.96.w3", "model.layers.26.block_sparse_moe.experts.97.w3", "model.layers.26.block_sparse_moe.experts.98.w3", "model.layers.26.block_sparse_moe.experts.99.w3", "model.layers.26.block_sparse_moe.experts.100.w3", "model.layers.26.block_sparse_moe.experts.101.w3", "model.layers.26.block_sparse_moe.experts.102.w3", "model.layers.26.block_sparse_moe.experts.103.w3", "model.layers.26.block_sparse_moe.experts.104.w3", "model.layers.26.block_sparse_moe.experts.105.w3", "model.layers.26.block_sparse_moe.experts.106.w3", "model.layers.26.block_sparse_moe.experts.107.w3", "model.layers.26.block_sparse_moe.experts.108.w3", "model.layers.26.block_sparse_moe.experts.109.w3", "model.layers.26.block_sparse_moe.experts.110.w3", "model.layers.26.block_sparse_moe.experts.111.w3", "model.layers.26.block_sparse_moe.experts.112.w3", "model.layers.26.block_sparse_moe.experts.113.w3", "model.layers.26.block_sparse_moe.experts.114.w3", "model.layers.26.block_sparse_moe.experts.115.w3", "model.layers.26.block_sparse_moe.experts.116.w3", "model.layers.26.block_sparse_moe.experts.117.w3", "model.layers.26.block_sparse_moe.experts.118.w3", "model.layers.26.block_sparse_moe.experts.119.w3", "model.layers.26.block_sparse_moe.experts.120.w3", "model.layers.26.block_sparse_moe.experts.121.w3", "model.layers.26.block_sparse_moe.experts.122.w3", "model.layers.26.block_sparse_moe.experts.123.w3", "model.layers.26.block_sparse_moe.experts.124.w3", "model.layers.26.block_sparse_moe.experts.125.w3", "model.layers.26.block_sparse_moe.experts.126.w3", "model.layers.26.block_sparse_moe.experts.127.w3", "model.layers.26.block_sparse_moe.experts.128.w3", "model.layers.26.block_sparse_moe.experts.129.w3", "model.layers.26.block_sparse_moe.experts.130.w3", "model.layers.26.block_sparse_moe.experts.131.w3", "model.layers.26.block_sparse_moe.experts.132.w3", "model.layers.26.block_sparse_moe.experts.133.w3", "model.layers.26.block_sparse_moe.experts.134.w3", "model.layers.26.block_sparse_moe.experts.135.w3", "model.layers.26.block_sparse_moe.experts.136.w3", "model.layers.26.block_sparse_moe.experts.137.w3", "model.layers.26.block_sparse_moe.experts.138.w3", "model.layers.26.block_sparse_moe.experts.139.w3", "model.layers.26.block_sparse_moe.experts.140.w3", "model.layers.26.block_sparse_moe.experts.141.w3", "model.layers.26.block_sparse_moe.experts.142.w3", "model.layers.26.block_sparse_moe.experts.143.w3", "model.layers.26.block_sparse_moe.experts.144.w3", "model.layers.26.block_sparse_moe.experts.145.w3", "model.layers.26.block_sparse_moe.experts.146.w3", "model.layers.26.block_sparse_moe.experts.147.w3", "model.layers.26.block_sparse_moe.experts.148.w3", "model.layers.26.block_sparse_moe.experts.149.w3", "model.layers.26.block_sparse_moe.experts.150.w3", "model.layers.26.block_sparse_moe.experts.151.w3", "model.layers.26.block_sparse_moe.experts.152.w3", "model.layers.26.block_sparse_moe.experts.153.w3", "model.layers.26.block_sparse_moe.experts.154.w3", "model.layers.26.block_sparse_moe.experts.155.w3", "model.layers.26.block_sparse_moe.experts.156.w3", "model.layers.26.block_sparse_moe.experts.157.w3", "model.layers.26.block_sparse_moe.experts.158.w3", "model.layers.26.block_sparse_moe.experts.159.w3", "model.layers.26.block_sparse_moe.experts.160.w3", "model.layers.26.block_sparse_moe.experts.161.w3", "model.layers.26.block_sparse_moe.experts.162.w3", "model.layers.26.block_sparse_moe.experts.163.w3", "model.layers.26.block_sparse_moe.experts.164.w3", "model.layers.26.block_sparse_moe.experts.165.w3", "model.layers.26.block_sparse_moe.experts.166.w3", "model.layers.26.block_sparse_moe.experts.167.w3", "model.layers.26.block_sparse_moe.experts.168.w3", "model.layers.26.block_sparse_moe.experts.169.w3", "model.layers.26.block_sparse_moe.experts.170.w3", "model.layers.26.block_sparse_moe.experts.171.w3", "model.layers.26.block_sparse_moe.experts.172.w3", "model.layers.26.block_sparse_moe.experts.173.w3", "model.layers.26.block_sparse_moe.experts.174.w3", "model.layers.26.block_sparse_moe.experts.175.w3", "model.layers.26.block_sparse_moe.experts.176.w3", "model.layers.26.block_sparse_moe.experts.177.w3", "model.layers.26.block_sparse_moe.experts.178.w3", "model.layers.26.block_sparse_moe.experts.179.w3", "model.layers.26.block_sparse_moe.experts.180.w3", "model.layers.26.block_sparse_moe.experts.181.w3", "model.layers.26.block_sparse_moe.experts.182.w3", "model.layers.26.block_sparse_moe.experts.183.w3", "model.layers.26.block_sparse_moe.experts.184.w3", "model.layers.26.block_sparse_moe.experts.185.w3", "model.layers.26.block_sparse_moe.experts.186.w3", "model.layers.26.block_sparse_moe.experts.187.w3", "model.layers.26.block_sparse_moe.experts.188.w3", "model.layers.26.block_sparse_moe.experts.189.w3", "model.layers.26.block_sparse_moe.experts.190.w3", "model.layers.26.block_sparse_moe.experts.191.w3", "model.layers.26.block_sparse_moe.experts.192.w3", "model.layers.26.block_sparse_moe.experts.193.w3", "model.layers.26.block_sparse_moe.experts.194.w3", "model.layers.26.block_sparse_moe.experts.195.w3", "model.layers.26.block_sparse_moe.experts.196.w3", "model.layers.26.block_sparse_moe.experts.197.w3", "model.layers.26.block_sparse_moe.experts.198.w3", "model.layers.26.block_sparse_moe.experts.199.w3", "model.layers.26.block_sparse_moe.experts.200.w3", "model.layers.26.block_sparse_moe.experts.201.w3", "model.layers.26.block_sparse_moe.experts.202.w3", "model.layers.26.block_sparse_moe.experts.203.w3", "model.layers.26.block_sparse_moe.experts.204.w3", "model.layers.26.block_sparse_moe.experts.205.w3", "model.layers.26.block_sparse_moe.experts.206.w3", "model.layers.26.block_sparse_moe.experts.207.w3", "model.layers.26.block_sparse_moe.experts.208.w3", "model.layers.26.block_sparse_moe.experts.209.w3", "model.layers.26.block_sparse_moe.experts.210.w3", "model.layers.26.block_sparse_moe.experts.211.w3", "model.layers.26.block_sparse_moe.experts.212.w3", "model.layers.26.block_sparse_moe.experts.213.w3", "model.layers.26.block_sparse_moe.experts.214.w3", "model.layers.26.block_sparse_moe.experts.215.w3", "model.layers.26.block_sparse_moe.experts.216.w3", "model.layers.26.block_sparse_moe.experts.217.w3", "model.layers.26.block_sparse_moe.experts.218.w3", "model.layers.26.block_sparse_moe.experts.219.w3", "model.layers.26.block_sparse_moe.experts.220.w3", "model.layers.26.block_sparse_moe.experts.221.w3", "model.layers.26.block_sparse_moe.experts.222.w3", "model.layers.26.block_sparse_moe.experts.223.w3", "model.layers.26.block_sparse_moe.experts.224.w3", "model.layers.26.block_sparse_moe.experts.225.w3", "model.layers.26.block_sparse_moe.experts.226.w3", "model.layers.26.block_sparse_moe.experts.227.w3", "model.layers.26.block_sparse_moe.experts.228.w3", "model.layers.26.block_sparse_moe.experts.229.w3", "model.layers.26.block_sparse_moe.experts.230.w3", "model.layers.26.block_sparse_moe.experts.231.w3", "model.layers.26.block_sparse_moe.experts.232.w3", "model.layers.26.block_sparse_moe.experts.233.w3", "model.layers.26.block_sparse_moe.experts.234.w3", "model.layers.26.block_sparse_moe.experts.235.w3", "model.layers.26.block_sparse_moe.experts.236.w3", "model.layers.26.block_sparse_moe.experts.237.w3", "model.layers.26.block_sparse_moe.experts.238.w3", "model.layers.26.block_sparse_moe.experts.239.w3", "model.layers.26.block_sparse_moe.experts.240.w3", "model.layers.26.block_sparse_moe.experts.241.w3", "model.layers.26.block_sparse_moe.experts.242.w3", "model.layers.26.block_sparse_moe.experts.243.w3", "model.layers.26.block_sparse_moe.experts.244.w3", "model.layers.26.block_sparse_moe.experts.245.w3", "model.layers.26.block_sparse_moe.experts.246.w3", "model.layers.26.block_sparse_moe.experts.247.w3", "model.layers.26.block_sparse_moe.experts.248.w3", "model.layers.26.block_sparse_moe.experts.249.w3", "model.layers.26.block_sparse_moe.experts.250.w3", "model.layers.26.block_sparse_moe.experts.251.w3", "model.layers.26.block_sparse_moe.experts.252.w3", "model.layers.26.block_sparse_moe.experts.253.w3", "model.layers.26.block_sparse_moe.experts.254.w3", "model.layers.26.block_sparse_moe.experts.255.w3", "model.layers.26.block_sparse_moe.experts.0.w2", "model.layers.26.block_sparse_moe.experts.1.w2", "model.layers.26.block_sparse_moe.experts.2.w2", "model.layers.26.block_sparse_moe.experts.3.w2", "model.layers.26.block_sparse_moe.experts.4.w2", "model.layers.26.block_sparse_moe.experts.5.w2", "model.layers.26.block_sparse_moe.experts.6.w2", "model.layers.26.block_sparse_moe.experts.7.w2", "model.layers.26.block_sparse_moe.experts.8.w2", "model.layers.26.block_sparse_moe.experts.9.w2", "model.layers.26.block_sparse_moe.experts.10.w2", "model.layers.26.block_sparse_moe.experts.11.w2", "model.layers.26.block_sparse_moe.experts.12.w2", "model.layers.26.block_sparse_moe.experts.13.w2", "model.layers.26.block_sparse_moe.experts.14.w2", "model.layers.26.block_sparse_moe.experts.15.w2", "model.layers.26.block_sparse_moe.experts.16.w2", "model.layers.26.block_sparse_moe.experts.17.w2", "model.layers.26.block_sparse_moe.experts.18.w2", "model.layers.26.block_sparse_moe.experts.19.w2", "model.layers.26.block_sparse_moe.experts.20.w2", "model.layers.26.block_sparse_moe.experts.21.w2", "model.layers.26.block_sparse_moe.experts.22.w2", "model.layers.26.block_sparse_moe.experts.23.w2", "model.layers.26.block_sparse_moe.experts.24.w2", "model.layers.26.block_sparse_moe.experts.25.w2", "model.layers.26.block_sparse_moe.experts.26.w2", "model.layers.26.block_sparse_moe.experts.27.w2", "model.layers.26.block_sparse_moe.experts.28.w2", "model.layers.26.block_sparse_moe.experts.29.w2", "model.layers.26.block_sparse_moe.experts.30.w2", "model.layers.26.block_sparse_moe.experts.31.w2", "model.layers.26.block_sparse_moe.experts.32.w2", "model.layers.26.block_sparse_moe.experts.33.w2", "model.layers.26.block_sparse_moe.experts.34.w2", "model.layers.26.block_sparse_moe.experts.35.w2", "model.layers.26.block_sparse_moe.experts.36.w2", "model.layers.26.block_sparse_moe.experts.37.w2", "model.layers.26.block_sparse_moe.experts.38.w2", "model.layers.26.block_sparse_moe.experts.39.w2", "model.layers.26.block_sparse_moe.experts.40.w2", "model.layers.26.block_sparse_moe.experts.41.w2", "model.layers.26.block_sparse_moe.experts.42.w2", "model.layers.26.block_sparse_moe.experts.43.w2", "model.layers.26.block_sparse_moe.experts.44.w2", "model.layers.26.block_sparse_moe.experts.45.w2", "model.layers.26.block_sparse_moe.experts.46.w2", "model.layers.26.block_sparse_moe.experts.47.w2", "model.layers.26.block_sparse_moe.experts.48.w2", "model.layers.26.block_sparse_moe.experts.49.w2", "model.layers.26.block_sparse_moe.experts.50.w2", "model.layers.26.block_sparse_moe.experts.51.w2", "model.layers.26.block_sparse_moe.experts.52.w2", "model.layers.26.block_sparse_moe.experts.53.w2", "model.layers.26.block_sparse_moe.experts.54.w2", "model.layers.26.block_sparse_moe.experts.55.w2", "model.layers.26.block_sparse_moe.experts.56.w2", "model.layers.26.block_sparse_moe.experts.57.w2", "model.layers.26.block_sparse_moe.experts.58.w2", "model.layers.26.block_sparse_moe.experts.59.w2", "model.layers.26.block_sparse_moe.experts.60.w2", "model.layers.26.block_sparse_moe.experts.61.w2", "model.layers.26.block_sparse_moe.experts.62.w2", "model.layers.26.block_sparse_moe.experts.63.w2", "model.layers.26.block_sparse_moe.experts.64.w2", "model.layers.26.block_sparse_moe.experts.65.w2", "model.layers.26.block_sparse_moe.experts.66.w2", "model.layers.26.block_sparse_moe.experts.67.w2", "model.layers.26.block_sparse_moe.experts.68.w2", "model.layers.26.block_sparse_moe.experts.69.w2", "model.layers.26.block_sparse_moe.experts.70.w2", "model.layers.26.block_sparse_moe.experts.71.w2", "model.layers.26.block_sparse_moe.experts.72.w2", "model.layers.26.block_sparse_moe.experts.73.w2", "model.layers.26.block_sparse_moe.experts.74.w2", "model.layers.26.block_sparse_moe.experts.75.w2", "model.layers.26.block_sparse_moe.experts.76.w2", "model.layers.26.block_sparse_moe.experts.77.w2", "model.layers.26.block_sparse_moe.experts.78.w2", "model.layers.26.block_sparse_moe.experts.79.w2", "model.layers.26.block_sparse_moe.experts.80.w2", "model.layers.26.block_sparse_moe.experts.81.w2", "model.layers.26.block_sparse_moe.experts.82.w2", "model.layers.26.block_sparse_moe.experts.83.w2", "model.layers.26.block_sparse_moe.experts.84.w2", "model.layers.26.block_sparse_moe.experts.85.w2", "model.layers.26.block_sparse_moe.experts.86.w2", "model.layers.26.block_sparse_moe.experts.87.w2", "model.layers.26.block_sparse_moe.experts.88.w2", "model.layers.26.block_sparse_moe.experts.89.w2", "model.layers.26.block_sparse_moe.experts.90.w2", "model.layers.26.block_sparse_moe.experts.91.w2", "model.layers.26.block_sparse_moe.experts.92.w2", "model.layers.26.block_sparse_moe.experts.93.w2", "model.layers.26.block_sparse_moe.experts.94.w2", "model.layers.26.block_sparse_moe.experts.95.w2", "model.layers.26.block_sparse_moe.experts.96.w2", "model.layers.26.block_sparse_moe.experts.97.w2", "model.layers.26.block_sparse_moe.experts.98.w2", "model.layers.26.block_sparse_moe.experts.99.w2", "model.layers.26.block_sparse_moe.experts.100.w2", "model.layers.26.block_sparse_moe.experts.101.w2", "model.layers.26.block_sparse_moe.experts.102.w2", "model.layers.26.block_sparse_moe.experts.103.w2", "model.layers.26.block_sparse_moe.experts.104.w2", "model.layers.26.block_sparse_moe.experts.105.w2", "model.layers.26.block_sparse_moe.experts.106.w2", "model.layers.26.block_sparse_moe.experts.107.w2", "model.layers.26.block_sparse_moe.experts.108.w2", "model.layers.26.block_sparse_moe.experts.109.w2", "model.layers.26.block_sparse_moe.experts.110.w2", "model.layers.26.block_sparse_moe.experts.111.w2", "model.layers.26.block_sparse_moe.experts.112.w2", "model.layers.26.block_sparse_moe.experts.113.w2", "model.layers.26.block_sparse_moe.experts.114.w2", "model.layers.26.block_sparse_moe.experts.115.w2", "model.layers.26.block_sparse_moe.experts.116.w2", "model.layers.26.block_sparse_moe.experts.117.w2", "model.layers.26.block_sparse_moe.experts.118.w2", "model.layers.26.block_sparse_moe.experts.119.w2", "model.layers.26.block_sparse_moe.experts.120.w2", "model.layers.26.block_sparse_moe.experts.121.w2", "model.layers.26.block_sparse_moe.experts.122.w2", "model.layers.26.block_sparse_moe.experts.123.w2", "model.layers.26.block_sparse_moe.experts.124.w2", "model.layers.26.block_sparse_moe.experts.125.w2", "model.layers.26.block_sparse_moe.experts.126.w2", "model.layers.26.block_sparse_moe.experts.127.w2", "model.layers.26.block_sparse_moe.experts.128.w2", "model.layers.26.block_sparse_moe.experts.129.w2", "model.layers.26.block_sparse_moe.experts.130.w2", "model.layers.26.block_sparse_moe.experts.131.w2", "model.layers.26.block_sparse_moe.experts.132.w2", "model.layers.26.block_sparse_moe.experts.133.w2", "model.layers.26.block_sparse_moe.experts.134.w2", "model.layers.26.block_sparse_moe.experts.135.w2", "model.layers.26.block_sparse_moe.experts.136.w2", "model.layers.26.block_sparse_moe.experts.137.w2", "model.layers.26.block_sparse_moe.experts.138.w2", "model.layers.26.block_sparse_moe.experts.139.w2", "model.layers.26.block_sparse_moe.experts.140.w2", "model.layers.26.block_sparse_moe.experts.141.w2", "model.layers.26.block_sparse_moe.experts.142.w2", "model.layers.26.block_sparse_moe.experts.143.w2", "model.layers.26.block_sparse_moe.experts.144.w2", "model.layers.26.block_sparse_moe.experts.145.w2", "model.layers.26.block_sparse_moe.experts.146.w2", "model.layers.26.block_sparse_moe.experts.147.w2", "model.layers.26.block_sparse_moe.experts.148.w2", "model.layers.26.block_sparse_moe.experts.149.w2", "model.layers.26.block_sparse_moe.experts.150.w2", "model.layers.26.block_sparse_moe.experts.151.w2", "model.layers.26.block_sparse_moe.experts.152.w2", "model.layers.26.block_sparse_moe.experts.153.w2", "model.layers.26.block_sparse_moe.experts.154.w2", "model.layers.26.block_sparse_moe.experts.155.w2", "model.layers.26.block_sparse_moe.experts.156.w2", "model.layers.26.block_sparse_moe.experts.157.w2", "model.layers.26.block_sparse_moe.experts.158.w2", "model.layers.26.block_sparse_moe.experts.159.w2", "model.layers.26.block_sparse_moe.experts.160.w2", "model.layers.26.block_sparse_moe.experts.161.w2", "model.layers.26.block_sparse_moe.experts.162.w2", "model.layers.26.block_sparse_moe.experts.163.w2", "model.layers.26.block_sparse_moe.experts.164.w2", "model.layers.26.block_sparse_moe.experts.165.w2", "model.layers.26.block_sparse_moe.experts.166.w2", "model.layers.26.block_sparse_moe.experts.167.w2", "model.layers.26.block_sparse_moe.experts.168.w2", "model.layers.26.block_sparse_moe.experts.169.w2", "model.layers.26.block_sparse_moe.experts.170.w2", "model.layers.26.block_sparse_moe.experts.171.w2", "model.layers.26.block_sparse_moe.experts.172.w2", "model.layers.26.block_sparse_moe.experts.173.w2", "model.layers.26.block_sparse_moe.experts.174.w2", "model.layers.26.block_sparse_moe.experts.175.w2", "model.layers.26.block_sparse_moe.experts.176.w2", "model.layers.26.block_sparse_moe.experts.177.w2", "model.layers.26.block_sparse_moe.experts.178.w2", "model.layers.26.block_sparse_moe.experts.179.w2", "model.layers.26.block_sparse_moe.experts.180.w2", "model.layers.26.block_sparse_moe.experts.181.w2", "model.layers.26.block_sparse_moe.experts.182.w2", "model.layers.26.block_sparse_moe.experts.183.w2", "model.layers.26.block_sparse_moe.experts.184.w2", "model.layers.26.block_sparse_moe.experts.185.w2", "model.layers.26.block_sparse_moe.experts.186.w2", "model.layers.26.block_sparse_moe.experts.187.w2", "model.layers.26.block_sparse_moe.experts.188.w2", "model.layers.26.block_sparse_moe.experts.189.w2", "model.layers.26.block_sparse_moe.experts.190.w2", "model.layers.26.block_sparse_moe.experts.191.w2", "model.layers.26.block_sparse_moe.experts.192.w2", "model.layers.26.block_sparse_moe.experts.193.w2", "model.layers.26.block_sparse_moe.experts.194.w2", "model.layers.26.block_sparse_moe.experts.195.w2", "model.layers.26.block_sparse_moe.experts.196.w2", "model.layers.26.block_sparse_moe.experts.197.w2", "model.layers.26.block_sparse_moe.experts.198.w2", "model.layers.26.block_sparse_moe.experts.199.w2", "model.layers.26.block_sparse_moe.experts.200.w2", "model.layers.26.block_sparse_moe.experts.201.w2", "model.layers.26.block_sparse_moe.experts.202.w2", "model.layers.26.block_sparse_moe.experts.203.w2", "model.layers.26.block_sparse_moe.experts.204.w2", "model.layers.26.block_sparse_moe.experts.205.w2", "model.layers.26.block_sparse_moe.experts.206.w2", "model.layers.26.block_sparse_moe.experts.207.w2", "model.layers.26.block_sparse_moe.experts.208.w2", "model.layers.26.block_sparse_moe.experts.209.w2", "model.layers.26.block_sparse_moe.experts.210.w2", "model.layers.26.block_sparse_moe.experts.211.w2", "model.layers.26.block_sparse_moe.experts.212.w2", "model.layers.26.block_sparse_moe.experts.213.w2", "model.layers.26.block_sparse_moe.experts.214.w2", "model.layers.26.block_sparse_moe.experts.215.w2", "model.layers.26.block_sparse_moe.experts.216.w2", "model.layers.26.block_sparse_moe.experts.217.w2", "model.layers.26.block_sparse_moe.experts.218.w2", "model.layers.26.block_sparse_moe.experts.219.w2", "model.layers.26.block_sparse_moe.experts.220.w2", "model.layers.26.block_sparse_moe.experts.221.w2", "model.layers.26.block_sparse_moe.experts.222.w2", "model.layers.26.block_sparse_moe.experts.223.w2", "model.layers.26.block_sparse_moe.experts.224.w2", "model.layers.26.block_sparse_moe.experts.225.w2", "model.layers.26.block_sparse_moe.experts.226.w2", "model.layers.26.block_sparse_moe.experts.227.w2", "model.layers.26.block_sparse_moe.experts.228.w2", "model.layers.26.block_sparse_moe.experts.229.w2", "model.layers.26.block_sparse_moe.experts.230.w2", "model.layers.26.block_sparse_moe.experts.231.w2", "model.layers.26.block_sparse_moe.experts.232.w2", "model.layers.26.block_sparse_moe.experts.233.w2", "model.layers.26.block_sparse_moe.experts.234.w2", "model.layers.26.block_sparse_moe.experts.235.w2", "model.layers.26.block_sparse_moe.experts.236.w2", "model.layers.26.block_sparse_moe.experts.237.w2", "model.layers.26.block_sparse_moe.experts.238.w2", "model.layers.26.block_sparse_moe.experts.239.w2", "model.layers.26.block_sparse_moe.experts.240.w2", "model.layers.26.block_sparse_moe.experts.241.w2", "model.layers.26.block_sparse_moe.experts.242.w2", "model.layers.26.block_sparse_moe.experts.243.w2", "model.layers.26.block_sparse_moe.experts.244.w2", "model.layers.26.block_sparse_moe.experts.245.w2", "model.layers.26.block_sparse_moe.experts.246.w2", "model.layers.26.block_sparse_moe.experts.247.w2", "model.layers.26.block_sparse_moe.experts.248.w2", "model.layers.26.block_sparse_moe.experts.249.w2", "model.layers.26.block_sparse_moe.experts.250.w2", "model.layers.26.block_sparse_moe.experts.251.w2", "model.layers.26.block_sparse_moe.experts.252.w2", "model.layers.26.block_sparse_moe.experts.253.w2", "model.layers.26.block_sparse_moe.experts.254.w2", "model.layers.26.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0019535452127456554, "dbits": 3623878656 } ] }, { "idx": 54, "layers": [ "model.layers.27.self_attn.q_proj", "model.layers.27.self_attn.k_proj", "model.layers.27.self_attn.v_proj", "model.layers.27.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0030271559953689575, "dbits": 44040192 } ] }, { "idx": 55, "layers": [ "model.layers.27.block_sparse_moe.experts.0.w1", "model.layers.27.block_sparse_moe.experts.1.w1", "model.layers.27.block_sparse_moe.experts.2.w1", "model.layers.27.block_sparse_moe.experts.3.w1", "model.layers.27.block_sparse_moe.experts.4.w1", "model.layers.27.block_sparse_moe.experts.5.w1", "model.layers.27.block_sparse_moe.experts.6.w1", "model.layers.27.block_sparse_moe.experts.7.w1", "model.layers.27.block_sparse_moe.experts.8.w1", "model.layers.27.block_sparse_moe.experts.9.w1", "model.layers.27.block_sparse_moe.experts.10.w1", "model.layers.27.block_sparse_moe.experts.11.w1", "model.layers.27.block_sparse_moe.experts.12.w1", "model.layers.27.block_sparse_moe.experts.13.w1", "model.layers.27.block_sparse_moe.experts.14.w1", "model.layers.27.block_sparse_moe.experts.15.w1", "model.layers.27.block_sparse_moe.experts.16.w1", "model.layers.27.block_sparse_moe.experts.17.w1", "model.layers.27.block_sparse_moe.experts.18.w1", "model.layers.27.block_sparse_moe.experts.19.w1", "model.layers.27.block_sparse_moe.experts.20.w1", "model.layers.27.block_sparse_moe.experts.21.w1", "model.layers.27.block_sparse_moe.experts.22.w1", "model.layers.27.block_sparse_moe.experts.23.w1", "model.layers.27.block_sparse_moe.experts.24.w1", "model.layers.27.block_sparse_moe.experts.25.w1", "model.layers.27.block_sparse_moe.experts.26.w1", "model.layers.27.block_sparse_moe.experts.27.w1", "model.layers.27.block_sparse_moe.experts.28.w1", "model.layers.27.block_sparse_moe.experts.29.w1", "model.layers.27.block_sparse_moe.experts.30.w1", "model.layers.27.block_sparse_moe.experts.31.w1", "model.layers.27.block_sparse_moe.experts.32.w1", "model.layers.27.block_sparse_moe.experts.33.w1", "model.layers.27.block_sparse_moe.experts.34.w1", "model.layers.27.block_sparse_moe.experts.35.w1", "model.layers.27.block_sparse_moe.experts.36.w1", "model.layers.27.block_sparse_moe.experts.37.w1", "model.layers.27.block_sparse_moe.experts.38.w1", "model.layers.27.block_sparse_moe.experts.39.w1", "model.layers.27.block_sparse_moe.experts.40.w1", "model.layers.27.block_sparse_moe.experts.41.w1", "model.layers.27.block_sparse_moe.experts.42.w1", "model.layers.27.block_sparse_moe.experts.43.w1", "model.layers.27.block_sparse_moe.experts.44.w1", "model.layers.27.block_sparse_moe.experts.45.w1", "model.layers.27.block_sparse_moe.experts.46.w1", "model.layers.27.block_sparse_moe.experts.47.w1", "model.layers.27.block_sparse_moe.experts.48.w1", "model.layers.27.block_sparse_moe.experts.49.w1", "model.layers.27.block_sparse_moe.experts.50.w1", "model.layers.27.block_sparse_moe.experts.51.w1", "model.layers.27.block_sparse_moe.experts.52.w1", "model.layers.27.block_sparse_moe.experts.53.w1", "model.layers.27.block_sparse_moe.experts.54.w1", "model.layers.27.block_sparse_moe.experts.55.w1", "model.layers.27.block_sparse_moe.experts.56.w1", "model.layers.27.block_sparse_moe.experts.57.w1", "model.layers.27.block_sparse_moe.experts.58.w1", "model.layers.27.block_sparse_moe.experts.59.w1", "model.layers.27.block_sparse_moe.experts.60.w1", "model.layers.27.block_sparse_moe.experts.61.w1", "model.layers.27.block_sparse_moe.experts.62.w1", "model.layers.27.block_sparse_moe.experts.63.w1", "model.layers.27.block_sparse_moe.experts.64.w1", "model.layers.27.block_sparse_moe.experts.65.w1", "model.layers.27.block_sparse_moe.experts.66.w1", "model.layers.27.block_sparse_moe.experts.67.w1", "model.layers.27.block_sparse_moe.experts.68.w1", "model.layers.27.block_sparse_moe.experts.69.w1", "model.layers.27.block_sparse_moe.experts.70.w1", "model.layers.27.block_sparse_moe.experts.71.w1", "model.layers.27.block_sparse_moe.experts.72.w1", "model.layers.27.block_sparse_moe.experts.73.w1", "model.layers.27.block_sparse_moe.experts.74.w1", "model.layers.27.block_sparse_moe.experts.75.w1", "model.layers.27.block_sparse_moe.experts.76.w1", "model.layers.27.block_sparse_moe.experts.77.w1", "model.layers.27.block_sparse_moe.experts.78.w1", "model.layers.27.block_sparse_moe.experts.79.w1", "model.layers.27.block_sparse_moe.experts.80.w1", "model.layers.27.block_sparse_moe.experts.81.w1", "model.layers.27.block_sparse_moe.experts.82.w1", "model.layers.27.block_sparse_moe.experts.83.w1", "model.layers.27.block_sparse_moe.experts.84.w1", "model.layers.27.block_sparse_moe.experts.85.w1", "model.layers.27.block_sparse_moe.experts.86.w1", "model.layers.27.block_sparse_moe.experts.87.w1", "model.layers.27.block_sparse_moe.experts.88.w1", "model.layers.27.block_sparse_moe.experts.89.w1", "model.layers.27.block_sparse_moe.experts.90.w1", "model.layers.27.block_sparse_moe.experts.91.w1", "model.layers.27.block_sparse_moe.experts.92.w1", "model.layers.27.block_sparse_moe.experts.93.w1", "model.layers.27.block_sparse_moe.experts.94.w1", "model.layers.27.block_sparse_moe.experts.95.w1", "model.layers.27.block_sparse_moe.experts.96.w1", "model.layers.27.block_sparse_moe.experts.97.w1", "model.layers.27.block_sparse_moe.experts.98.w1", "model.layers.27.block_sparse_moe.experts.99.w1", "model.layers.27.block_sparse_moe.experts.100.w1", "model.layers.27.block_sparse_moe.experts.101.w1", "model.layers.27.block_sparse_moe.experts.102.w1", "model.layers.27.block_sparse_moe.experts.103.w1", "model.layers.27.block_sparse_moe.experts.104.w1", "model.layers.27.block_sparse_moe.experts.105.w1", "model.layers.27.block_sparse_moe.experts.106.w1", "model.layers.27.block_sparse_moe.experts.107.w1", "model.layers.27.block_sparse_moe.experts.108.w1", "model.layers.27.block_sparse_moe.experts.109.w1", "model.layers.27.block_sparse_moe.experts.110.w1", "model.layers.27.block_sparse_moe.experts.111.w1", "model.layers.27.block_sparse_moe.experts.112.w1", "model.layers.27.block_sparse_moe.experts.113.w1", "model.layers.27.block_sparse_moe.experts.114.w1", "model.layers.27.block_sparse_moe.experts.115.w1", "model.layers.27.block_sparse_moe.experts.116.w1", "model.layers.27.block_sparse_moe.experts.117.w1", "model.layers.27.block_sparse_moe.experts.118.w1", "model.layers.27.block_sparse_moe.experts.119.w1", "model.layers.27.block_sparse_moe.experts.120.w1", "model.layers.27.block_sparse_moe.experts.121.w1", "model.layers.27.block_sparse_moe.experts.122.w1", "model.layers.27.block_sparse_moe.experts.123.w1", "model.layers.27.block_sparse_moe.experts.124.w1", "model.layers.27.block_sparse_moe.experts.125.w1", "model.layers.27.block_sparse_moe.experts.126.w1", "model.layers.27.block_sparse_moe.experts.127.w1", "model.layers.27.block_sparse_moe.experts.128.w1", "model.layers.27.block_sparse_moe.experts.129.w1", "model.layers.27.block_sparse_moe.experts.130.w1", "model.layers.27.block_sparse_moe.experts.131.w1", "model.layers.27.block_sparse_moe.experts.132.w1", "model.layers.27.block_sparse_moe.experts.133.w1", "model.layers.27.block_sparse_moe.experts.134.w1", "model.layers.27.block_sparse_moe.experts.135.w1", "model.layers.27.block_sparse_moe.experts.136.w1", "model.layers.27.block_sparse_moe.experts.137.w1", "model.layers.27.block_sparse_moe.experts.138.w1", "model.layers.27.block_sparse_moe.experts.139.w1", "model.layers.27.block_sparse_moe.experts.140.w1", "model.layers.27.block_sparse_moe.experts.141.w1", "model.layers.27.block_sparse_moe.experts.142.w1", "model.layers.27.block_sparse_moe.experts.143.w1", "model.layers.27.block_sparse_moe.experts.144.w1", "model.layers.27.block_sparse_moe.experts.145.w1", "model.layers.27.block_sparse_moe.experts.146.w1", "model.layers.27.block_sparse_moe.experts.147.w1", "model.layers.27.block_sparse_moe.experts.148.w1", "model.layers.27.block_sparse_moe.experts.149.w1", "model.layers.27.block_sparse_moe.experts.150.w1", "model.layers.27.block_sparse_moe.experts.151.w1", "model.layers.27.block_sparse_moe.experts.152.w1", "model.layers.27.block_sparse_moe.experts.153.w1", "model.layers.27.block_sparse_moe.experts.154.w1", "model.layers.27.block_sparse_moe.experts.155.w1", "model.layers.27.block_sparse_moe.experts.156.w1", "model.layers.27.block_sparse_moe.experts.157.w1", "model.layers.27.block_sparse_moe.experts.158.w1", "model.layers.27.block_sparse_moe.experts.159.w1", "model.layers.27.block_sparse_moe.experts.160.w1", "model.layers.27.block_sparse_moe.experts.161.w1", "model.layers.27.block_sparse_moe.experts.162.w1", "model.layers.27.block_sparse_moe.experts.163.w1", "model.layers.27.block_sparse_moe.experts.164.w1", "model.layers.27.block_sparse_moe.experts.165.w1", "model.layers.27.block_sparse_moe.experts.166.w1", "model.layers.27.block_sparse_moe.experts.167.w1", "model.layers.27.block_sparse_moe.experts.168.w1", "model.layers.27.block_sparse_moe.experts.169.w1", "model.layers.27.block_sparse_moe.experts.170.w1", "model.layers.27.block_sparse_moe.experts.171.w1", "model.layers.27.block_sparse_moe.experts.172.w1", "model.layers.27.block_sparse_moe.experts.173.w1", "model.layers.27.block_sparse_moe.experts.174.w1", "model.layers.27.block_sparse_moe.experts.175.w1", "model.layers.27.block_sparse_moe.experts.176.w1", "model.layers.27.block_sparse_moe.experts.177.w1", "model.layers.27.block_sparse_moe.experts.178.w1", "model.layers.27.block_sparse_moe.experts.179.w1", "model.layers.27.block_sparse_moe.experts.180.w1", "model.layers.27.block_sparse_moe.experts.181.w1", "model.layers.27.block_sparse_moe.experts.182.w1", "model.layers.27.block_sparse_moe.experts.183.w1", "model.layers.27.block_sparse_moe.experts.184.w1", "model.layers.27.block_sparse_moe.experts.185.w1", "model.layers.27.block_sparse_moe.experts.186.w1", "model.layers.27.block_sparse_moe.experts.187.w1", "model.layers.27.block_sparse_moe.experts.188.w1", "model.layers.27.block_sparse_moe.experts.189.w1", "model.layers.27.block_sparse_moe.experts.190.w1", "model.layers.27.block_sparse_moe.experts.191.w1", "model.layers.27.block_sparse_moe.experts.192.w1", "model.layers.27.block_sparse_moe.experts.193.w1", "model.layers.27.block_sparse_moe.experts.194.w1", "model.layers.27.block_sparse_moe.experts.195.w1", "model.layers.27.block_sparse_moe.experts.196.w1", "model.layers.27.block_sparse_moe.experts.197.w1", "model.layers.27.block_sparse_moe.experts.198.w1", "model.layers.27.block_sparse_moe.experts.199.w1", "model.layers.27.block_sparse_moe.experts.200.w1", "model.layers.27.block_sparse_moe.experts.201.w1", "model.layers.27.block_sparse_moe.experts.202.w1", "model.layers.27.block_sparse_moe.experts.203.w1", "model.layers.27.block_sparse_moe.experts.204.w1", "model.layers.27.block_sparse_moe.experts.205.w1", "model.layers.27.block_sparse_moe.experts.206.w1", "model.layers.27.block_sparse_moe.experts.207.w1", "model.layers.27.block_sparse_moe.experts.208.w1", "model.layers.27.block_sparse_moe.experts.209.w1", "model.layers.27.block_sparse_moe.experts.210.w1", "model.layers.27.block_sparse_moe.experts.211.w1", "model.layers.27.block_sparse_moe.experts.212.w1", "model.layers.27.block_sparse_moe.experts.213.w1", "model.layers.27.block_sparse_moe.experts.214.w1", "model.layers.27.block_sparse_moe.experts.215.w1", "model.layers.27.block_sparse_moe.experts.216.w1", "model.layers.27.block_sparse_moe.experts.217.w1", "model.layers.27.block_sparse_moe.experts.218.w1", "model.layers.27.block_sparse_moe.experts.219.w1", "model.layers.27.block_sparse_moe.experts.220.w1", "model.layers.27.block_sparse_moe.experts.221.w1", "model.layers.27.block_sparse_moe.experts.222.w1", "model.layers.27.block_sparse_moe.experts.223.w1", "model.layers.27.block_sparse_moe.experts.224.w1", "model.layers.27.block_sparse_moe.experts.225.w1", "model.layers.27.block_sparse_moe.experts.226.w1", "model.layers.27.block_sparse_moe.experts.227.w1", "model.layers.27.block_sparse_moe.experts.228.w1", "model.layers.27.block_sparse_moe.experts.229.w1", "model.layers.27.block_sparse_moe.experts.230.w1", "model.layers.27.block_sparse_moe.experts.231.w1", "model.layers.27.block_sparse_moe.experts.232.w1", "model.layers.27.block_sparse_moe.experts.233.w1", "model.layers.27.block_sparse_moe.experts.234.w1", "model.layers.27.block_sparse_moe.experts.235.w1", "model.layers.27.block_sparse_moe.experts.236.w1", "model.layers.27.block_sparse_moe.experts.237.w1", "model.layers.27.block_sparse_moe.experts.238.w1", "model.layers.27.block_sparse_moe.experts.239.w1", "model.layers.27.block_sparse_moe.experts.240.w1", "model.layers.27.block_sparse_moe.experts.241.w1", "model.layers.27.block_sparse_moe.experts.242.w1", "model.layers.27.block_sparse_moe.experts.243.w1", "model.layers.27.block_sparse_moe.experts.244.w1", "model.layers.27.block_sparse_moe.experts.245.w1", "model.layers.27.block_sparse_moe.experts.246.w1", "model.layers.27.block_sparse_moe.experts.247.w1", "model.layers.27.block_sparse_moe.experts.248.w1", "model.layers.27.block_sparse_moe.experts.249.w1", "model.layers.27.block_sparse_moe.experts.250.w1", "model.layers.27.block_sparse_moe.experts.251.w1", "model.layers.27.block_sparse_moe.experts.252.w1", "model.layers.27.block_sparse_moe.experts.253.w1", "model.layers.27.block_sparse_moe.experts.254.w1", "model.layers.27.block_sparse_moe.experts.255.w1", "model.layers.27.block_sparse_moe.experts.0.w3", "model.layers.27.block_sparse_moe.experts.1.w3", "model.layers.27.block_sparse_moe.experts.2.w3", "model.layers.27.block_sparse_moe.experts.3.w3", "model.layers.27.block_sparse_moe.experts.4.w3", "model.layers.27.block_sparse_moe.experts.5.w3", "model.layers.27.block_sparse_moe.experts.6.w3", "model.layers.27.block_sparse_moe.experts.7.w3", "model.layers.27.block_sparse_moe.experts.8.w3", "model.layers.27.block_sparse_moe.experts.9.w3", "model.layers.27.block_sparse_moe.experts.10.w3", "model.layers.27.block_sparse_moe.experts.11.w3", "model.layers.27.block_sparse_moe.experts.12.w3", "model.layers.27.block_sparse_moe.experts.13.w3", "model.layers.27.block_sparse_moe.experts.14.w3", "model.layers.27.block_sparse_moe.experts.15.w3", "model.layers.27.block_sparse_moe.experts.16.w3", "model.layers.27.block_sparse_moe.experts.17.w3", "model.layers.27.block_sparse_moe.experts.18.w3", "model.layers.27.block_sparse_moe.experts.19.w3", "model.layers.27.block_sparse_moe.experts.20.w3", "model.layers.27.block_sparse_moe.experts.21.w3", "model.layers.27.block_sparse_moe.experts.22.w3", "model.layers.27.block_sparse_moe.experts.23.w3", "model.layers.27.block_sparse_moe.experts.24.w3", "model.layers.27.block_sparse_moe.experts.25.w3", "model.layers.27.block_sparse_moe.experts.26.w3", "model.layers.27.block_sparse_moe.experts.27.w3", "model.layers.27.block_sparse_moe.experts.28.w3", "model.layers.27.block_sparse_moe.experts.29.w3", "model.layers.27.block_sparse_moe.experts.30.w3", "model.layers.27.block_sparse_moe.experts.31.w3", "model.layers.27.block_sparse_moe.experts.32.w3", "model.layers.27.block_sparse_moe.experts.33.w3", "model.layers.27.block_sparse_moe.experts.34.w3", "model.layers.27.block_sparse_moe.experts.35.w3", "model.layers.27.block_sparse_moe.experts.36.w3", "model.layers.27.block_sparse_moe.experts.37.w3", "model.layers.27.block_sparse_moe.experts.38.w3", "model.layers.27.block_sparse_moe.experts.39.w3", "model.layers.27.block_sparse_moe.experts.40.w3", "model.layers.27.block_sparse_moe.experts.41.w3", "model.layers.27.block_sparse_moe.experts.42.w3", "model.layers.27.block_sparse_moe.experts.43.w3", "model.layers.27.block_sparse_moe.experts.44.w3", "model.layers.27.block_sparse_moe.experts.45.w3", "model.layers.27.block_sparse_moe.experts.46.w3", "model.layers.27.block_sparse_moe.experts.47.w3", "model.layers.27.block_sparse_moe.experts.48.w3", "model.layers.27.block_sparse_moe.experts.49.w3", "model.layers.27.block_sparse_moe.experts.50.w3", "model.layers.27.block_sparse_moe.experts.51.w3", "model.layers.27.block_sparse_moe.experts.52.w3", "model.layers.27.block_sparse_moe.experts.53.w3", "model.layers.27.block_sparse_moe.experts.54.w3", "model.layers.27.block_sparse_moe.experts.55.w3", "model.layers.27.block_sparse_moe.experts.56.w3", "model.layers.27.block_sparse_moe.experts.57.w3", "model.layers.27.block_sparse_moe.experts.58.w3", "model.layers.27.block_sparse_moe.experts.59.w3", "model.layers.27.block_sparse_moe.experts.60.w3", "model.layers.27.block_sparse_moe.experts.61.w3", "model.layers.27.block_sparse_moe.experts.62.w3", "model.layers.27.block_sparse_moe.experts.63.w3", "model.layers.27.block_sparse_moe.experts.64.w3", "model.layers.27.block_sparse_moe.experts.65.w3", "model.layers.27.block_sparse_moe.experts.66.w3", "model.layers.27.block_sparse_moe.experts.67.w3", "model.layers.27.block_sparse_moe.experts.68.w3", "model.layers.27.block_sparse_moe.experts.69.w3", "model.layers.27.block_sparse_moe.experts.70.w3", "model.layers.27.block_sparse_moe.experts.71.w3", "model.layers.27.block_sparse_moe.experts.72.w3", "model.layers.27.block_sparse_moe.experts.73.w3", "model.layers.27.block_sparse_moe.experts.74.w3", "model.layers.27.block_sparse_moe.experts.75.w3", "model.layers.27.block_sparse_moe.experts.76.w3", "model.layers.27.block_sparse_moe.experts.77.w3", "model.layers.27.block_sparse_moe.experts.78.w3", "model.layers.27.block_sparse_moe.experts.79.w3", "model.layers.27.block_sparse_moe.experts.80.w3", "model.layers.27.block_sparse_moe.experts.81.w3", "model.layers.27.block_sparse_moe.experts.82.w3", "model.layers.27.block_sparse_moe.experts.83.w3", "model.layers.27.block_sparse_moe.experts.84.w3", "model.layers.27.block_sparse_moe.experts.85.w3", "model.layers.27.block_sparse_moe.experts.86.w3", "model.layers.27.block_sparse_moe.experts.87.w3", "model.layers.27.block_sparse_moe.experts.88.w3", "model.layers.27.block_sparse_moe.experts.89.w3", "model.layers.27.block_sparse_moe.experts.90.w3", "model.layers.27.block_sparse_moe.experts.91.w3", "model.layers.27.block_sparse_moe.experts.92.w3", "model.layers.27.block_sparse_moe.experts.93.w3", "model.layers.27.block_sparse_moe.experts.94.w3", "model.layers.27.block_sparse_moe.experts.95.w3", "model.layers.27.block_sparse_moe.experts.96.w3", "model.layers.27.block_sparse_moe.experts.97.w3", "model.layers.27.block_sparse_moe.experts.98.w3", "model.layers.27.block_sparse_moe.experts.99.w3", "model.layers.27.block_sparse_moe.experts.100.w3", "model.layers.27.block_sparse_moe.experts.101.w3", "model.layers.27.block_sparse_moe.experts.102.w3", "model.layers.27.block_sparse_moe.experts.103.w3", "model.layers.27.block_sparse_moe.experts.104.w3", "model.layers.27.block_sparse_moe.experts.105.w3", "model.layers.27.block_sparse_moe.experts.106.w3", "model.layers.27.block_sparse_moe.experts.107.w3", "model.layers.27.block_sparse_moe.experts.108.w3", "model.layers.27.block_sparse_moe.experts.109.w3", "model.layers.27.block_sparse_moe.experts.110.w3", "model.layers.27.block_sparse_moe.experts.111.w3", "model.layers.27.block_sparse_moe.experts.112.w3", "model.layers.27.block_sparse_moe.experts.113.w3", "model.layers.27.block_sparse_moe.experts.114.w3", "model.layers.27.block_sparse_moe.experts.115.w3", "model.layers.27.block_sparse_moe.experts.116.w3", "model.layers.27.block_sparse_moe.experts.117.w3", "model.layers.27.block_sparse_moe.experts.118.w3", "model.layers.27.block_sparse_moe.experts.119.w3", "model.layers.27.block_sparse_moe.experts.120.w3", "model.layers.27.block_sparse_moe.experts.121.w3", "model.layers.27.block_sparse_moe.experts.122.w3", "model.layers.27.block_sparse_moe.experts.123.w3", "model.layers.27.block_sparse_moe.experts.124.w3", "model.layers.27.block_sparse_moe.experts.125.w3", "model.layers.27.block_sparse_moe.experts.126.w3", "model.layers.27.block_sparse_moe.experts.127.w3", "model.layers.27.block_sparse_moe.experts.128.w3", "model.layers.27.block_sparse_moe.experts.129.w3", "model.layers.27.block_sparse_moe.experts.130.w3", "model.layers.27.block_sparse_moe.experts.131.w3", "model.layers.27.block_sparse_moe.experts.132.w3", "model.layers.27.block_sparse_moe.experts.133.w3", "model.layers.27.block_sparse_moe.experts.134.w3", "model.layers.27.block_sparse_moe.experts.135.w3", "model.layers.27.block_sparse_moe.experts.136.w3", "model.layers.27.block_sparse_moe.experts.137.w3", "model.layers.27.block_sparse_moe.experts.138.w3", "model.layers.27.block_sparse_moe.experts.139.w3", "model.layers.27.block_sparse_moe.experts.140.w3", "model.layers.27.block_sparse_moe.experts.141.w3", "model.layers.27.block_sparse_moe.experts.142.w3", "model.layers.27.block_sparse_moe.experts.143.w3", "model.layers.27.block_sparse_moe.experts.144.w3", "model.layers.27.block_sparse_moe.experts.145.w3", "model.layers.27.block_sparse_moe.experts.146.w3", "model.layers.27.block_sparse_moe.experts.147.w3", "model.layers.27.block_sparse_moe.experts.148.w3", "model.layers.27.block_sparse_moe.experts.149.w3", "model.layers.27.block_sparse_moe.experts.150.w3", "model.layers.27.block_sparse_moe.experts.151.w3", "model.layers.27.block_sparse_moe.experts.152.w3", "model.layers.27.block_sparse_moe.experts.153.w3", "model.layers.27.block_sparse_moe.experts.154.w3", "model.layers.27.block_sparse_moe.experts.155.w3", "model.layers.27.block_sparse_moe.experts.156.w3", "model.layers.27.block_sparse_moe.experts.157.w3", "model.layers.27.block_sparse_moe.experts.158.w3", "model.layers.27.block_sparse_moe.experts.159.w3", "model.layers.27.block_sparse_moe.experts.160.w3", "model.layers.27.block_sparse_moe.experts.161.w3", "model.layers.27.block_sparse_moe.experts.162.w3", "model.layers.27.block_sparse_moe.experts.163.w3", "model.layers.27.block_sparse_moe.experts.164.w3", "model.layers.27.block_sparse_moe.experts.165.w3", "model.layers.27.block_sparse_moe.experts.166.w3", "model.layers.27.block_sparse_moe.experts.167.w3", "model.layers.27.block_sparse_moe.experts.168.w3", "model.layers.27.block_sparse_moe.experts.169.w3", "model.layers.27.block_sparse_moe.experts.170.w3", "model.layers.27.block_sparse_moe.experts.171.w3", "model.layers.27.block_sparse_moe.experts.172.w3", "model.layers.27.block_sparse_moe.experts.173.w3", "model.layers.27.block_sparse_moe.experts.174.w3", "model.layers.27.block_sparse_moe.experts.175.w3", "model.layers.27.block_sparse_moe.experts.176.w3", "model.layers.27.block_sparse_moe.experts.177.w3", "model.layers.27.block_sparse_moe.experts.178.w3", "model.layers.27.block_sparse_moe.experts.179.w3", "model.layers.27.block_sparse_moe.experts.180.w3", "model.layers.27.block_sparse_moe.experts.181.w3", "model.layers.27.block_sparse_moe.experts.182.w3", "model.layers.27.block_sparse_moe.experts.183.w3", "model.layers.27.block_sparse_moe.experts.184.w3", "model.layers.27.block_sparse_moe.experts.185.w3", "model.layers.27.block_sparse_moe.experts.186.w3", "model.layers.27.block_sparse_moe.experts.187.w3", "model.layers.27.block_sparse_moe.experts.188.w3", "model.layers.27.block_sparse_moe.experts.189.w3", "model.layers.27.block_sparse_moe.experts.190.w3", "model.layers.27.block_sparse_moe.experts.191.w3", "model.layers.27.block_sparse_moe.experts.192.w3", "model.layers.27.block_sparse_moe.experts.193.w3", "model.layers.27.block_sparse_moe.experts.194.w3", "model.layers.27.block_sparse_moe.experts.195.w3", "model.layers.27.block_sparse_moe.experts.196.w3", "model.layers.27.block_sparse_moe.experts.197.w3", "model.layers.27.block_sparse_moe.experts.198.w3", "model.layers.27.block_sparse_moe.experts.199.w3", "model.layers.27.block_sparse_moe.experts.200.w3", "model.layers.27.block_sparse_moe.experts.201.w3", "model.layers.27.block_sparse_moe.experts.202.w3", "model.layers.27.block_sparse_moe.experts.203.w3", "model.layers.27.block_sparse_moe.experts.204.w3", "model.layers.27.block_sparse_moe.experts.205.w3", "model.layers.27.block_sparse_moe.experts.206.w3", "model.layers.27.block_sparse_moe.experts.207.w3", "model.layers.27.block_sparse_moe.experts.208.w3", "model.layers.27.block_sparse_moe.experts.209.w3", "model.layers.27.block_sparse_moe.experts.210.w3", "model.layers.27.block_sparse_moe.experts.211.w3", "model.layers.27.block_sparse_moe.experts.212.w3", "model.layers.27.block_sparse_moe.experts.213.w3", "model.layers.27.block_sparse_moe.experts.214.w3", "model.layers.27.block_sparse_moe.experts.215.w3", "model.layers.27.block_sparse_moe.experts.216.w3", "model.layers.27.block_sparse_moe.experts.217.w3", "model.layers.27.block_sparse_moe.experts.218.w3", "model.layers.27.block_sparse_moe.experts.219.w3", "model.layers.27.block_sparse_moe.experts.220.w3", "model.layers.27.block_sparse_moe.experts.221.w3", "model.layers.27.block_sparse_moe.experts.222.w3", "model.layers.27.block_sparse_moe.experts.223.w3", "model.layers.27.block_sparse_moe.experts.224.w3", "model.layers.27.block_sparse_moe.experts.225.w3", "model.layers.27.block_sparse_moe.experts.226.w3", "model.layers.27.block_sparse_moe.experts.227.w3", "model.layers.27.block_sparse_moe.experts.228.w3", "model.layers.27.block_sparse_moe.experts.229.w3", "model.layers.27.block_sparse_moe.experts.230.w3", "model.layers.27.block_sparse_moe.experts.231.w3", "model.layers.27.block_sparse_moe.experts.232.w3", "model.layers.27.block_sparse_moe.experts.233.w3", "model.layers.27.block_sparse_moe.experts.234.w3", "model.layers.27.block_sparse_moe.experts.235.w3", "model.layers.27.block_sparse_moe.experts.236.w3", "model.layers.27.block_sparse_moe.experts.237.w3", "model.layers.27.block_sparse_moe.experts.238.w3", "model.layers.27.block_sparse_moe.experts.239.w3", "model.layers.27.block_sparse_moe.experts.240.w3", "model.layers.27.block_sparse_moe.experts.241.w3", "model.layers.27.block_sparse_moe.experts.242.w3", "model.layers.27.block_sparse_moe.experts.243.w3", "model.layers.27.block_sparse_moe.experts.244.w3", "model.layers.27.block_sparse_moe.experts.245.w3", "model.layers.27.block_sparse_moe.experts.246.w3", "model.layers.27.block_sparse_moe.experts.247.w3", "model.layers.27.block_sparse_moe.experts.248.w3", "model.layers.27.block_sparse_moe.experts.249.w3", "model.layers.27.block_sparse_moe.experts.250.w3", "model.layers.27.block_sparse_moe.experts.251.w3", "model.layers.27.block_sparse_moe.experts.252.w3", "model.layers.27.block_sparse_moe.experts.253.w3", "model.layers.27.block_sparse_moe.experts.254.w3", "model.layers.27.block_sparse_moe.experts.255.w3", "model.layers.27.block_sparse_moe.experts.0.w2", "model.layers.27.block_sparse_moe.experts.1.w2", "model.layers.27.block_sparse_moe.experts.2.w2", "model.layers.27.block_sparse_moe.experts.3.w2", "model.layers.27.block_sparse_moe.experts.4.w2", "model.layers.27.block_sparse_moe.experts.5.w2", "model.layers.27.block_sparse_moe.experts.6.w2", "model.layers.27.block_sparse_moe.experts.7.w2", "model.layers.27.block_sparse_moe.experts.8.w2", "model.layers.27.block_sparse_moe.experts.9.w2", "model.layers.27.block_sparse_moe.experts.10.w2", "model.layers.27.block_sparse_moe.experts.11.w2", "model.layers.27.block_sparse_moe.experts.12.w2", "model.layers.27.block_sparse_moe.experts.13.w2", "model.layers.27.block_sparse_moe.experts.14.w2", "model.layers.27.block_sparse_moe.experts.15.w2", "model.layers.27.block_sparse_moe.experts.16.w2", "model.layers.27.block_sparse_moe.experts.17.w2", "model.layers.27.block_sparse_moe.experts.18.w2", "model.layers.27.block_sparse_moe.experts.19.w2", "model.layers.27.block_sparse_moe.experts.20.w2", "model.layers.27.block_sparse_moe.experts.21.w2", "model.layers.27.block_sparse_moe.experts.22.w2", "model.layers.27.block_sparse_moe.experts.23.w2", "model.layers.27.block_sparse_moe.experts.24.w2", "model.layers.27.block_sparse_moe.experts.25.w2", "model.layers.27.block_sparse_moe.experts.26.w2", "model.layers.27.block_sparse_moe.experts.27.w2", "model.layers.27.block_sparse_moe.experts.28.w2", "model.layers.27.block_sparse_moe.experts.29.w2", "model.layers.27.block_sparse_moe.experts.30.w2", "model.layers.27.block_sparse_moe.experts.31.w2", "model.layers.27.block_sparse_moe.experts.32.w2", "model.layers.27.block_sparse_moe.experts.33.w2", "model.layers.27.block_sparse_moe.experts.34.w2", "model.layers.27.block_sparse_moe.experts.35.w2", "model.layers.27.block_sparse_moe.experts.36.w2", "model.layers.27.block_sparse_moe.experts.37.w2", "model.layers.27.block_sparse_moe.experts.38.w2", "model.layers.27.block_sparse_moe.experts.39.w2", "model.layers.27.block_sparse_moe.experts.40.w2", "model.layers.27.block_sparse_moe.experts.41.w2", "model.layers.27.block_sparse_moe.experts.42.w2", "model.layers.27.block_sparse_moe.experts.43.w2", "model.layers.27.block_sparse_moe.experts.44.w2", "model.layers.27.block_sparse_moe.experts.45.w2", "model.layers.27.block_sparse_moe.experts.46.w2", "model.layers.27.block_sparse_moe.experts.47.w2", "model.layers.27.block_sparse_moe.experts.48.w2", "model.layers.27.block_sparse_moe.experts.49.w2", "model.layers.27.block_sparse_moe.experts.50.w2", "model.layers.27.block_sparse_moe.experts.51.w2", "model.layers.27.block_sparse_moe.experts.52.w2", "model.layers.27.block_sparse_moe.experts.53.w2", "model.layers.27.block_sparse_moe.experts.54.w2", "model.layers.27.block_sparse_moe.experts.55.w2", "model.layers.27.block_sparse_moe.experts.56.w2", "model.layers.27.block_sparse_moe.experts.57.w2", "model.layers.27.block_sparse_moe.experts.58.w2", "model.layers.27.block_sparse_moe.experts.59.w2", "model.layers.27.block_sparse_moe.experts.60.w2", "model.layers.27.block_sparse_moe.experts.61.w2", "model.layers.27.block_sparse_moe.experts.62.w2", "model.layers.27.block_sparse_moe.experts.63.w2", "model.layers.27.block_sparse_moe.experts.64.w2", "model.layers.27.block_sparse_moe.experts.65.w2", "model.layers.27.block_sparse_moe.experts.66.w2", "model.layers.27.block_sparse_moe.experts.67.w2", "model.layers.27.block_sparse_moe.experts.68.w2", "model.layers.27.block_sparse_moe.experts.69.w2", "model.layers.27.block_sparse_moe.experts.70.w2", "model.layers.27.block_sparse_moe.experts.71.w2", "model.layers.27.block_sparse_moe.experts.72.w2", "model.layers.27.block_sparse_moe.experts.73.w2", "model.layers.27.block_sparse_moe.experts.74.w2", "model.layers.27.block_sparse_moe.experts.75.w2", "model.layers.27.block_sparse_moe.experts.76.w2", "model.layers.27.block_sparse_moe.experts.77.w2", "model.layers.27.block_sparse_moe.experts.78.w2", "model.layers.27.block_sparse_moe.experts.79.w2", "model.layers.27.block_sparse_moe.experts.80.w2", "model.layers.27.block_sparse_moe.experts.81.w2", "model.layers.27.block_sparse_moe.experts.82.w2", "model.layers.27.block_sparse_moe.experts.83.w2", "model.layers.27.block_sparse_moe.experts.84.w2", "model.layers.27.block_sparse_moe.experts.85.w2", "model.layers.27.block_sparse_moe.experts.86.w2", "model.layers.27.block_sparse_moe.experts.87.w2", "model.layers.27.block_sparse_moe.experts.88.w2", "model.layers.27.block_sparse_moe.experts.89.w2", "model.layers.27.block_sparse_moe.experts.90.w2", "model.layers.27.block_sparse_moe.experts.91.w2", "model.layers.27.block_sparse_moe.experts.92.w2", "model.layers.27.block_sparse_moe.experts.93.w2", "model.layers.27.block_sparse_moe.experts.94.w2", "model.layers.27.block_sparse_moe.experts.95.w2", "model.layers.27.block_sparse_moe.experts.96.w2", "model.layers.27.block_sparse_moe.experts.97.w2", "model.layers.27.block_sparse_moe.experts.98.w2", "model.layers.27.block_sparse_moe.experts.99.w2", "model.layers.27.block_sparse_moe.experts.100.w2", "model.layers.27.block_sparse_moe.experts.101.w2", "model.layers.27.block_sparse_moe.experts.102.w2", "model.layers.27.block_sparse_moe.experts.103.w2", "model.layers.27.block_sparse_moe.experts.104.w2", "model.layers.27.block_sparse_moe.experts.105.w2", "model.layers.27.block_sparse_moe.experts.106.w2", "model.layers.27.block_sparse_moe.experts.107.w2", "model.layers.27.block_sparse_moe.experts.108.w2", "model.layers.27.block_sparse_moe.experts.109.w2", "model.layers.27.block_sparse_moe.experts.110.w2", "model.layers.27.block_sparse_moe.experts.111.w2", "model.layers.27.block_sparse_moe.experts.112.w2", "model.layers.27.block_sparse_moe.experts.113.w2", "model.layers.27.block_sparse_moe.experts.114.w2", "model.layers.27.block_sparse_moe.experts.115.w2", "model.layers.27.block_sparse_moe.experts.116.w2", "model.layers.27.block_sparse_moe.experts.117.w2", "model.layers.27.block_sparse_moe.experts.118.w2", "model.layers.27.block_sparse_moe.experts.119.w2", "model.layers.27.block_sparse_moe.experts.120.w2", "model.layers.27.block_sparse_moe.experts.121.w2", "model.layers.27.block_sparse_moe.experts.122.w2", "model.layers.27.block_sparse_moe.experts.123.w2", "model.layers.27.block_sparse_moe.experts.124.w2", "model.layers.27.block_sparse_moe.experts.125.w2", "model.layers.27.block_sparse_moe.experts.126.w2", "model.layers.27.block_sparse_moe.experts.127.w2", "model.layers.27.block_sparse_moe.experts.128.w2", "model.layers.27.block_sparse_moe.experts.129.w2", "model.layers.27.block_sparse_moe.experts.130.w2", "model.layers.27.block_sparse_moe.experts.131.w2", "model.layers.27.block_sparse_moe.experts.132.w2", "model.layers.27.block_sparse_moe.experts.133.w2", "model.layers.27.block_sparse_moe.experts.134.w2", "model.layers.27.block_sparse_moe.experts.135.w2", "model.layers.27.block_sparse_moe.experts.136.w2", "model.layers.27.block_sparse_moe.experts.137.w2", "model.layers.27.block_sparse_moe.experts.138.w2", "model.layers.27.block_sparse_moe.experts.139.w2", "model.layers.27.block_sparse_moe.experts.140.w2", "model.layers.27.block_sparse_moe.experts.141.w2", "model.layers.27.block_sparse_moe.experts.142.w2", "model.layers.27.block_sparse_moe.experts.143.w2", "model.layers.27.block_sparse_moe.experts.144.w2", "model.layers.27.block_sparse_moe.experts.145.w2", "model.layers.27.block_sparse_moe.experts.146.w2", "model.layers.27.block_sparse_moe.experts.147.w2", "model.layers.27.block_sparse_moe.experts.148.w2", "model.layers.27.block_sparse_moe.experts.149.w2", "model.layers.27.block_sparse_moe.experts.150.w2", "model.layers.27.block_sparse_moe.experts.151.w2", "model.layers.27.block_sparse_moe.experts.152.w2", "model.layers.27.block_sparse_moe.experts.153.w2", "model.layers.27.block_sparse_moe.experts.154.w2", "model.layers.27.block_sparse_moe.experts.155.w2", "model.layers.27.block_sparse_moe.experts.156.w2", "model.layers.27.block_sparse_moe.experts.157.w2", "model.layers.27.block_sparse_moe.experts.158.w2", "model.layers.27.block_sparse_moe.experts.159.w2", "model.layers.27.block_sparse_moe.experts.160.w2", "model.layers.27.block_sparse_moe.experts.161.w2", "model.layers.27.block_sparse_moe.experts.162.w2", "model.layers.27.block_sparse_moe.experts.163.w2", "model.layers.27.block_sparse_moe.experts.164.w2", "model.layers.27.block_sparse_moe.experts.165.w2", "model.layers.27.block_sparse_moe.experts.166.w2", "model.layers.27.block_sparse_moe.experts.167.w2", "model.layers.27.block_sparse_moe.experts.168.w2", "model.layers.27.block_sparse_moe.experts.169.w2", "model.layers.27.block_sparse_moe.experts.170.w2", "model.layers.27.block_sparse_moe.experts.171.w2", "model.layers.27.block_sparse_moe.experts.172.w2", "model.layers.27.block_sparse_moe.experts.173.w2", "model.layers.27.block_sparse_moe.experts.174.w2", "model.layers.27.block_sparse_moe.experts.175.w2", "model.layers.27.block_sparse_moe.experts.176.w2", "model.layers.27.block_sparse_moe.experts.177.w2", "model.layers.27.block_sparse_moe.experts.178.w2", "model.layers.27.block_sparse_moe.experts.179.w2", "model.layers.27.block_sparse_moe.experts.180.w2", "model.layers.27.block_sparse_moe.experts.181.w2", "model.layers.27.block_sparse_moe.experts.182.w2", "model.layers.27.block_sparse_moe.experts.183.w2", "model.layers.27.block_sparse_moe.experts.184.w2", "model.layers.27.block_sparse_moe.experts.185.w2", "model.layers.27.block_sparse_moe.experts.186.w2", "model.layers.27.block_sparse_moe.experts.187.w2", "model.layers.27.block_sparse_moe.experts.188.w2", "model.layers.27.block_sparse_moe.experts.189.w2", "model.layers.27.block_sparse_moe.experts.190.w2", "model.layers.27.block_sparse_moe.experts.191.w2", "model.layers.27.block_sparse_moe.experts.192.w2", "model.layers.27.block_sparse_moe.experts.193.w2", "model.layers.27.block_sparse_moe.experts.194.w2", "model.layers.27.block_sparse_moe.experts.195.w2", "model.layers.27.block_sparse_moe.experts.196.w2", "model.layers.27.block_sparse_moe.experts.197.w2", "model.layers.27.block_sparse_moe.experts.198.w2", "model.layers.27.block_sparse_moe.experts.199.w2", "model.layers.27.block_sparse_moe.experts.200.w2", "model.layers.27.block_sparse_moe.experts.201.w2", "model.layers.27.block_sparse_moe.experts.202.w2", "model.layers.27.block_sparse_moe.experts.203.w2", "model.layers.27.block_sparse_moe.experts.204.w2", "model.layers.27.block_sparse_moe.experts.205.w2", "model.layers.27.block_sparse_moe.experts.206.w2", "model.layers.27.block_sparse_moe.experts.207.w2", "model.layers.27.block_sparse_moe.experts.208.w2", "model.layers.27.block_sparse_moe.experts.209.w2", "model.layers.27.block_sparse_moe.experts.210.w2", "model.layers.27.block_sparse_moe.experts.211.w2", "model.layers.27.block_sparse_moe.experts.212.w2", "model.layers.27.block_sparse_moe.experts.213.w2", "model.layers.27.block_sparse_moe.experts.214.w2", "model.layers.27.block_sparse_moe.experts.215.w2", "model.layers.27.block_sparse_moe.experts.216.w2", "model.layers.27.block_sparse_moe.experts.217.w2", "model.layers.27.block_sparse_moe.experts.218.w2", "model.layers.27.block_sparse_moe.experts.219.w2", "model.layers.27.block_sparse_moe.experts.220.w2", "model.layers.27.block_sparse_moe.experts.221.w2", "model.layers.27.block_sparse_moe.experts.222.w2", "model.layers.27.block_sparse_moe.experts.223.w2", "model.layers.27.block_sparse_moe.experts.224.w2", "model.layers.27.block_sparse_moe.experts.225.w2", "model.layers.27.block_sparse_moe.experts.226.w2", "model.layers.27.block_sparse_moe.experts.227.w2", "model.layers.27.block_sparse_moe.experts.228.w2", "model.layers.27.block_sparse_moe.experts.229.w2", "model.layers.27.block_sparse_moe.experts.230.w2", "model.layers.27.block_sparse_moe.experts.231.w2", "model.layers.27.block_sparse_moe.experts.232.w2", "model.layers.27.block_sparse_moe.experts.233.w2", "model.layers.27.block_sparse_moe.experts.234.w2", "model.layers.27.block_sparse_moe.experts.235.w2", "model.layers.27.block_sparse_moe.experts.236.w2", "model.layers.27.block_sparse_moe.experts.237.w2", "model.layers.27.block_sparse_moe.experts.238.w2", "model.layers.27.block_sparse_moe.experts.239.w2", "model.layers.27.block_sparse_moe.experts.240.w2", "model.layers.27.block_sparse_moe.experts.241.w2", "model.layers.27.block_sparse_moe.experts.242.w2", "model.layers.27.block_sparse_moe.experts.243.w2", "model.layers.27.block_sparse_moe.experts.244.w2", "model.layers.27.block_sparse_moe.experts.245.w2", "model.layers.27.block_sparse_moe.experts.246.w2", "model.layers.27.block_sparse_moe.experts.247.w2", "model.layers.27.block_sparse_moe.experts.248.w2", "model.layers.27.block_sparse_moe.experts.249.w2", "model.layers.27.block_sparse_moe.experts.250.w2", "model.layers.27.block_sparse_moe.experts.251.w2", "model.layers.27.block_sparse_moe.experts.252.w2", "model.layers.27.block_sparse_moe.experts.253.w2", "model.layers.27.block_sparse_moe.experts.254.w2", "model.layers.27.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0018750350922345782, "dbits": 3623878656 } ] }, { "idx": 56, "layers": [ "model.layers.28.self_attn.q_proj", "model.layers.28.self_attn.k_proj", "model.layers.28.self_attn.v_proj", "model.layers.28.self_attn.o_proj" ], "candidates": [ { "dkld": -0.005188262090086937, "dbits": 44040192 } ] }, { "idx": 57, "layers": [ "model.layers.28.block_sparse_moe.experts.0.w1", "model.layers.28.block_sparse_moe.experts.1.w1", "model.layers.28.block_sparse_moe.experts.2.w1", "model.layers.28.block_sparse_moe.experts.3.w1", "model.layers.28.block_sparse_moe.experts.4.w1", "model.layers.28.block_sparse_moe.experts.5.w1", "model.layers.28.block_sparse_moe.experts.6.w1", "model.layers.28.block_sparse_moe.experts.7.w1", "model.layers.28.block_sparse_moe.experts.8.w1", "model.layers.28.block_sparse_moe.experts.9.w1", "model.layers.28.block_sparse_moe.experts.10.w1", "model.layers.28.block_sparse_moe.experts.11.w1", "model.layers.28.block_sparse_moe.experts.12.w1", "model.layers.28.block_sparse_moe.experts.13.w1", "model.layers.28.block_sparse_moe.experts.14.w1", "model.layers.28.block_sparse_moe.experts.15.w1", "model.layers.28.block_sparse_moe.experts.16.w1", "model.layers.28.block_sparse_moe.experts.17.w1", "model.layers.28.block_sparse_moe.experts.18.w1", "model.layers.28.block_sparse_moe.experts.19.w1", "model.layers.28.block_sparse_moe.experts.20.w1", "model.layers.28.block_sparse_moe.experts.21.w1", "model.layers.28.block_sparse_moe.experts.22.w1", "model.layers.28.block_sparse_moe.experts.23.w1", "model.layers.28.block_sparse_moe.experts.24.w1", "model.layers.28.block_sparse_moe.experts.25.w1", "model.layers.28.block_sparse_moe.experts.26.w1", "model.layers.28.block_sparse_moe.experts.27.w1", "model.layers.28.block_sparse_moe.experts.28.w1", "model.layers.28.block_sparse_moe.experts.29.w1", "model.layers.28.block_sparse_moe.experts.30.w1", "model.layers.28.block_sparse_moe.experts.31.w1", "model.layers.28.block_sparse_moe.experts.32.w1", "model.layers.28.block_sparse_moe.experts.33.w1", "model.layers.28.block_sparse_moe.experts.34.w1", "model.layers.28.block_sparse_moe.experts.35.w1", "model.layers.28.block_sparse_moe.experts.36.w1", "model.layers.28.block_sparse_moe.experts.37.w1", "model.layers.28.block_sparse_moe.experts.38.w1", "model.layers.28.block_sparse_moe.experts.39.w1", "model.layers.28.block_sparse_moe.experts.40.w1", "model.layers.28.block_sparse_moe.experts.41.w1", "model.layers.28.block_sparse_moe.experts.42.w1", "model.layers.28.block_sparse_moe.experts.43.w1", "model.layers.28.block_sparse_moe.experts.44.w1", "model.layers.28.block_sparse_moe.experts.45.w1", "model.layers.28.block_sparse_moe.experts.46.w1", "model.layers.28.block_sparse_moe.experts.47.w1", "model.layers.28.block_sparse_moe.experts.48.w1", "model.layers.28.block_sparse_moe.experts.49.w1", "model.layers.28.block_sparse_moe.experts.50.w1", "model.layers.28.block_sparse_moe.experts.51.w1", "model.layers.28.block_sparse_moe.experts.52.w1", "model.layers.28.block_sparse_moe.experts.53.w1", "model.layers.28.block_sparse_moe.experts.54.w1", "model.layers.28.block_sparse_moe.experts.55.w1", "model.layers.28.block_sparse_moe.experts.56.w1", "model.layers.28.block_sparse_moe.experts.57.w1", "model.layers.28.block_sparse_moe.experts.58.w1", "model.layers.28.block_sparse_moe.experts.59.w1", "model.layers.28.block_sparse_moe.experts.60.w1", "model.layers.28.block_sparse_moe.experts.61.w1", "model.layers.28.block_sparse_moe.experts.62.w1", "model.layers.28.block_sparse_moe.experts.63.w1", "model.layers.28.block_sparse_moe.experts.64.w1", "model.layers.28.block_sparse_moe.experts.65.w1", "model.layers.28.block_sparse_moe.experts.66.w1", "model.layers.28.block_sparse_moe.experts.67.w1", "model.layers.28.block_sparse_moe.experts.68.w1", "model.layers.28.block_sparse_moe.experts.69.w1", "model.layers.28.block_sparse_moe.experts.70.w1", "model.layers.28.block_sparse_moe.experts.71.w1", "model.layers.28.block_sparse_moe.experts.72.w1", "model.layers.28.block_sparse_moe.experts.73.w1", "model.layers.28.block_sparse_moe.experts.74.w1", "model.layers.28.block_sparse_moe.experts.75.w1", "model.layers.28.block_sparse_moe.experts.76.w1", "model.layers.28.block_sparse_moe.experts.77.w1", "model.layers.28.block_sparse_moe.experts.78.w1", "model.layers.28.block_sparse_moe.experts.79.w1", "model.layers.28.block_sparse_moe.experts.80.w1", "model.layers.28.block_sparse_moe.experts.81.w1", "model.layers.28.block_sparse_moe.experts.82.w1", "model.layers.28.block_sparse_moe.experts.83.w1", "model.layers.28.block_sparse_moe.experts.84.w1", "model.layers.28.block_sparse_moe.experts.85.w1", "model.layers.28.block_sparse_moe.experts.86.w1", "model.layers.28.block_sparse_moe.experts.87.w1", "model.layers.28.block_sparse_moe.experts.88.w1", "model.layers.28.block_sparse_moe.experts.89.w1", "model.layers.28.block_sparse_moe.experts.90.w1", "model.layers.28.block_sparse_moe.experts.91.w1", "model.layers.28.block_sparse_moe.experts.92.w1", "model.layers.28.block_sparse_moe.experts.93.w1", "model.layers.28.block_sparse_moe.experts.94.w1", "model.layers.28.block_sparse_moe.experts.95.w1", "model.layers.28.block_sparse_moe.experts.96.w1", "model.layers.28.block_sparse_moe.experts.97.w1", "model.layers.28.block_sparse_moe.experts.98.w1", "model.layers.28.block_sparse_moe.experts.99.w1", "model.layers.28.block_sparse_moe.experts.100.w1", "model.layers.28.block_sparse_moe.experts.101.w1", "model.layers.28.block_sparse_moe.experts.102.w1", "model.layers.28.block_sparse_moe.experts.103.w1", "model.layers.28.block_sparse_moe.experts.104.w1", "model.layers.28.block_sparse_moe.experts.105.w1", "model.layers.28.block_sparse_moe.experts.106.w1", "model.layers.28.block_sparse_moe.experts.107.w1", "model.layers.28.block_sparse_moe.experts.108.w1", "model.layers.28.block_sparse_moe.experts.109.w1", "model.layers.28.block_sparse_moe.experts.110.w1", "model.layers.28.block_sparse_moe.experts.111.w1", "model.layers.28.block_sparse_moe.experts.112.w1", "model.layers.28.block_sparse_moe.experts.113.w1", "model.layers.28.block_sparse_moe.experts.114.w1", "model.layers.28.block_sparse_moe.experts.115.w1", "model.layers.28.block_sparse_moe.experts.116.w1", "model.layers.28.block_sparse_moe.experts.117.w1", "model.layers.28.block_sparse_moe.experts.118.w1", "model.layers.28.block_sparse_moe.experts.119.w1", "model.layers.28.block_sparse_moe.experts.120.w1", "model.layers.28.block_sparse_moe.experts.121.w1", "model.layers.28.block_sparse_moe.experts.122.w1", "model.layers.28.block_sparse_moe.experts.123.w1", "model.layers.28.block_sparse_moe.experts.124.w1", "model.layers.28.block_sparse_moe.experts.125.w1", "model.layers.28.block_sparse_moe.experts.126.w1", "model.layers.28.block_sparse_moe.experts.127.w1", "model.layers.28.block_sparse_moe.experts.128.w1", "model.layers.28.block_sparse_moe.experts.129.w1", "model.layers.28.block_sparse_moe.experts.130.w1", "model.layers.28.block_sparse_moe.experts.131.w1", "model.layers.28.block_sparse_moe.experts.132.w1", "model.layers.28.block_sparse_moe.experts.133.w1", "model.layers.28.block_sparse_moe.experts.134.w1", "model.layers.28.block_sparse_moe.experts.135.w1", "model.layers.28.block_sparse_moe.experts.136.w1", "model.layers.28.block_sparse_moe.experts.137.w1", "model.layers.28.block_sparse_moe.experts.138.w1", "model.layers.28.block_sparse_moe.experts.139.w1", "model.layers.28.block_sparse_moe.experts.140.w1", "model.layers.28.block_sparse_moe.experts.141.w1", "model.layers.28.block_sparse_moe.experts.142.w1", "model.layers.28.block_sparse_moe.experts.143.w1", "model.layers.28.block_sparse_moe.experts.144.w1", "model.layers.28.block_sparse_moe.experts.145.w1", "model.layers.28.block_sparse_moe.experts.146.w1", "model.layers.28.block_sparse_moe.experts.147.w1", "model.layers.28.block_sparse_moe.experts.148.w1", "model.layers.28.block_sparse_moe.experts.149.w1", "model.layers.28.block_sparse_moe.experts.150.w1", "model.layers.28.block_sparse_moe.experts.151.w1", "model.layers.28.block_sparse_moe.experts.152.w1", "model.layers.28.block_sparse_moe.experts.153.w1", "model.layers.28.block_sparse_moe.experts.154.w1", "model.layers.28.block_sparse_moe.experts.155.w1", "model.layers.28.block_sparse_moe.experts.156.w1", "model.layers.28.block_sparse_moe.experts.157.w1", "model.layers.28.block_sparse_moe.experts.158.w1", "model.layers.28.block_sparse_moe.experts.159.w1", "model.layers.28.block_sparse_moe.experts.160.w1", "model.layers.28.block_sparse_moe.experts.161.w1", "model.layers.28.block_sparse_moe.experts.162.w1", "model.layers.28.block_sparse_moe.experts.163.w1", "model.layers.28.block_sparse_moe.experts.164.w1", "model.layers.28.block_sparse_moe.experts.165.w1", "model.layers.28.block_sparse_moe.experts.166.w1", "model.layers.28.block_sparse_moe.experts.167.w1", "model.layers.28.block_sparse_moe.experts.168.w1", "model.layers.28.block_sparse_moe.experts.169.w1", "model.layers.28.block_sparse_moe.experts.170.w1", "model.layers.28.block_sparse_moe.experts.171.w1", "model.layers.28.block_sparse_moe.experts.172.w1", "model.layers.28.block_sparse_moe.experts.173.w1", "model.layers.28.block_sparse_moe.experts.174.w1", "model.layers.28.block_sparse_moe.experts.175.w1", "model.layers.28.block_sparse_moe.experts.176.w1", "model.layers.28.block_sparse_moe.experts.177.w1", "model.layers.28.block_sparse_moe.experts.178.w1", "model.layers.28.block_sparse_moe.experts.179.w1", "model.layers.28.block_sparse_moe.experts.180.w1", "model.layers.28.block_sparse_moe.experts.181.w1", "model.layers.28.block_sparse_moe.experts.182.w1", "model.layers.28.block_sparse_moe.experts.183.w1", "model.layers.28.block_sparse_moe.experts.184.w1", "model.layers.28.block_sparse_moe.experts.185.w1", "model.layers.28.block_sparse_moe.experts.186.w1", "model.layers.28.block_sparse_moe.experts.187.w1", "model.layers.28.block_sparse_moe.experts.188.w1", "model.layers.28.block_sparse_moe.experts.189.w1", "model.layers.28.block_sparse_moe.experts.190.w1", "model.layers.28.block_sparse_moe.experts.191.w1", "model.layers.28.block_sparse_moe.experts.192.w1", "model.layers.28.block_sparse_moe.experts.193.w1", "model.layers.28.block_sparse_moe.experts.194.w1", "model.layers.28.block_sparse_moe.experts.195.w1", "model.layers.28.block_sparse_moe.experts.196.w1", "model.layers.28.block_sparse_moe.experts.197.w1", "model.layers.28.block_sparse_moe.experts.198.w1", "model.layers.28.block_sparse_moe.experts.199.w1", "model.layers.28.block_sparse_moe.experts.200.w1", "model.layers.28.block_sparse_moe.experts.201.w1", "model.layers.28.block_sparse_moe.experts.202.w1", "model.layers.28.block_sparse_moe.experts.203.w1", "model.layers.28.block_sparse_moe.experts.204.w1", "model.layers.28.block_sparse_moe.experts.205.w1", "model.layers.28.block_sparse_moe.experts.206.w1", "model.layers.28.block_sparse_moe.experts.207.w1", "model.layers.28.block_sparse_moe.experts.208.w1", "model.layers.28.block_sparse_moe.experts.209.w1", "model.layers.28.block_sparse_moe.experts.210.w1", "model.layers.28.block_sparse_moe.experts.211.w1", "model.layers.28.block_sparse_moe.experts.212.w1", "model.layers.28.block_sparse_moe.experts.213.w1", "model.layers.28.block_sparse_moe.experts.214.w1", "model.layers.28.block_sparse_moe.experts.215.w1", "model.layers.28.block_sparse_moe.experts.216.w1", "model.layers.28.block_sparse_moe.experts.217.w1", "model.layers.28.block_sparse_moe.experts.218.w1", "model.layers.28.block_sparse_moe.experts.219.w1", "model.layers.28.block_sparse_moe.experts.220.w1", "model.layers.28.block_sparse_moe.experts.221.w1", "model.layers.28.block_sparse_moe.experts.222.w1", "model.layers.28.block_sparse_moe.experts.223.w1", "model.layers.28.block_sparse_moe.experts.224.w1", "model.layers.28.block_sparse_moe.experts.225.w1", "model.layers.28.block_sparse_moe.experts.226.w1", "model.layers.28.block_sparse_moe.experts.227.w1", "model.layers.28.block_sparse_moe.experts.228.w1", "model.layers.28.block_sparse_moe.experts.229.w1", "model.layers.28.block_sparse_moe.experts.230.w1", "model.layers.28.block_sparse_moe.experts.231.w1", "model.layers.28.block_sparse_moe.experts.232.w1", "model.layers.28.block_sparse_moe.experts.233.w1", "model.layers.28.block_sparse_moe.experts.234.w1", "model.layers.28.block_sparse_moe.experts.235.w1", "model.layers.28.block_sparse_moe.experts.236.w1", "model.layers.28.block_sparse_moe.experts.237.w1", "model.layers.28.block_sparse_moe.experts.238.w1", "model.layers.28.block_sparse_moe.experts.239.w1", "model.layers.28.block_sparse_moe.experts.240.w1", "model.layers.28.block_sparse_moe.experts.241.w1", "model.layers.28.block_sparse_moe.experts.242.w1", "model.layers.28.block_sparse_moe.experts.243.w1", "model.layers.28.block_sparse_moe.experts.244.w1", "model.layers.28.block_sparse_moe.experts.245.w1", "model.layers.28.block_sparse_moe.experts.246.w1", "model.layers.28.block_sparse_moe.experts.247.w1", "model.layers.28.block_sparse_moe.experts.248.w1", "model.layers.28.block_sparse_moe.experts.249.w1", "model.layers.28.block_sparse_moe.experts.250.w1", "model.layers.28.block_sparse_moe.experts.251.w1", "model.layers.28.block_sparse_moe.experts.252.w1", "model.layers.28.block_sparse_moe.experts.253.w1", "model.layers.28.block_sparse_moe.experts.254.w1", "model.layers.28.block_sparse_moe.experts.255.w1", "model.layers.28.block_sparse_moe.experts.0.w3", "model.layers.28.block_sparse_moe.experts.1.w3", "model.layers.28.block_sparse_moe.experts.2.w3", "model.layers.28.block_sparse_moe.experts.3.w3", "model.layers.28.block_sparse_moe.experts.4.w3", "model.layers.28.block_sparse_moe.experts.5.w3", "model.layers.28.block_sparse_moe.experts.6.w3", "model.layers.28.block_sparse_moe.experts.7.w3", "model.layers.28.block_sparse_moe.experts.8.w3", "model.layers.28.block_sparse_moe.experts.9.w3", "model.layers.28.block_sparse_moe.experts.10.w3", "model.layers.28.block_sparse_moe.experts.11.w3", "model.layers.28.block_sparse_moe.experts.12.w3", "model.layers.28.block_sparse_moe.experts.13.w3", "model.layers.28.block_sparse_moe.experts.14.w3", "model.layers.28.block_sparse_moe.experts.15.w3", "model.layers.28.block_sparse_moe.experts.16.w3", "model.layers.28.block_sparse_moe.experts.17.w3", "model.layers.28.block_sparse_moe.experts.18.w3", "model.layers.28.block_sparse_moe.experts.19.w3", "model.layers.28.block_sparse_moe.experts.20.w3", "model.layers.28.block_sparse_moe.experts.21.w3", "model.layers.28.block_sparse_moe.experts.22.w3", "model.layers.28.block_sparse_moe.experts.23.w3", "model.layers.28.block_sparse_moe.experts.24.w3", "model.layers.28.block_sparse_moe.experts.25.w3", "model.layers.28.block_sparse_moe.experts.26.w3", "model.layers.28.block_sparse_moe.experts.27.w3", "model.layers.28.block_sparse_moe.experts.28.w3", "model.layers.28.block_sparse_moe.experts.29.w3", "model.layers.28.block_sparse_moe.experts.30.w3", "model.layers.28.block_sparse_moe.experts.31.w3", "model.layers.28.block_sparse_moe.experts.32.w3", "model.layers.28.block_sparse_moe.experts.33.w3", "model.layers.28.block_sparse_moe.experts.34.w3", "model.layers.28.block_sparse_moe.experts.35.w3", "model.layers.28.block_sparse_moe.experts.36.w3", "model.layers.28.block_sparse_moe.experts.37.w3", "model.layers.28.block_sparse_moe.experts.38.w3", "model.layers.28.block_sparse_moe.experts.39.w3", "model.layers.28.block_sparse_moe.experts.40.w3", "model.layers.28.block_sparse_moe.experts.41.w3", "model.layers.28.block_sparse_moe.experts.42.w3", "model.layers.28.block_sparse_moe.experts.43.w3", "model.layers.28.block_sparse_moe.experts.44.w3", "model.layers.28.block_sparse_moe.experts.45.w3", "model.layers.28.block_sparse_moe.experts.46.w3", "model.layers.28.block_sparse_moe.experts.47.w3", "model.layers.28.block_sparse_moe.experts.48.w3", "model.layers.28.block_sparse_moe.experts.49.w3", "model.layers.28.block_sparse_moe.experts.50.w3", "model.layers.28.block_sparse_moe.experts.51.w3", "model.layers.28.block_sparse_moe.experts.52.w3", "model.layers.28.block_sparse_moe.experts.53.w3", "model.layers.28.block_sparse_moe.experts.54.w3", "model.layers.28.block_sparse_moe.experts.55.w3", "model.layers.28.block_sparse_moe.experts.56.w3", "model.layers.28.block_sparse_moe.experts.57.w3", "model.layers.28.block_sparse_moe.experts.58.w3", "model.layers.28.block_sparse_moe.experts.59.w3", "model.layers.28.block_sparse_moe.experts.60.w3", "model.layers.28.block_sparse_moe.experts.61.w3", "model.layers.28.block_sparse_moe.experts.62.w3", "model.layers.28.block_sparse_moe.experts.63.w3", "model.layers.28.block_sparse_moe.experts.64.w3", "model.layers.28.block_sparse_moe.experts.65.w3", "model.layers.28.block_sparse_moe.experts.66.w3", "model.layers.28.block_sparse_moe.experts.67.w3", "model.layers.28.block_sparse_moe.experts.68.w3", "model.layers.28.block_sparse_moe.experts.69.w3", "model.layers.28.block_sparse_moe.experts.70.w3", "model.layers.28.block_sparse_moe.experts.71.w3", "model.layers.28.block_sparse_moe.experts.72.w3", "model.layers.28.block_sparse_moe.experts.73.w3", "model.layers.28.block_sparse_moe.experts.74.w3", "model.layers.28.block_sparse_moe.experts.75.w3", "model.layers.28.block_sparse_moe.experts.76.w3", "model.layers.28.block_sparse_moe.experts.77.w3", "model.layers.28.block_sparse_moe.experts.78.w3", "model.layers.28.block_sparse_moe.experts.79.w3", "model.layers.28.block_sparse_moe.experts.80.w3", "model.layers.28.block_sparse_moe.experts.81.w3", "model.layers.28.block_sparse_moe.experts.82.w3", "model.layers.28.block_sparse_moe.experts.83.w3", "model.layers.28.block_sparse_moe.experts.84.w3", "model.layers.28.block_sparse_moe.experts.85.w3", "model.layers.28.block_sparse_moe.experts.86.w3", "model.layers.28.block_sparse_moe.experts.87.w3", "model.layers.28.block_sparse_moe.experts.88.w3", "model.layers.28.block_sparse_moe.experts.89.w3", "model.layers.28.block_sparse_moe.experts.90.w3", "model.layers.28.block_sparse_moe.experts.91.w3", "model.layers.28.block_sparse_moe.experts.92.w3", "model.layers.28.block_sparse_moe.experts.93.w3", "model.layers.28.block_sparse_moe.experts.94.w3", "model.layers.28.block_sparse_moe.experts.95.w3", "model.layers.28.block_sparse_moe.experts.96.w3", "model.layers.28.block_sparse_moe.experts.97.w3", "model.layers.28.block_sparse_moe.experts.98.w3", "model.layers.28.block_sparse_moe.experts.99.w3", "model.layers.28.block_sparse_moe.experts.100.w3", "model.layers.28.block_sparse_moe.experts.101.w3", "model.layers.28.block_sparse_moe.experts.102.w3", "model.layers.28.block_sparse_moe.experts.103.w3", "model.layers.28.block_sparse_moe.experts.104.w3", "model.layers.28.block_sparse_moe.experts.105.w3", "model.layers.28.block_sparse_moe.experts.106.w3", "model.layers.28.block_sparse_moe.experts.107.w3", "model.layers.28.block_sparse_moe.experts.108.w3", "model.layers.28.block_sparse_moe.experts.109.w3", "model.layers.28.block_sparse_moe.experts.110.w3", "model.layers.28.block_sparse_moe.experts.111.w3", "model.layers.28.block_sparse_moe.experts.112.w3", "model.layers.28.block_sparse_moe.experts.113.w3", "model.layers.28.block_sparse_moe.experts.114.w3", "model.layers.28.block_sparse_moe.experts.115.w3", "model.layers.28.block_sparse_moe.experts.116.w3", "model.layers.28.block_sparse_moe.experts.117.w3", "model.layers.28.block_sparse_moe.experts.118.w3", "model.layers.28.block_sparse_moe.experts.119.w3", "model.layers.28.block_sparse_moe.experts.120.w3", "model.layers.28.block_sparse_moe.experts.121.w3", "model.layers.28.block_sparse_moe.experts.122.w3", "model.layers.28.block_sparse_moe.experts.123.w3", "model.layers.28.block_sparse_moe.experts.124.w3", "model.layers.28.block_sparse_moe.experts.125.w3", "model.layers.28.block_sparse_moe.experts.126.w3", "model.layers.28.block_sparse_moe.experts.127.w3", "model.layers.28.block_sparse_moe.experts.128.w3", "model.layers.28.block_sparse_moe.experts.129.w3", "model.layers.28.block_sparse_moe.experts.130.w3", "model.layers.28.block_sparse_moe.experts.131.w3", "model.layers.28.block_sparse_moe.experts.132.w3", "model.layers.28.block_sparse_moe.experts.133.w3", "model.layers.28.block_sparse_moe.experts.134.w3", "model.layers.28.block_sparse_moe.experts.135.w3", "model.layers.28.block_sparse_moe.experts.136.w3", "model.layers.28.block_sparse_moe.experts.137.w3", "model.layers.28.block_sparse_moe.experts.138.w3", "model.layers.28.block_sparse_moe.experts.139.w3", "model.layers.28.block_sparse_moe.experts.140.w3", "model.layers.28.block_sparse_moe.experts.141.w3", "model.layers.28.block_sparse_moe.experts.142.w3", "model.layers.28.block_sparse_moe.experts.143.w3", "model.layers.28.block_sparse_moe.experts.144.w3", "model.layers.28.block_sparse_moe.experts.145.w3", "model.layers.28.block_sparse_moe.experts.146.w3", "model.layers.28.block_sparse_moe.experts.147.w3", "model.layers.28.block_sparse_moe.experts.148.w3", "model.layers.28.block_sparse_moe.experts.149.w3", "model.layers.28.block_sparse_moe.experts.150.w3", "model.layers.28.block_sparse_moe.experts.151.w3", "model.layers.28.block_sparse_moe.experts.152.w3", "model.layers.28.block_sparse_moe.experts.153.w3", "model.layers.28.block_sparse_moe.experts.154.w3", "model.layers.28.block_sparse_moe.experts.155.w3", "model.layers.28.block_sparse_moe.experts.156.w3", "model.layers.28.block_sparse_moe.experts.157.w3", "model.layers.28.block_sparse_moe.experts.158.w3", "model.layers.28.block_sparse_moe.experts.159.w3", "model.layers.28.block_sparse_moe.experts.160.w3", "model.layers.28.block_sparse_moe.experts.161.w3", "model.layers.28.block_sparse_moe.experts.162.w3", "model.layers.28.block_sparse_moe.experts.163.w3", "model.layers.28.block_sparse_moe.experts.164.w3", "model.layers.28.block_sparse_moe.experts.165.w3", "model.layers.28.block_sparse_moe.experts.166.w3", "model.layers.28.block_sparse_moe.experts.167.w3", "model.layers.28.block_sparse_moe.experts.168.w3", "model.layers.28.block_sparse_moe.experts.169.w3", "model.layers.28.block_sparse_moe.experts.170.w3", "model.layers.28.block_sparse_moe.experts.171.w3", "model.layers.28.block_sparse_moe.experts.172.w3", "model.layers.28.block_sparse_moe.experts.173.w3", "model.layers.28.block_sparse_moe.experts.174.w3", "model.layers.28.block_sparse_moe.experts.175.w3", "model.layers.28.block_sparse_moe.experts.176.w3", "model.layers.28.block_sparse_moe.experts.177.w3", "model.layers.28.block_sparse_moe.experts.178.w3", "model.layers.28.block_sparse_moe.experts.179.w3", "model.layers.28.block_sparse_moe.experts.180.w3", "model.layers.28.block_sparse_moe.experts.181.w3", "model.layers.28.block_sparse_moe.experts.182.w3", "model.layers.28.block_sparse_moe.experts.183.w3", "model.layers.28.block_sparse_moe.experts.184.w3", "model.layers.28.block_sparse_moe.experts.185.w3", "model.layers.28.block_sparse_moe.experts.186.w3", "model.layers.28.block_sparse_moe.experts.187.w3", "model.layers.28.block_sparse_moe.experts.188.w3", "model.layers.28.block_sparse_moe.experts.189.w3", "model.layers.28.block_sparse_moe.experts.190.w3", "model.layers.28.block_sparse_moe.experts.191.w3", "model.layers.28.block_sparse_moe.experts.192.w3", "model.layers.28.block_sparse_moe.experts.193.w3", "model.layers.28.block_sparse_moe.experts.194.w3", "model.layers.28.block_sparse_moe.experts.195.w3", "model.layers.28.block_sparse_moe.experts.196.w3", "model.layers.28.block_sparse_moe.experts.197.w3", "model.layers.28.block_sparse_moe.experts.198.w3", "model.layers.28.block_sparse_moe.experts.199.w3", "model.layers.28.block_sparse_moe.experts.200.w3", "model.layers.28.block_sparse_moe.experts.201.w3", "model.layers.28.block_sparse_moe.experts.202.w3", "model.layers.28.block_sparse_moe.experts.203.w3", "model.layers.28.block_sparse_moe.experts.204.w3", "model.layers.28.block_sparse_moe.experts.205.w3", "model.layers.28.block_sparse_moe.experts.206.w3", "model.layers.28.block_sparse_moe.experts.207.w3", "model.layers.28.block_sparse_moe.experts.208.w3", "model.layers.28.block_sparse_moe.experts.209.w3", "model.layers.28.block_sparse_moe.experts.210.w3", "model.layers.28.block_sparse_moe.experts.211.w3", "model.layers.28.block_sparse_moe.experts.212.w3", "model.layers.28.block_sparse_moe.experts.213.w3", "model.layers.28.block_sparse_moe.experts.214.w3", "model.layers.28.block_sparse_moe.experts.215.w3", "model.layers.28.block_sparse_moe.experts.216.w3", "model.layers.28.block_sparse_moe.experts.217.w3", "model.layers.28.block_sparse_moe.experts.218.w3", "model.layers.28.block_sparse_moe.experts.219.w3", "model.layers.28.block_sparse_moe.experts.220.w3", "model.layers.28.block_sparse_moe.experts.221.w3", "model.layers.28.block_sparse_moe.experts.222.w3", "model.layers.28.block_sparse_moe.experts.223.w3", "model.layers.28.block_sparse_moe.experts.224.w3", "model.layers.28.block_sparse_moe.experts.225.w3", "model.layers.28.block_sparse_moe.experts.226.w3", "model.layers.28.block_sparse_moe.experts.227.w3", "model.layers.28.block_sparse_moe.experts.228.w3", "model.layers.28.block_sparse_moe.experts.229.w3", "model.layers.28.block_sparse_moe.experts.230.w3", "model.layers.28.block_sparse_moe.experts.231.w3", "model.layers.28.block_sparse_moe.experts.232.w3", "model.layers.28.block_sparse_moe.experts.233.w3", "model.layers.28.block_sparse_moe.experts.234.w3", "model.layers.28.block_sparse_moe.experts.235.w3", "model.layers.28.block_sparse_moe.experts.236.w3", "model.layers.28.block_sparse_moe.experts.237.w3", "model.layers.28.block_sparse_moe.experts.238.w3", "model.layers.28.block_sparse_moe.experts.239.w3", "model.layers.28.block_sparse_moe.experts.240.w3", "model.layers.28.block_sparse_moe.experts.241.w3", "model.layers.28.block_sparse_moe.experts.242.w3", "model.layers.28.block_sparse_moe.experts.243.w3", "model.layers.28.block_sparse_moe.experts.244.w3", "model.layers.28.block_sparse_moe.experts.245.w3", "model.layers.28.block_sparse_moe.experts.246.w3", "model.layers.28.block_sparse_moe.experts.247.w3", "model.layers.28.block_sparse_moe.experts.248.w3", "model.layers.28.block_sparse_moe.experts.249.w3", "model.layers.28.block_sparse_moe.experts.250.w3", "model.layers.28.block_sparse_moe.experts.251.w3", "model.layers.28.block_sparse_moe.experts.252.w3", "model.layers.28.block_sparse_moe.experts.253.w3", "model.layers.28.block_sparse_moe.experts.254.w3", "model.layers.28.block_sparse_moe.experts.255.w3", "model.layers.28.block_sparse_moe.experts.0.w2", "model.layers.28.block_sparse_moe.experts.1.w2", "model.layers.28.block_sparse_moe.experts.2.w2", "model.layers.28.block_sparse_moe.experts.3.w2", "model.layers.28.block_sparse_moe.experts.4.w2", "model.layers.28.block_sparse_moe.experts.5.w2", "model.layers.28.block_sparse_moe.experts.6.w2", "model.layers.28.block_sparse_moe.experts.7.w2", "model.layers.28.block_sparse_moe.experts.8.w2", "model.layers.28.block_sparse_moe.experts.9.w2", "model.layers.28.block_sparse_moe.experts.10.w2", "model.layers.28.block_sparse_moe.experts.11.w2", "model.layers.28.block_sparse_moe.experts.12.w2", "model.layers.28.block_sparse_moe.experts.13.w2", "model.layers.28.block_sparse_moe.experts.14.w2", "model.layers.28.block_sparse_moe.experts.15.w2", "model.layers.28.block_sparse_moe.experts.16.w2", "model.layers.28.block_sparse_moe.experts.17.w2", "model.layers.28.block_sparse_moe.experts.18.w2", "model.layers.28.block_sparse_moe.experts.19.w2", "model.layers.28.block_sparse_moe.experts.20.w2", "model.layers.28.block_sparse_moe.experts.21.w2", "model.layers.28.block_sparse_moe.experts.22.w2", "model.layers.28.block_sparse_moe.experts.23.w2", "model.layers.28.block_sparse_moe.experts.24.w2", "model.layers.28.block_sparse_moe.experts.25.w2", "model.layers.28.block_sparse_moe.experts.26.w2", "model.layers.28.block_sparse_moe.experts.27.w2", "model.layers.28.block_sparse_moe.experts.28.w2", "model.layers.28.block_sparse_moe.experts.29.w2", "model.layers.28.block_sparse_moe.experts.30.w2", "model.layers.28.block_sparse_moe.experts.31.w2", "model.layers.28.block_sparse_moe.experts.32.w2", "model.layers.28.block_sparse_moe.experts.33.w2", "model.layers.28.block_sparse_moe.experts.34.w2", "model.layers.28.block_sparse_moe.experts.35.w2", "model.layers.28.block_sparse_moe.experts.36.w2", "model.layers.28.block_sparse_moe.experts.37.w2", "model.layers.28.block_sparse_moe.experts.38.w2", "model.layers.28.block_sparse_moe.experts.39.w2", "model.layers.28.block_sparse_moe.experts.40.w2", "model.layers.28.block_sparse_moe.experts.41.w2", "model.layers.28.block_sparse_moe.experts.42.w2", "model.layers.28.block_sparse_moe.experts.43.w2", "model.layers.28.block_sparse_moe.experts.44.w2", "model.layers.28.block_sparse_moe.experts.45.w2", "model.layers.28.block_sparse_moe.experts.46.w2", "model.layers.28.block_sparse_moe.experts.47.w2", "model.layers.28.block_sparse_moe.experts.48.w2", "model.layers.28.block_sparse_moe.experts.49.w2", "model.layers.28.block_sparse_moe.experts.50.w2", "model.layers.28.block_sparse_moe.experts.51.w2", "model.layers.28.block_sparse_moe.experts.52.w2", "model.layers.28.block_sparse_moe.experts.53.w2", "model.layers.28.block_sparse_moe.experts.54.w2", "model.layers.28.block_sparse_moe.experts.55.w2", "model.layers.28.block_sparse_moe.experts.56.w2", "model.layers.28.block_sparse_moe.experts.57.w2", "model.layers.28.block_sparse_moe.experts.58.w2", "model.layers.28.block_sparse_moe.experts.59.w2", "model.layers.28.block_sparse_moe.experts.60.w2", "model.layers.28.block_sparse_moe.experts.61.w2", "model.layers.28.block_sparse_moe.experts.62.w2", "model.layers.28.block_sparse_moe.experts.63.w2", "model.layers.28.block_sparse_moe.experts.64.w2", "model.layers.28.block_sparse_moe.experts.65.w2", "model.layers.28.block_sparse_moe.experts.66.w2", "model.layers.28.block_sparse_moe.experts.67.w2", "model.layers.28.block_sparse_moe.experts.68.w2", "model.layers.28.block_sparse_moe.experts.69.w2", "model.layers.28.block_sparse_moe.experts.70.w2", "model.layers.28.block_sparse_moe.experts.71.w2", "model.layers.28.block_sparse_moe.experts.72.w2", "model.layers.28.block_sparse_moe.experts.73.w2", "model.layers.28.block_sparse_moe.experts.74.w2", "model.layers.28.block_sparse_moe.experts.75.w2", "model.layers.28.block_sparse_moe.experts.76.w2", "model.layers.28.block_sparse_moe.experts.77.w2", "model.layers.28.block_sparse_moe.experts.78.w2", "model.layers.28.block_sparse_moe.experts.79.w2", "model.layers.28.block_sparse_moe.experts.80.w2", "model.layers.28.block_sparse_moe.experts.81.w2", "model.layers.28.block_sparse_moe.experts.82.w2", "model.layers.28.block_sparse_moe.experts.83.w2", "model.layers.28.block_sparse_moe.experts.84.w2", "model.layers.28.block_sparse_moe.experts.85.w2", "model.layers.28.block_sparse_moe.experts.86.w2", "model.layers.28.block_sparse_moe.experts.87.w2", "model.layers.28.block_sparse_moe.experts.88.w2", "model.layers.28.block_sparse_moe.experts.89.w2", "model.layers.28.block_sparse_moe.experts.90.w2", "model.layers.28.block_sparse_moe.experts.91.w2", "model.layers.28.block_sparse_moe.experts.92.w2", "model.layers.28.block_sparse_moe.experts.93.w2", "model.layers.28.block_sparse_moe.experts.94.w2", "model.layers.28.block_sparse_moe.experts.95.w2", "model.layers.28.block_sparse_moe.experts.96.w2", "model.layers.28.block_sparse_moe.experts.97.w2", "model.layers.28.block_sparse_moe.experts.98.w2", "model.layers.28.block_sparse_moe.experts.99.w2", "model.layers.28.block_sparse_moe.experts.100.w2", "model.layers.28.block_sparse_moe.experts.101.w2", "model.layers.28.block_sparse_moe.experts.102.w2", "model.layers.28.block_sparse_moe.experts.103.w2", "model.layers.28.block_sparse_moe.experts.104.w2", "model.layers.28.block_sparse_moe.experts.105.w2", "model.layers.28.block_sparse_moe.experts.106.w2", "model.layers.28.block_sparse_moe.experts.107.w2", "model.layers.28.block_sparse_moe.experts.108.w2", "model.layers.28.block_sparse_moe.experts.109.w2", "model.layers.28.block_sparse_moe.experts.110.w2", "model.layers.28.block_sparse_moe.experts.111.w2", "model.layers.28.block_sparse_moe.experts.112.w2", "model.layers.28.block_sparse_moe.experts.113.w2", "model.layers.28.block_sparse_moe.experts.114.w2", "model.layers.28.block_sparse_moe.experts.115.w2", "model.layers.28.block_sparse_moe.experts.116.w2", "model.layers.28.block_sparse_moe.experts.117.w2", "model.layers.28.block_sparse_moe.experts.118.w2", "model.layers.28.block_sparse_moe.experts.119.w2", "model.layers.28.block_sparse_moe.experts.120.w2", "model.layers.28.block_sparse_moe.experts.121.w2", "model.layers.28.block_sparse_moe.experts.122.w2", "model.layers.28.block_sparse_moe.experts.123.w2", "model.layers.28.block_sparse_moe.experts.124.w2", "model.layers.28.block_sparse_moe.experts.125.w2", "model.layers.28.block_sparse_moe.experts.126.w2", "model.layers.28.block_sparse_moe.experts.127.w2", "model.layers.28.block_sparse_moe.experts.128.w2", "model.layers.28.block_sparse_moe.experts.129.w2", "model.layers.28.block_sparse_moe.experts.130.w2", "model.layers.28.block_sparse_moe.experts.131.w2", "model.layers.28.block_sparse_moe.experts.132.w2", "model.layers.28.block_sparse_moe.experts.133.w2", "model.layers.28.block_sparse_moe.experts.134.w2", "model.layers.28.block_sparse_moe.experts.135.w2", "model.layers.28.block_sparse_moe.experts.136.w2", "model.layers.28.block_sparse_moe.experts.137.w2", "model.layers.28.block_sparse_moe.experts.138.w2", "model.layers.28.block_sparse_moe.experts.139.w2", "model.layers.28.block_sparse_moe.experts.140.w2", "model.layers.28.block_sparse_moe.experts.141.w2", "model.layers.28.block_sparse_moe.experts.142.w2", "model.layers.28.block_sparse_moe.experts.143.w2", "model.layers.28.block_sparse_moe.experts.144.w2", "model.layers.28.block_sparse_moe.experts.145.w2", "model.layers.28.block_sparse_moe.experts.146.w2", "model.layers.28.block_sparse_moe.experts.147.w2", "model.layers.28.block_sparse_moe.experts.148.w2", "model.layers.28.block_sparse_moe.experts.149.w2", "model.layers.28.block_sparse_moe.experts.150.w2", "model.layers.28.block_sparse_moe.experts.151.w2", "model.layers.28.block_sparse_moe.experts.152.w2", "model.layers.28.block_sparse_moe.experts.153.w2", "model.layers.28.block_sparse_moe.experts.154.w2", "model.layers.28.block_sparse_moe.experts.155.w2", "model.layers.28.block_sparse_moe.experts.156.w2", "model.layers.28.block_sparse_moe.experts.157.w2", "model.layers.28.block_sparse_moe.experts.158.w2", "model.layers.28.block_sparse_moe.experts.159.w2", "model.layers.28.block_sparse_moe.experts.160.w2", "model.layers.28.block_sparse_moe.experts.161.w2", "model.layers.28.block_sparse_moe.experts.162.w2", "model.layers.28.block_sparse_moe.experts.163.w2", "model.layers.28.block_sparse_moe.experts.164.w2", "model.layers.28.block_sparse_moe.experts.165.w2", "model.layers.28.block_sparse_moe.experts.166.w2", "model.layers.28.block_sparse_moe.experts.167.w2", "model.layers.28.block_sparse_moe.experts.168.w2", "model.layers.28.block_sparse_moe.experts.169.w2", "model.layers.28.block_sparse_moe.experts.170.w2", "model.layers.28.block_sparse_moe.experts.171.w2", "model.layers.28.block_sparse_moe.experts.172.w2", "model.layers.28.block_sparse_moe.experts.173.w2", "model.layers.28.block_sparse_moe.experts.174.w2", "model.layers.28.block_sparse_moe.experts.175.w2", "model.layers.28.block_sparse_moe.experts.176.w2", "model.layers.28.block_sparse_moe.experts.177.w2", "model.layers.28.block_sparse_moe.experts.178.w2", "model.layers.28.block_sparse_moe.experts.179.w2", "model.layers.28.block_sparse_moe.experts.180.w2", "model.layers.28.block_sparse_moe.experts.181.w2", "model.layers.28.block_sparse_moe.experts.182.w2", "model.layers.28.block_sparse_moe.experts.183.w2", "model.layers.28.block_sparse_moe.experts.184.w2", "model.layers.28.block_sparse_moe.experts.185.w2", "model.layers.28.block_sparse_moe.experts.186.w2", "model.layers.28.block_sparse_moe.experts.187.w2", "model.layers.28.block_sparse_moe.experts.188.w2", "model.layers.28.block_sparse_moe.experts.189.w2", "model.layers.28.block_sparse_moe.experts.190.w2", "model.layers.28.block_sparse_moe.experts.191.w2", "model.layers.28.block_sparse_moe.experts.192.w2", "model.layers.28.block_sparse_moe.experts.193.w2", "model.layers.28.block_sparse_moe.experts.194.w2", "model.layers.28.block_sparse_moe.experts.195.w2", "model.layers.28.block_sparse_moe.experts.196.w2", "model.layers.28.block_sparse_moe.experts.197.w2", "model.layers.28.block_sparse_moe.experts.198.w2", "model.layers.28.block_sparse_moe.experts.199.w2", "model.layers.28.block_sparse_moe.experts.200.w2", "model.layers.28.block_sparse_moe.experts.201.w2", "model.layers.28.block_sparse_moe.experts.202.w2", "model.layers.28.block_sparse_moe.experts.203.w2", "model.layers.28.block_sparse_moe.experts.204.w2", "model.layers.28.block_sparse_moe.experts.205.w2", "model.layers.28.block_sparse_moe.experts.206.w2", "model.layers.28.block_sparse_moe.experts.207.w2", "model.layers.28.block_sparse_moe.experts.208.w2", "model.layers.28.block_sparse_moe.experts.209.w2", "model.layers.28.block_sparse_moe.experts.210.w2", "model.layers.28.block_sparse_moe.experts.211.w2", "model.layers.28.block_sparse_moe.experts.212.w2", "model.layers.28.block_sparse_moe.experts.213.w2", "model.layers.28.block_sparse_moe.experts.214.w2", "model.layers.28.block_sparse_moe.experts.215.w2", "model.layers.28.block_sparse_moe.experts.216.w2", "model.layers.28.block_sparse_moe.experts.217.w2", "model.layers.28.block_sparse_moe.experts.218.w2", "model.layers.28.block_sparse_moe.experts.219.w2", "model.layers.28.block_sparse_moe.experts.220.w2", "model.layers.28.block_sparse_moe.experts.221.w2", "model.layers.28.block_sparse_moe.experts.222.w2", "model.layers.28.block_sparse_moe.experts.223.w2", "model.layers.28.block_sparse_moe.experts.224.w2", "model.layers.28.block_sparse_moe.experts.225.w2", "model.layers.28.block_sparse_moe.experts.226.w2", "model.layers.28.block_sparse_moe.experts.227.w2", "model.layers.28.block_sparse_moe.experts.228.w2", "model.layers.28.block_sparse_moe.experts.229.w2", "model.layers.28.block_sparse_moe.experts.230.w2", "model.layers.28.block_sparse_moe.experts.231.w2", "model.layers.28.block_sparse_moe.experts.232.w2", "model.layers.28.block_sparse_moe.experts.233.w2", "model.layers.28.block_sparse_moe.experts.234.w2", "model.layers.28.block_sparse_moe.experts.235.w2", "model.layers.28.block_sparse_moe.experts.236.w2", "model.layers.28.block_sparse_moe.experts.237.w2", "model.layers.28.block_sparse_moe.experts.238.w2", "model.layers.28.block_sparse_moe.experts.239.w2", "model.layers.28.block_sparse_moe.experts.240.w2", "model.layers.28.block_sparse_moe.experts.241.w2", "model.layers.28.block_sparse_moe.experts.242.w2", "model.layers.28.block_sparse_moe.experts.243.w2", "model.layers.28.block_sparse_moe.experts.244.w2", "model.layers.28.block_sparse_moe.experts.245.w2", "model.layers.28.block_sparse_moe.experts.246.w2", "model.layers.28.block_sparse_moe.experts.247.w2", "model.layers.28.block_sparse_moe.experts.248.w2", "model.layers.28.block_sparse_moe.experts.249.w2", "model.layers.28.block_sparse_moe.experts.250.w2", "model.layers.28.block_sparse_moe.experts.251.w2", "model.layers.28.block_sparse_moe.experts.252.w2", "model.layers.28.block_sparse_moe.experts.253.w2", "model.layers.28.block_sparse_moe.experts.254.w2", "model.layers.28.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0003549288958311081, "dbits": 3623878656 } ] }, { "idx": 58, "layers": [ "model.layers.29.self_attn.q_proj", "model.layers.29.self_attn.k_proj", "model.layers.29.self_attn.v_proj", "model.layers.29.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0025847285985946544, "dbits": 44040192 } ] }, { "idx": 59, "layers": [ "model.layers.29.block_sparse_moe.experts.0.w1", "model.layers.29.block_sparse_moe.experts.1.w1", "model.layers.29.block_sparse_moe.experts.2.w1", "model.layers.29.block_sparse_moe.experts.3.w1", "model.layers.29.block_sparse_moe.experts.4.w1", "model.layers.29.block_sparse_moe.experts.5.w1", "model.layers.29.block_sparse_moe.experts.6.w1", "model.layers.29.block_sparse_moe.experts.7.w1", "model.layers.29.block_sparse_moe.experts.8.w1", "model.layers.29.block_sparse_moe.experts.9.w1", "model.layers.29.block_sparse_moe.experts.10.w1", "model.layers.29.block_sparse_moe.experts.11.w1", "model.layers.29.block_sparse_moe.experts.12.w1", "model.layers.29.block_sparse_moe.experts.13.w1", "model.layers.29.block_sparse_moe.experts.14.w1", "model.layers.29.block_sparse_moe.experts.15.w1", "model.layers.29.block_sparse_moe.experts.16.w1", "model.layers.29.block_sparse_moe.experts.17.w1", "model.layers.29.block_sparse_moe.experts.18.w1", "model.layers.29.block_sparse_moe.experts.19.w1", "model.layers.29.block_sparse_moe.experts.20.w1", "model.layers.29.block_sparse_moe.experts.21.w1", "model.layers.29.block_sparse_moe.experts.22.w1", "model.layers.29.block_sparse_moe.experts.23.w1", "model.layers.29.block_sparse_moe.experts.24.w1", "model.layers.29.block_sparse_moe.experts.25.w1", "model.layers.29.block_sparse_moe.experts.26.w1", "model.layers.29.block_sparse_moe.experts.27.w1", "model.layers.29.block_sparse_moe.experts.28.w1", "model.layers.29.block_sparse_moe.experts.29.w1", "model.layers.29.block_sparse_moe.experts.30.w1", "model.layers.29.block_sparse_moe.experts.31.w1", "model.layers.29.block_sparse_moe.experts.32.w1", "model.layers.29.block_sparse_moe.experts.33.w1", "model.layers.29.block_sparse_moe.experts.34.w1", "model.layers.29.block_sparse_moe.experts.35.w1", "model.layers.29.block_sparse_moe.experts.36.w1", "model.layers.29.block_sparse_moe.experts.37.w1", "model.layers.29.block_sparse_moe.experts.38.w1", "model.layers.29.block_sparse_moe.experts.39.w1", "model.layers.29.block_sparse_moe.experts.40.w1", "model.layers.29.block_sparse_moe.experts.41.w1", "model.layers.29.block_sparse_moe.experts.42.w1", "model.layers.29.block_sparse_moe.experts.43.w1", "model.layers.29.block_sparse_moe.experts.44.w1", "model.layers.29.block_sparse_moe.experts.45.w1", "model.layers.29.block_sparse_moe.experts.46.w1", "model.layers.29.block_sparse_moe.experts.47.w1", "model.layers.29.block_sparse_moe.experts.48.w1", "model.layers.29.block_sparse_moe.experts.49.w1", "model.layers.29.block_sparse_moe.experts.50.w1", "model.layers.29.block_sparse_moe.experts.51.w1", "model.layers.29.block_sparse_moe.experts.52.w1", "model.layers.29.block_sparse_moe.experts.53.w1", "model.layers.29.block_sparse_moe.experts.54.w1", "model.layers.29.block_sparse_moe.experts.55.w1", "model.layers.29.block_sparse_moe.experts.56.w1", "model.layers.29.block_sparse_moe.experts.57.w1", "model.layers.29.block_sparse_moe.experts.58.w1", "model.layers.29.block_sparse_moe.experts.59.w1", "model.layers.29.block_sparse_moe.experts.60.w1", "model.layers.29.block_sparse_moe.experts.61.w1", "model.layers.29.block_sparse_moe.experts.62.w1", "model.layers.29.block_sparse_moe.experts.63.w1", "model.layers.29.block_sparse_moe.experts.64.w1", "model.layers.29.block_sparse_moe.experts.65.w1", "model.layers.29.block_sparse_moe.experts.66.w1", "model.layers.29.block_sparse_moe.experts.67.w1", "model.layers.29.block_sparse_moe.experts.68.w1", "model.layers.29.block_sparse_moe.experts.69.w1", "model.layers.29.block_sparse_moe.experts.70.w1", "model.layers.29.block_sparse_moe.experts.71.w1", "model.layers.29.block_sparse_moe.experts.72.w1", "model.layers.29.block_sparse_moe.experts.73.w1", "model.layers.29.block_sparse_moe.experts.74.w1", "model.layers.29.block_sparse_moe.experts.75.w1", "model.layers.29.block_sparse_moe.experts.76.w1", "model.layers.29.block_sparse_moe.experts.77.w1", "model.layers.29.block_sparse_moe.experts.78.w1", "model.layers.29.block_sparse_moe.experts.79.w1", "model.layers.29.block_sparse_moe.experts.80.w1", "model.layers.29.block_sparse_moe.experts.81.w1", "model.layers.29.block_sparse_moe.experts.82.w1", "model.layers.29.block_sparse_moe.experts.83.w1", "model.layers.29.block_sparse_moe.experts.84.w1", "model.layers.29.block_sparse_moe.experts.85.w1", "model.layers.29.block_sparse_moe.experts.86.w1", "model.layers.29.block_sparse_moe.experts.87.w1", "model.layers.29.block_sparse_moe.experts.88.w1", "model.layers.29.block_sparse_moe.experts.89.w1", "model.layers.29.block_sparse_moe.experts.90.w1", "model.layers.29.block_sparse_moe.experts.91.w1", "model.layers.29.block_sparse_moe.experts.92.w1", "model.layers.29.block_sparse_moe.experts.93.w1", "model.layers.29.block_sparse_moe.experts.94.w1", "model.layers.29.block_sparse_moe.experts.95.w1", "model.layers.29.block_sparse_moe.experts.96.w1", "model.layers.29.block_sparse_moe.experts.97.w1", "model.layers.29.block_sparse_moe.experts.98.w1", "model.layers.29.block_sparse_moe.experts.99.w1", "model.layers.29.block_sparse_moe.experts.100.w1", "model.layers.29.block_sparse_moe.experts.101.w1", "model.layers.29.block_sparse_moe.experts.102.w1", "model.layers.29.block_sparse_moe.experts.103.w1", "model.layers.29.block_sparse_moe.experts.104.w1", "model.layers.29.block_sparse_moe.experts.105.w1", "model.layers.29.block_sparse_moe.experts.106.w1", "model.layers.29.block_sparse_moe.experts.107.w1", "model.layers.29.block_sparse_moe.experts.108.w1", "model.layers.29.block_sparse_moe.experts.109.w1", "model.layers.29.block_sparse_moe.experts.110.w1", "model.layers.29.block_sparse_moe.experts.111.w1", "model.layers.29.block_sparse_moe.experts.112.w1", "model.layers.29.block_sparse_moe.experts.113.w1", "model.layers.29.block_sparse_moe.experts.114.w1", "model.layers.29.block_sparse_moe.experts.115.w1", "model.layers.29.block_sparse_moe.experts.116.w1", "model.layers.29.block_sparse_moe.experts.117.w1", "model.layers.29.block_sparse_moe.experts.118.w1", "model.layers.29.block_sparse_moe.experts.119.w1", "model.layers.29.block_sparse_moe.experts.120.w1", "model.layers.29.block_sparse_moe.experts.121.w1", "model.layers.29.block_sparse_moe.experts.122.w1", "model.layers.29.block_sparse_moe.experts.123.w1", "model.layers.29.block_sparse_moe.experts.124.w1", "model.layers.29.block_sparse_moe.experts.125.w1", "model.layers.29.block_sparse_moe.experts.126.w1", "model.layers.29.block_sparse_moe.experts.127.w1", "model.layers.29.block_sparse_moe.experts.128.w1", "model.layers.29.block_sparse_moe.experts.129.w1", "model.layers.29.block_sparse_moe.experts.130.w1", "model.layers.29.block_sparse_moe.experts.131.w1", "model.layers.29.block_sparse_moe.experts.132.w1", "model.layers.29.block_sparse_moe.experts.133.w1", "model.layers.29.block_sparse_moe.experts.134.w1", "model.layers.29.block_sparse_moe.experts.135.w1", "model.layers.29.block_sparse_moe.experts.136.w1", "model.layers.29.block_sparse_moe.experts.137.w1", "model.layers.29.block_sparse_moe.experts.138.w1", "model.layers.29.block_sparse_moe.experts.139.w1", "model.layers.29.block_sparse_moe.experts.140.w1", "model.layers.29.block_sparse_moe.experts.141.w1", "model.layers.29.block_sparse_moe.experts.142.w1", "model.layers.29.block_sparse_moe.experts.143.w1", "model.layers.29.block_sparse_moe.experts.144.w1", "model.layers.29.block_sparse_moe.experts.145.w1", "model.layers.29.block_sparse_moe.experts.146.w1", "model.layers.29.block_sparse_moe.experts.147.w1", "model.layers.29.block_sparse_moe.experts.148.w1", "model.layers.29.block_sparse_moe.experts.149.w1", "model.layers.29.block_sparse_moe.experts.150.w1", "model.layers.29.block_sparse_moe.experts.151.w1", "model.layers.29.block_sparse_moe.experts.152.w1", "model.layers.29.block_sparse_moe.experts.153.w1", "model.layers.29.block_sparse_moe.experts.154.w1", "model.layers.29.block_sparse_moe.experts.155.w1", "model.layers.29.block_sparse_moe.experts.156.w1", "model.layers.29.block_sparse_moe.experts.157.w1", "model.layers.29.block_sparse_moe.experts.158.w1", "model.layers.29.block_sparse_moe.experts.159.w1", "model.layers.29.block_sparse_moe.experts.160.w1", "model.layers.29.block_sparse_moe.experts.161.w1", "model.layers.29.block_sparse_moe.experts.162.w1", "model.layers.29.block_sparse_moe.experts.163.w1", "model.layers.29.block_sparse_moe.experts.164.w1", "model.layers.29.block_sparse_moe.experts.165.w1", "model.layers.29.block_sparse_moe.experts.166.w1", "model.layers.29.block_sparse_moe.experts.167.w1", "model.layers.29.block_sparse_moe.experts.168.w1", "model.layers.29.block_sparse_moe.experts.169.w1", "model.layers.29.block_sparse_moe.experts.170.w1", "model.layers.29.block_sparse_moe.experts.171.w1", "model.layers.29.block_sparse_moe.experts.172.w1", "model.layers.29.block_sparse_moe.experts.173.w1", "model.layers.29.block_sparse_moe.experts.174.w1", "model.layers.29.block_sparse_moe.experts.175.w1", "model.layers.29.block_sparse_moe.experts.176.w1", "model.layers.29.block_sparse_moe.experts.177.w1", "model.layers.29.block_sparse_moe.experts.178.w1", "model.layers.29.block_sparse_moe.experts.179.w1", "model.layers.29.block_sparse_moe.experts.180.w1", "model.layers.29.block_sparse_moe.experts.181.w1", "model.layers.29.block_sparse_moe.experts.182.w1", "model.layers.29.block_sparse_moe.experts.183.w1", "model.layers.29.block_sparse_moe.experts.184.w1", "model.layers.29.block_sparse_moe.experts.185.w1", "model.layers.29.block_sparse_moe.experts.186.w1", "model.layers.29.block_sparse_moe.experts.187.w1", "model.layers.29.block_sparse_moe.experts.188.w1", "model.layers.29.block_sparse_moe.experts.189.w1", "model.layers.29.block_sparse_moe.experts.190.w1", "model.layers.29.block_sparse_moe.experts.191.w1", "model.layers.29.block_sparse_moe.experts.192.w1", "model.layers.29.block_sparse_moe.experts.193.w1", "model.layers.29.block_sparse_moe.experts.194.w1", "model.layers.29.block_sparse_moe.experts.195.w1", "model.layers.29.block_sparse_moe.experts.196.w1", "model.layers.29.block_sparse_moe.experts.197.w1", "model.layers.29.block_sparse_moe.experts.198.w1", "model.layers.29.block_sparse_moe.experts.199.w1", "model.layers.29.block_sparse_moe.experts.200.w1", "model.layers.29.block_sparse_moe.experts.201.w1", "model.layers.29.block_sparse_moe.experts.202.w1", "model.layers.29.block_sparse_moe.experts.203.w1", "model.layers.29.block_sparse_moe.experts.204.w1", "model.layers.29.block_sparse_moe.experts.205.w1", "model.layers.29.block_sparse_moe.experts.206.w1", "model.layers.29.block_sparse_moe.experts.207.w1", "model.layers.29.block_sparse_moe.experts.208.w1", "model.layers.29.block_sparse_moe.experts.209.w1", "model.layers.29.block_sparse_moe.experts.210.w1", "model.layers.29.block_sparse_moe.experts.211.w1", "model.layers.29.block_sparse_moe.experts.212.w1", "model.layers.29.block_sparse_moe.experts.213.w1", "model.layers.29.block_sparse_moe.experts.214.w1", "model.layers.29.block_sparse_moe.experts.215.w1", "model.layers.29.block_sparse_moe.experts.216.w1", "model.layers.29.block_sparse_moe.experts.217.w1", "model.layers.29.block_sparse_moe.experts.218.w1", "model.layers.29.block_sparse_moe.experts.219.w1", "model.layers.29.block_sparse_moe.experts.220.w1", "model.layers.29.block_sparse_moe.experts.221.w1", "model.layers.29.block_sparse_moe.experts.222.w1", "model.layers.29.block_sparse_moe.experts.223.w1", "model.layers.29.block_sparse_moe.experts.224.w1", "model.layers.29.block_sparse_moe.experts.225.w1", "model.layers.29.block_sparse_moe.experts.226.w1", "model.layers.29.block_sparse_moe.experts.227.w1", "model.layers.29.block_sparse_moe.experts.228.w1", "model.layers.29.block_sparse_moe.experts.229.w1", "model.layers.29.block_sparse_moe.experts.230.w1", "model.layers.29.block_sparse_moe.experts.231.w1", "model.layers.29.block_sparse_moe.experts.232.w1", "model.layers.29.block_sparse_moe.experts.233.w1", "model.layers.29.block_sparse_moe.experts.234.w1", "model.layers.29.block_sparse_moe.experts.235.w1", "model.layers.29.block_sparse_moe.experts.236.w1", "model.layers.29.block_sparse_moe.experts.237.w1", "model.layers.29.block_sparse_moe.experts.238.w1", "model.layers.29.block_sparse_moe.experts.239.w1", "model.layers.29.block_sparse_moe.experts.240.w1", "model.layers.29.block_sparse_moe.experts.241.w1", "model.layers.29.block_sparse_moe.experts.242.w1", "model.layers.29.block_sparse_moe.experts.243.w1", "model.layers.29.block_sparse_moe.experts.244.w1", "model.layers.29.block_sparse_moe.experts.245.w1", "model.layers.29.block_sparse_moe.experts.246.w1", "model.layers.29.block_sparse_moe.experts.247.w1", "model.layers.29.block_sparse_moe.experts.248.w1", "model.layers.29.block_sparse_moe.experts.249.w1", "model.layers.29.block_sparse_moe.experts.250.w1", "model.layers.29.block_sparse_moe.experts.251.w1", "model.layers.29.block_sparse_moe.experts.252.w1", "model.layers.29.block_sparse_moe.experts.253.w1", "model.layers.29.block_sparse_moe.experts.254.w1", "model.layers.29.block_sparse_moe.experts.255.w1", "model.layers.29.block_sparse_moe.experts.0.w3", "model.layers.29.block_sparse_moe.experts.1.w3", "model.layers.29.block_sparse_moe.experts.2.w3", "model.layers.29.block_sparse_moe.experts.3.w3", "model.layers.29.block_sparse_moe.experts.4.w3", "model.layers.29.block_sparse_moe.experts.5.w3", "model.layers.29.block_sparse_moe.experts.6.w3", "model.layers.29.block_sparse_moe.experts.7.w3", "model.layers.29.block_sparse_moe.experts.8.w3", "model.layers.29.block_sparse_moe.experts.9.w3", "model.layers.29.block_sparse_moe.experts.10.w3", "model.layers.29.block_sparse_moe.experts.11.w3", "model.layers.29.block_sparse_moe.experts.12.w3", "model.layers.29.block_sparse_moe.experts.13.w3", "model.layers.29.block_sparse_moe.experts.14.w3", "model.layers.29.block_sparse_moe.experts.15.w3", "model.layers.29.block_sparse_moe.experts.16.w3", "model.layers.29.block_sparse_moe.experts.17.w3", "model.layers.29.block_sparse_moe.experts.18.w3", "model.layers.29.block_sparse_moe.experts.19.w3", "model.layers.29.block_sparse_moe.experts.20.w3", "model.layers.29.block_sparse_moe.experts.21.w3", "model.layers.29.block_sparse_moe.experts.22.w3", "model.layers.29.block_sparse_moe.experts.23.w3", "model.layers.29.block_sparse_moe.experts.24.w3", "model.layers.29.block_sparse_moe.experts.25.w3", "model.layers.29.block_sparse_moe.experts.26.w3", "model.layers.29.block_sparse_moe.experts.27.w3", "model.layers.29.block_sparse_moe.experts.28.w3", "model.layers.29.block_sparse_moe.experts.29.w3", "model.layers.29.block_sparse_moe.experts.30.w3", "model.layers.29.block_sparse_moe.experts.31.w3", "model.layers.29.block_sparse_moe.experts.32.w3", "model.layers.29.block_sparse_moe.experts.33.w3", "model.layers.29.block_sparse_moe.experts.34.w3", "model.layers.29.block_sparse_moe.experts.35.w3", "model.layers.29.block_sparse_moe.experts.36.w3", "model.layers.29.block_sparse_moe.experts.37.w3", "model.layers.29.block_sparse_moe.experts.38.w3", "model.layers.29.block_sparse_moe.experts.39.w3", "model.layers.29.block_sparse_moe.experts.40.w3", "model.layers.29.block_sparse_moe.experts.41.w3", "model.layers.29.block_sparse_moe.experts.42.w3", "model.layers.29.block_sparse_moe.experts.43.w3", "model.layers.29.block_sparse_moe.experts.44.w3", "model.layers.29.block_sparse_moe.experts.45.w3", "model.layers.29.block_sparse_moe.experts.46.w3", "model.layers.29.block_sparse_moe.experts.47.w3", "model.layers.29.block_sparse_moe.experts.48.w3", "model.layers.29.block_sparse_moe.experts.49.w3", "model.layers.29.block_sparse_moe.experts.50.w3", "model.layers.29.block_sparse_moe.experts.51.w3", "model.layers.29.block_sparse_moe.experts.52.w3", "model.layers.29.block_sparse_moe.experts.53.w3", "model.layers.29.block_sparse_moe.experts.54.w3", "model.layers.29.block_sparse_moe.experts.55.w3", "model.layers.29.block_sparse_moe.experts.56.w3", "model.layers.29.block_sparse_moe.experts.57.w3", "model.layers.29.block_sparse_moe.experts.58.w3", "model.layers.29.block_sparse_moe.experts.59.w3", "model.layers.29.block_sparse_moe.experts.60.w3", "model.layers.29.block_sparse_moe.experts.61.w3", "model.layers.29.block_sparse_moe.experts.62.w3", "model.layers.29.block_sparse_moe.experts.63.w3", "model.layers.29.block_sparse_moe.experts.64.w3", "model.layers.29.block_sparse_moe.experts.65.w3", "model.layers.29.block_sparse_moe.experts.66.w3", "model.layers.29.block_sparse_moe.experts.67.w3", "model.layers.29.block_sparse_moe.experts.68.w3", "model.layers.29.block_sparse_moe.experts.69.w3", "model.layers.29.block_sparse_moe.experts.70.w3", "model.layers.29.block_sparse_moe.experts.71.w3", "model.layers.29.block_sparse_moe.experts.72.w3", "model.layers.29.block_sparse_moe.experts.73.w3", "model.layers.29.block_sparse_moe.experts.74.w3", "model.layers.29.block_sparse_moe.experts.75.w3", "model.layers.29.block_sparse_moe.experts.76.w3", "model.layers.29.block_sparse_moe.experts.77.w3", "model.layers.29.block_sparse_moe.experts.78.w3", "model.layers.29.block_sparse_moe.experts.79.w3", "model.layers.29.block_sparse_moe.experts.80.w3", "model.layers.29.block_sparse_moe.experts.81.w3", "model.layers.29.block_sparse_moe.experts.82.w3", "model.layers.29.block_sparse_moe.experts.83.w3", "model.layers.29.block_sparse_moe.experts.84.w3", "model.layers.29.block_sparse_moe.experts.85.w3", "model.layers.29.block_sparse_moe.experts.86.w3", "model.layers.29.block_sparse_moe.experts.87.w3", "model.layers.29.block_sparse_moe.experts.88.w3", "model.layers.29.block_sparse_moe.experts.89.w3", "model.layers.29.block_sparse_moe.experts.90.w3", "model.layers.29.block_sparse_moe.experts.91.w3", "model.layers.29.block_sparse_moe.experts.92.w3", "model.layers.29.block_sparse_moe.experts.93.w3", "model.layers.29.block_sparse_moe.experts.94.w3", "model.layers.29.block_sparse_moe.experts.95.w3", "model.layers.29.block_sparse_moe.experts.96.w3", "model.layers.29.block_sparse_moe.experts.97.w3", "model.layers.29.block_sparse_moe.experts.98.w3", "model.layers.29.block_sparse_moe.experts.99.w3", "model.layers.29.block_sparse_moe.experts.100.w3", "model.layers.29.block_sparse_moe.experts.101.w3", "model.layers.29.block_sparse_moe.experts.102.w3", "model.layers.29.block_sparse_moe.experts.103.w3", "model.layers.29.block_sparse_moe.experts.104.w3", "model.layers.29.block_sparse_moe.experts.105.w3", "model.layers.29.block_sparse_moe.experts.106.w3", "model.layers.29.block_sparse_moe.experts.107.w3", "model.layers.29.block_sparse_moe.experts.108.w3", "model.layers.29.block_sparse_moe.experts.109.w3", "model.layers.29.block_sparse_moe.experts.110.w3", "model.layers.29.block_sparse_moe.experts.111.w3", "model.layers.29.block_sparse_moe.experts.112.w3", "model.layers.29.block_sparse_moe.experts.113.w3", "model.layers.29.block_sparse_moe.experts.114.w3", "model.layers.29.block_sparse_moe.experts.115.w3", "model.layers.29.block_sparse_moe.experts.116.w3", "model.layers.29.block_sparse_moe.experts.117.w3", "model.layers.29.block_sparse_moe.experts.118.w3", "model.layers.29.block_sparse_moe.experts.119.w3", "model.layers.29.block_sparse_moe.experts.120.w3", "model.layers.29.block_sparse_moe.experts.121.w3", "model.layers.29.block_sparse_moe.experts.122.w3", "model.layers.29.block_sparse_moe.experts.123.w3", "model.layers.29.block_sparse_moe.experts.124.w3", "model.layers.29.block_sparse_moe.experts.125.w3", "model.layers.29.block_sparse_moe.experts.126.w3", "model.layers.29.block_sparse_moe.experts.127.w3", "model.layers.29.block_sparse_moe.experts.128.w3", "model.layers.29.block_sparse_moe.experts.129.w3", "model.layers.29.block_sparse_moe.experts.130.w3", "model.layers.29.block_sparse_moe.experts.131.w3", "model.layers.29.block_sparse_moe.experts.132.w3", "model.layers.29.block_sparse_moe.experts.133.w3", "model.layers.29.block_sparse_moe.experts.134.w3", "model.layers.29.block_sparse_moe.experts.135.w3", "model.layers.29.block_sparse_moe.experts.136.w3", "model.layers.29.block_sparse_moe.experts.137.w3", "model.layers.29.block_sparse_moe.experts.138.w3", "model.layers.29.block_sparse_moe.experts.139.w3", "model.layers.29.block_sparse_moe.experts.140.w3", "model.layers.29.block_sparse_moe.experts.141.w3", "model.layers.29.block_sparse_moe.experts.142.w3", "model.layers.29.block_sparse_moe.experts.143.w3", "model.layers.29.block_sparse_moe.experts.144.w3", "model.layers.29.block_sparse_moe.experts.145.w3", "model.layers.29.block_sparse_moe.experts.146.w3", "model.layers.29.block_sparse_moe.experts.147.w3", "model.layers.29.block_sparse_moe.experts.148.w3", "model.layers.29.block_sparse_moe.experts.149.w3", "model.layers.29.block_sparse_moe.experts.150.w3", "model.layers.29.block_sparse_moe.experts.151.w3", "model.layers.29.block_sparse_moe.experts.152.w3", "model.layers.29.block_sparse_moe.experts.153.w3", "model.layers.29.block_sparse_moe.experts.154.w3", "model.layers.29.block_sparse_moe.experts.155.w3", "model.layers.29.block_sparse_moe.experts.156.w3", "model.layers.29.block_sparse_moe.experts.157.w3", "model.layers.29.block_sparse_moe.experts.158.w3", "model.layers.29.block_sparse_moe.experts.159.w3", "model.layers.29.block_sparse_moe.experts.160.w3", "model.layers.29.block_sparse_moe.experts.161.w3", "model.layers.29.block_sparse_moe.experts.162.w3", "model.layers.29.block_sparse_moe.experts.163.w3", "model.layers.29.block_sparse_moe.experts.164.w3", "model.layers.29.block_sparse_moe.experts.165.w3", "model.layers.29.block_sparse_moe.experts.166.w3", "model.layers.29.block_sparse_moe.experts.167.w3", "model.layers.29.block_sparse_moe.experts.168.w3", "model.layers.29.block_sparse_moe.experts.169.w3", "model.layers.29.block_sparse_moe.experts.170.w3", "model.layers.29.block_sparse_moe.experts.171.w3", "model.layers.29.block_sparse_moe.experts.172.w3", "model.layers.29.block_sparse_moe.experts.173.w3", "model.layers.29.block_sparse_moe.experts.174.w3", "model.layers.29.block_sparse_moe.experts.175.w3", "model.layers.29.block_sparse_moe.experts.176.w3", "model.layers.29.block_sparse_moe.experts.177.w3", "model.layers.29.block_sparse_moe.experts.178.w3", "model.layers.29.block_sparse_moe.experts.179.w3", "model.layers.29.block_sparse_moe.experts.180.w3", "model.layers.29.block_sparse_moe.experts.181.w3", "model.layers.29.block_sparse_moe.experts.182.w3", "model.layers.29.block_sparse_moe.experts.183.w3", "model.layers.29.block_sparse_moe.experts.184.w3", "model.layers.29.block_sparse_moe.experts.185.w3", "model.layers.29.block_sparse_moe.experts.186.w3", "model.layers.29.block_sparse_moe.experts.187.w3", "model.layers.29.block_sparse_moe.experts.188.w3", "model.layers.29.block_sparse_moe.experts.189.w3", "model.layers.29.block_sparse_moe.experts.190.w3", "model.layers.29.block_sparse_moe.experts.191.w3", "model.layers.29.block_sparse_moe.experts.192.w3", "model.layers.29.block_sparse_moe.experts.193.w3", "model.layers.29.block_sparse_moe.experts.194.w3", "model.layers.29.block_sparse_moe.experts.195.w3", "model.layers.29.block_sparse_moe.experts.196.w3", "model.layers.29.block_sparse_moe.experts.197.w3", "model.layers.29.block_sparse_moe.experts.198.w3", "model.layers.29.block_sparse_moe.experts.199.w3", "model.layers.29.block_sparse_moe.experts.200.w3", "model.layers.29.block_sparse_moe.experts.201.w3", "model.layers.29.block_sparse_moe.experts.202.w3", "model.layers.29.block_sparse_moe.experts.203.w3", "model.layers.29.block_sparse_moe.experts.204.w3", "model.layers.29.block_sparse_moe.experts.205.w3", "model.layers.29.block_sparse_moe.experts.206.w3", "model.layers.29.block_sparse_moe.experts.207.w3", "model.layers.29.block_sparse_moe.experts.208.w3", "model.layers.29.block_sparse_moe.experts.209.w3", "model.layers.29.block_sparse_moe.experts.210.w3", "model.layers.29.block_sparse_moe.experts.211.w3", "model.layers.29.block_sparse_moe.experts.212.w3", "model.layers.29.block_sparse_moe.experts.213.w3", "model.layers.29.block_sparse_moe.experts.214.w3", "model.layers.29.block_sparse_moe.experts.215.w3", "model.layers.29.block_sparse_moe.experts.216.w3", "model.layers.29.block_sparse_moe.experts.217.w3", "model.layers.29.block_sparse_moe.experts.218.w3", "model.layers.29.block_sparse_moe.experts.219.w3", "model.layers.29.block_sparse_moe.experts.220.w3", "model.layers.29.block_sparse_moe.experts.221.w3", "model.layers.29.block_sparse_moe.experts.222.w3", "model.layers.29.block_sparse_moe.experts.223.w3", "model.layers.29.block_sparse_moe.experts.224.w3", "model.layers.29.block_sparse_moe.experts.225.w3", "model.layers.29.block_sparse_moe.experts.226.w3", "model.layers.29.block_sparse_moe.experts.227.w3", "model.layers.29.block_sparse_moe.experts.228.w3", "model.layers.29.block_sparse_moe.experts.229.w3", "model.layers.29.block_sparse_moe.experts.230.w3", "model.layers.29.block_sparse_moe.experts.231.w3", "model.layers.29.block_sparse_moe.experts.232.w3", "model.layers.29.block_sparse_moe.experts.233.w3", "model.layers.29.block_sparse_moe.experts.234.w3", "model.layers.29.block_sparse_moe.experts.235.w3", "model.layers.29.block_sparse_moe.experts.236.w3", "model.layers.29.block_sparse_moe.experts.237.w3", "model.layers.29.block_sparse_moe.experts.238.w3", "model.layers.29.block_sparse_moe.experts.239.w3", "model.layers.29.block_sparse_moe.experts.240.w3", "model.layers.29.block_sparse_moe.experts.241.w3", "model.layers.29.block_sparse_moe.experts.242.w3", "model.layers.29.block_sparse_moe.experts.243.w3", "model.layers.29.block_sparse_moe.experts.244.w3", "model.layers.29.block_sparse_moe.experts.245.w3", "model.layers.29.block_sparse_moe.experts.246.w3", "model.layers.29.block_sparse_moe.experts.247.w3", "model.layers.29.block_sparse_moe.experts.248.w3", "model.layers.29.block_sparse_moe.experts.249.w3", "model.layers.29.block_sparse_moe.experts.250.w3", "model.layers.29.block_sparse_moe.experts.251.w3", "model.layers.29.block_sparse_moe.experts.252.w3", "model.layers.29.block_sparse_moe.experts.253.w3", "model.layers.29.block_sparse_moe.experts.254.w3", "model.layers.29.block_sparse_moe.experts.255.w3", "model.layers.29.block_sparse_moe.experts.0.w2", "model.layers.29.block_sparse_moe.experts.1.w2", "model.layers.29.block_sparse_moe.experts.2.w2", "model.layers.29.block_sparse_moe.experts.3.w2", "model.layers.29.block_sparse_moe.experts.4.w2", "model.layers.29.block_sparse_moe.experts.5.w2", "model.layers.29.block_sparse_moe.experts.6.w2", "model.layers.29.block_sparse_moe.experts.7.w2", "model.layers.29.block_sparse_moe.experts.8.w2", "model.layers.29.block_sparse_moe.experts.9.w2", "model.layers.29.block_sparse_moe.experts.10.w2", "model.layers.29.block_sparse_moe.experts.11.w2", "model.layers.29.block_sparse_moe.experts.12.w2", "model.layers.29.block_sparse_moe.experts.13.w2", "model.layers.29.block_sparse_moe.experts.14.w2", "model.layers.29.block_sparse_moe.experts.15.w2", "model.layers.29.block_sparse_moe.experts.16.w2", "model.layers.29.block_sparse_moe.experts.17.w2", "model.layers.29.block_sparse_moe.experts.18.w2", "model.layers.29.block_sparse_moe.experts.19.w2", "model.layers.29.block_sparse_moe.experts.20.w2", "model.layers.29.block_sparse_moe.experts.21.w2", "model.layers.29.block_sparse_moe.experts.22.w2", "model.layers.29.block_sparse_moe.experts.23.w2", "model.layers.29.block_sparse_moe.experts.24.w2", "model.layers.29.block_sparse_moe.experts.25.w2", "model.layers.29.block_sparse_moe.experts.26.w2", "model.layers.29.block_sparse_moe.experts.27.w2", "model.layers.29.block_sparse_moe.experts.28.w2", "model.layers.29.block_sparse_moe.experts.29.w2", "model.layers.29.block_sparse_moe.experts.30.w2", "model.layers.29.block_sparse_moe.experts.31.w2", "model.layers.29.block_sparse_moe.experts.32.w2", "model.layers.29.block_sparse_moe.experts.33.w2", "model.layers.29.block_sparse_moe.experts.34.w2", "model.layers.29.block_sparse_moe.experts.35.w2", "model.layers.29.block_sparse_moe.experts.36.w2", "model.layers.29.block_sparse_moe.experts.37.w2", "model.layers.29.block_sparse_moe.experts.38.w2", "model.layers.29.block_sparse_moe.experts.39.w2", "model.layers.29.block_sparse_moe.experts.40.w2", "model.layers.29.block_sparse_moe.experts.41.w2", "model.layers.29.block_sparse_moe.experts.42.w2", "model.layers.29.block_sparse_moe.experts.43.w2", "model.layers.29.block_sparse_moe.experts.44.w2", "model.layers.29.block_sparse_moe.experts.45.w2", "model.layers.29.block_sparse_moe.experts.46.w2", "model.layers.29.block_sparse_moe.experts.47.w2", "model.layers.29.block_sparse_moe.experts.48.w2", "model.layers.29.block_sparse_moe.experts.49.w2", "model.layers.29.block_sparse_moe.experts.50.w2", "model.layers.29.block_sparse_moe.experts.51.w2", "model.layers.29.block_sparse_moe.experts.52.w2", "model.layers.29.block_sparse_moe.experts.53.w2", "model.layers.29.block_sparse_moe.experts.54.w2", "model.layers.29.block_sparse_moe.experts.55.w2", "model.layers.29.block_sparse_moe.experts.56.w2", "model.layers.29.block_sparse_moe.experts.57.w2", "model.layers.29.block_sparse_moe.experts.58.w2", "model.layers.29.block_sparse_moe.experts.59.w2", "model.layers.29.block_sparse_moe.experts.60.w2", "model.layers.29.block_sparse_moe.experts.61.w2", "model.layers.29.block_sparse_moe.experts.62.w2", "model.layers.29.block_sparse_moe.experts.63.w2", "model.layers.29.block_sparse_moe.experts.64.w2", "model.layers.29.block_sparse_moe.experts.65.w2", "model.layers.29.block_sparse_moe.experts.66.w2", "model.layers.29.block_sparse_moe.experts.67.w2", "model.layers.29.block_sparse_moe.experts.68.w2", "model.layers.29.block_sparse_moe.experts.69.w2", "model.layers.29.block_sparse_moe.experts.70.w2", "model.layers.29.block_sparse_moe.experts.71.w2", "model.layers.29.block_sparse_moe.experts.72.w2", "model.layers.29.block_sparse_moe.experts.73.w2", "model.layers.29.block_sparse_moe.experts.74.w2", "model.layers.29.block_sparse_moe.experts.75.w2", "model.layers.29.block_sparse_moe.experts.76.w2", "model.layers.29.block_sparse_moe.experts.77.w2", "model.layers.29.block_sparse_moe.experts.78.w2", "model.layers.29.block_sparse_moe.experts.79.w2", "model.layers.29.block_sparse_moe.experts.80.w2", "model.layers.29.block_sparse_moe.experts.81.w2", "model.layers.29.block_sparse_moe.experts.82.w2", "model.layers.29.block_sparse_moe.experts.83.w2", "model.layers.29.block_sparse_moe.experts.84.w2", "model.layers.29.block_sparse_moe.experts.85.w2", "model.layers.29.block_sparse_moe.experts.86.w2", "model.layers.29.block_sparse_moe.experts.87.w2", "model.layers.29.block_sparse_moe.experts.88.w2", "model.layers.29.block_sparse_moe.experts.89.w2", "model.layers.29.block_sparse_moe.experts.90.w2", "model.layers.29.block_sparse_moe.experts.91.w2", "model.layers.29.block_sparse_moe.experts.92.w2", "model.layers.29.block_sparse_moe.experts.93.w2", "model.layers.29.block_sparse_moe.experts.94.w2", "model.layers.29.block_sparse_moe.experts.95.w2", "model.layers.29.block_sparse_moe.experts.96.w2", "model.layers.29.block_sparse_moe.experts.97.w2", "model.layers.29.block_sparse_moe.experts.98.w2", "model.layers.29.block_sparse_moe.experts.99.w2", "model.layers.29.block_sparse_moe.experts.100.w2", "model.layers.29.block_sparse_moe.experts.101.w2", "model.layers.29.block_sparse_moe.experts.102.w2", "model.layers.29.block_sparse_moe.experts.103.w2", "model.layers.29.block_sparse_moe.experts.104.w2", "model.layers.29.block_sparse_moe.experts.105.w2", "model.layers.29.block_sparse_moe.experts.106.w2", "model.layers.29.block_sparse_moe.experts.107.w2", "model.layers.29.block_sparse_moe.experts.108.w2", "model.layers.29.block_sparse_moe.experts.109.w2", "model.layers.29.block_sparse_moe.experts.110.w2", "model.layers.29.block_sparse_moe.experts.111.w2", "model.layers.29.block_sparse_moe.experts.112.w2", "model.layers.29.block_sparse_moe.experts.113.w2", "model.layers.29.block_sparse_moe.experts.114.w2", "model.layers.29.block_sparse_moe.experts.115.w2", "model.layers.29.block_sparse_moe.experts.116.w2", "model.layers.29.block_sparse_moe.experts.117.w2", "model.layers.29.block_sparse_moe.experts.118.w2", "model.layers.29.block_sparse_moe.experts.119.w2", "model.layers.29.block_sparse_moe.experts.120.w2", "model.layers.29.block_sparse_moe.experts.121.w2", "model.layers.29.block_sparse_moe.experts.122.w2", "model.layers.29.block_sparse_moe.experts.123.w2", "model.layers.29.block_sparse_moe.experts.124.w2", "model.layers.29.block_sparse_moe.experts.125.w2", "model.layers.29.block_sparse_moe.experts.126.w2", "model.layers.29.block_sparse_moe.experts.127.w2", "model.layers.29.block_sparse_moe.experts.128.w2", "model.layers.29.block_sparse_moe.experts.129.w2", "model.layers.29.block_sparse_moe.experts.130.w2", "model.layers.29.block_sparse_moe.experts.131.w2", "model.layers.29.block_sparse_moe.experts.132.w2", "model.layers.29.block_sparse_moe.experts.133.w2", "model.layers.29.block_sparse_moe.experts.134.w2", "model.layers.29.block_sparse_moe.experts.135.w2", "model.layers.29.block_sparse_moe.experts.136.w2", "model.layers.29.block_sparse_moe.experts.137.w2", "model.layers.29.block_sparse_moe.experts.138.w2", "model.layers.29.block_sparse_moe.experts.139.w2", "model.layers.29.block_sparse_moe.experts.140.w2", "model.layers.29.block_sparse_moe.experts.141.w2", "model.layers.29.block_sparse_moe.experts.142.w2", "model.layers.29.block_sparse_moe.experts.143.w2", "model.layers.29.block_sparse_moe.experts.144.w2", "model.layers.29.block_sparse_moe.experts.145.w2", "model.layers.29.block_sparse_moe.experts.146.w2", "model.layers.29.block_sparse_moe.experts.147.w2", "model.layers.29.block_sparse_moe.experts.148.w2", "model.layers.29.block_sparse_moe.experts.149.w2", "model.layers.29.block_sparse_moe.experts.150.w2", "model.layers.29.block_sparse_moe.experts.151.w2", "model.layers.29.block_sparse_moe.experts.152.w2", "model.layers.29.block_sparse_moe.experts.153.w2", "model.layers.29.block_sparse_moe.experts.154.w2", "model.layers.29.block_sparse_moe.experts.155.w2", "model.layers.29.block_sparse_moe.experts.156.w2", "model.layers.29.block_sparse_moe.experts.157.w2", "model.layers.29.block_sparse_moe.experts.158.w2", "model.layers.29.block_sparse_moe.experts.159.w2", "model.layers.29.block_sparse_moe.experts.160.w2", "model.layers.29.block_sparse_moe.experts.161.w2", "model.layers.29.block_sparse_moe.experts.162.w2", "model.layers.29.block_sparse_moe.experts.163.w2", "model.layers.29.block_sparse_moe.experts.164.w2", "model.layers.29.block_sparse_moe.experts.165.w2", "model.layers.29.block_sparse_moe.experts.166.w2", "model.layers.29.block_sparse_moe.experts.167.w2", "model.layers.29.block_sparse_moe.experts.168.w2", "model.layers.29.block_sparse_moe.experts.169.w2", "model.layers.29.block_sparse_moe.experts.170.w2", "model.layers.29.block_sparse_moe.experts.171.w2", "model.layers.29.block_sparse_moe.experts.172.w2", "model.layers.29.block_sparse_moe.experts.173.w2", "model.layers.29.block_sparse_moe.experts.174.w2", "model.layers.29.block_sparse_moe.experts.175.w2", "model.layers.29.block_sparse_moe.experts.176.w2", "model.layers.29.block_sparse_moe.experts.177.w2", "model.layers.29.block_sparse_moe.experts.178.w2", "model.layers.29.block_sparse_moe.experts.179.w2", "model.layers.29.block_sparse_moe.experts.180.w2", "model.layers.29.block_sparse_moe.experts.181.w2", "model.layers.29.block_sparse_moe.experts.182.w2", "model.layers.29.block_sparse_moe.experts.183.w2", "model.layers.29.block_sparse_moe.experts.184.w2", "model.layers.29.block_sparse_moe.experts.185.w2", "model.layers.29.block_sparse_moe.experts.186.w2", "model.layers.29.block_sparse_moe.experts.187.w2", "model.layers.29.block_sparse_moe.experts.188.w2", "model.layers.29.block_sparse_moe.experts.189.w2", "model.layers.29.block_sparse_moe.experts.190.w2", "model.layers.29.block_sparse_moe.experts.191.w2", "model.layers.29.block_sparse_moe.experts.192.w2", "model.layers.29.block_sparse_moe.experts.193.w2", "model.layers.29.block_sparse_moe.experts.194.w2", "model.layers.29.block_sparse_moe.experts.195.w2", "model.layers.29.block_sparse_moe.experts.196.w2", "model.layers.29.block_sparse_moe.experts.197.w2", "model.layers.29.block_sparse_moe.experts.198.w2", "model.layers.29.block_sparse_moe.experts.199.w2", "model.layers.29.block_sparse_moe.experts.200.w2", "model.layers.29.block_sparse_moe.experts.201.w2", "model.layers.29.block_sparse_moe.experts.202.w2", "model.layers.29.block_sparse_moe.experts.203.w2", "model.layers.29.block_sparse_moe.experts.204.w2", "model.layers.29.block_sparse_moe.experts.205.w2", "model.layers.29.block_sparse_moe.experts.206.w2", "model.layers.29.block_sparse_moe.experts.207.w2", "model.layers.29.block_sparse_moe.experts.208.w2", "model.layers.29.block_sparse_moe.experts.209.w2", "model.layers.29.block_sparse_moe.experts.210.w2", "model.layers.29.block_sparse_moe.experts.211.w2", "model.layers.29.block_sparse_moe.experts.212.w2", "model.layers.29.block_sparse_moe.experts.213.w2", "model.layers.29.block_sparse_moe.experts.214.w2", "model.layers.29.block_sparse_moe.experts.215.w2", "model.layers.29.block_sparse_moe.experts.216.w2", "model.layers.29.block_sparse_moe.experts.217.w2", "model.layers.29.block_sparse_moe.experts.218.w2", "model.layers.29.block_sparse_moe.experts.219.w2", "model.layers.29.block_sparse_moe.experts.220.w2", "model.layers.29.block_sparse_moe.experts.221.w2", "model.layers.29.block_sparse_moe.experts.222.w2", "model.layers.29.block_sparse_moe.experts.223.w2", "model.layers.29.block_sparse_moe.experts.224.w2", "model.layers.29.block_sparse_moe.experts.225.w2", "model.layers.29.block_sparse_moe.experts.226.w2", "model.layers.29.block_sparse_moe.experts.227.w2", "model.layers.29.block_sparse_moe.experts.228.w2", "model.layers.29.block_sparse_moe.experts.229.w2", "model.layers.29.block_sparse_moe.experts.230.w2", "model.layers.29.block_sparse_moe.experts.231.w2", "model.layers.29.block_sparse_moe.experts.232.w2", "model.layers.29.block_sparse_moe.experts.233.w2", "model.layers.29.block_sparse_moe.experts.234.w2", "model.layers.29.block_sparse_moe.experts.235.w2", "model.layers.29.block_sparse_moe.experts.236.w2", "model.layers.29.block_sparse_moe.experts.237.w2", "model.layers.29.block_sparse_moe.experts.238.w2", "model.layers.29.block_sparse_moe.experts.239.w2", "model.layers.29.block_sparse_moe.experts.240.w2", "model.layers.29.block_sparse_moe.experts.241.w2", "model.layers.29.block_sparse_moe.experts.242.w2", "model.layers.29.block_sparse_moe.experts.243.w2", "model.layers.29.block_sparse_moe.experts.244.w2", "model.layers.29.block_sparse_moe.experts.245.w2", "model.layers.29.block_sparse_moe.experts.246.w2", "model.layers.29.block_sparse_moe.experts.247.w2", "model.layers.29.block_sparse_moe.experts.248.w2", "model.layers.29.block_sparse_moe.experts.249.w2", "model.layers.29.block_sparse_moe.experts.250.w2", "model.layers.29.block_sparse_moe.experts.251.w2", "model.layers.29.block_sparse_moe.experts.252.w2", "model.layers.29.block_sparse_moe.experts.253.w2", "model.layers.29.block_sparse_moe.experts.254.w2", "model.layers.29.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0023641226813196914, "dbits": 3623878656 } ] }, { "idx": 60, "layers": [ "model.layers.30.self_attn.q_proj", "model.layers.30.self_attn.k_proj", "model.layers.30.self_attn.v_proj", "model.layers.30.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0022678978741169087, "dbits": 44040192 } ] }, { "idx": 61, "layers": [ "model.layers.30.block_sparse_moe.experts.0.w1", "model.layers.30.block_sparse_moe.experts.1.w1", "model.layers.30.block_sparse_moe.experts.2.w1", "model.layers.30.block_sparse_moe.experts.3.w1", "model.layers.30.block_sparse_moe.experts.4.w1", "model.layers.30.block_sparse_moe.experts.5.w1", "model.layers.30.block_sparse_moe.experts.6.w1", "model.layers.30.block_sparse_moe.experts.7.w1", "model.layers.30.block_sparse_moe.experts.8.w1", "model.layers.30.block_sparse_moe.experts.9.w1", "model.layers.30.block_sparse_moe.experts.10.w1", "model.layers.30.block_sparse_moe.experts.11.w1", "model.layers.30.block_sparse_moe.experts.12.w1", "model.layers.30.block_sparse_moe.experts.13.w1", "model.layers.30.block_sparse_moe.experts.14.w1", "model.layers.30.block_sparse_moe.experts.15.w1", "model.layers.30.block_sparse_moe.experts.16.w1", "model.layers.30.block_sparse_moe.experts.17.w1", "model.layers.30.block_sparse_moe.experts.18.w1", "model.layers.30.block_sparse_moe.experts.19.w1", "model.layers.30.block_sparse_moe.experts.20.w1", "model.layers.30.block_sparse_moe.experts.21.w1", "model.layers.30.block_sparse_moe.experts.22.w1", "model.layers.30.block_sparse_moe.experts.23.w1", "model.layers.30.block_sparse_moe.experts.24.w1", "model.layers.30.block_sparse_moe.experts.25.w1", "model.layers.30.block_sparse_moe.experts.26.w1", "model.layers.30.block_sparse_moe.experts.27.w1", "model.layers.30.block_sparse_moe.experts.28.w1", "model.layers.30.block_sparse_moe.experts.29.w1", "model.layers.30.block_sparse_moe.experts.30.w1", "model.layers.30.block_sparse_moe.experts.31.w1", "model.layers.30.block_sparse_moe.experts.32.w1", "model.layers.30.block_sparse_moe.experts.33.w1", "model.layers.30.block_sparse_moe.experts.34.w1", "model.layers.30.block_sparse_moe.experts.35.w1", "model.layers.30.block_sparse_moe.experts.36.w1", "model.layers.30.block_sparse_moe.experts.37.w1", "model.layers.30.block_sparse_moe.experts.38.w1", "model.layers.30.block_sparse_moe.experts.39.w1", "model.layers.30.block_sparse_moe.experts.40.w1", "model.layers.30.block_sparse_moe.experts.41.w1", "model.layers.30.block_sparse_moe.experts.42.w1", "model.layers.30.block_sparse_moe.experts.43.w1", "model.layers.30.block_sparse_moe.experts.44.w1", "model.layers.30.block_sparse_moe.experts.45.w1", "model.layers.30.block_sparse_moe.experts.46.w1", "model.layers.30.block_sparse_moe.experts.47.w1", "model.layers.30.block_sparse_moe.experts.48.w1", "model.layers.30.block_sparse_moe.experts.49.w1", "model.layers.30.block_sparse_moe.experts.50.w1", "model.layers.30.block_sparse_moe.experts.51.w1", "model.layers.30.block_sparse_moe.experts.52.w1", "model.layers.30.block_sparse_moe.experts.53.w1", "model.layers.30.block_sparse_moe.experts.54.w1", "model.layers.30.block_sparse_moe.experts.55.w1", "model.layers.30.block_sparse_moe.experts.56.w1", "model.layers.30.block_sparse_moe.experts.57.w1", "model.layers.30.block_sparse_moe.experts.58.w1", "model.layers.30.block_sparse_moe.experts.59.w1", "model.layers.30.block_sparse_moe.experts.60.w1", "model.layers.30.block_sparse_moe.experts.61.w1", "model.layers.30.block_sparse_moe.experts.62.w1", "model.layers.30.block_sparse_moe.experts.63.w1", "model.layers.30.block_sparse_moe.experts.64.w1", "model.layers.30.block_sparse_moe.experts.65.w1", "model.layers.30.block_sparse_moe.experts.66.w1", "model.layers.30.block_sparse_moe.experts.67.w1", "model.layers.30.block_sparse_moe.experts.68.w1", "model.layers.30.block_sparse_moe.experts.69.w1", "model.layers.30.block_sparse_moe.experts.70.w1", "model.layers.30.block_sparse_moe.experts.71.w1", "model.layers.30.block_sparse_moe.experts.72.w1", "model.layers.30.block_sparse_moe.experts.73.w1", "model.layers.30.block_sparse_moe.experts.74.w1", "model.layers.30.block_sparse_moe.experts.75.w1", "model.layers.30.block_sparse_moe.experts.76.w1", "model.layers.30.block_sparse_moe.experts.77.w1", "model.layers.30.block_sparse_moe.experts.78.w1", "model.layers.30.block_sparse_moe.experts.79.w1", "model.layers.30.block_sparse_moe.experts.80.w1", "model.layers.30.block_sparse_moe.experts.81.w1", "model.layers.30.block_sparse_moe.experts.82.w1", "model.layers.30.block_sparse_moe.experts.83.w1", "model.layers.30.block_sparse_moe.experts.84.w1", "model.layers.30.block_sparse_moe.experts.85.w1", "model.layers.30.block_sparse_moe.experts.86.w1", "model.layers.30.block_sparse_moe.experts.87.w1", "model.layers.30.block_sparse_moe.experts.88.w1", "model.layers.30.block_sparse_moe.experts.89.w1", "model.layers.30.block_sparse_moe.experts.90.w1", "model.layers.30.block_sparse_moe.experts.91.w1", "model.layers.30.block_sparse_moe.experts.92.w1", "model.layers.30.block_sparse_moe.experts.93.w1", "model.layers.30.block_sparse_moe.experts.94.w1", "model.layers.30.block_sparse_moe.experts.95.w1", "model.layers.30.block_sparse_moe.experts.96.w1", "model.layers.30.block_sparse_moe.experts.97.w1", "model.layers.30.block_sparse_moe.experts.98.w1", "model.layers.30.block_sparse_moe.experts.99.w1", "model.layers.30.block_sparse_moe.experts.100.w1", "model.layers.30.block_sparse_moe.experts.101.w1", "model.layers.30.block_sparse_moe.experts.102.w1", "model.layers.30.block_sparse_moe.experts.103.w1", "model.layers.30.block_sparse_moe.experts.104.w1", "model.layers.30.block_sparse_moe.experts.105.w1", "model.layers.30.block_sparse_moe.experts.106.w1", "model.layers.30.block_sparse_moe.experts.107.w1", "model.layers.30.block_sparse_moe.experts.108.w1", "model.layers.30.block_sparse_moe.experts.109.w1", "model.layers.30.block_sparse_moe.experts.110.w1", "model.layers.30.block_sparse_moe.experts.111.w1", "model.layers.30.block_sparse_moe.experts.112.w1", "model.layers.30.block_sparse_moe.experts.113.w1", "model.layers.30.block_sparse_moe.experts.114.w1", "model.layers.30.block_sparse_moe.experts.115.w1", "model.layers.30.block_sparse_moe.experts.116.w1", "model.layers.30.block_sparse_moe.experts.117.w1", "model.layers.30.block_sparse_moe.experts.118.w1", "model.layers.30.block_sparse_moe.experts.119.w1", "model.layers.30.block_sparse_moe.experts.120.w1", "model.layers.30.block_sparse_moe.experts.121.w1", "model.layers.30.block_sparse_moe.experts.122.w1", "model.layers.30.block_sparse_moe.experts.123.w1", "model.layers.30.block_sparse_moe.experts.124.w1", "model.layers.30.block_sparse_moe.experts.125.w1", "model.layers.30.block_sparse_moe.experts.126.w1", "model.layers.30.block_sparse_moe.experts.127.w1", "model.layers.30.block_sparse_moe.experts.128.w1", "model.layers.30.block_sparse_moe.experts.129.w1", "model.layers.30.block_sparse_moe.experts.130.w1", "model.layers.30.block_sparse_moe.experts.131.w1", "model.layers.30.block_sparse_moe.experts.132.w1", "model.layers.30.block_sparse_moe.experts.133.w1", "model.layers.30.block_sparse_moe.experts.134.w1", "model.layers.30.block_sparse_moe.experts.135.w1", "model.layers.30.block_sparse_moe.experts.136.w1", "model.layers.30.block_sparse_moe.experts.137.w1", "model.layers.30.block_sparse_moe.experts.138.w1", "model.layers.30.block_sparse_moe.experts.139.w1", "model.layers.30.block_sparse_moe.experts.140.w1", "model.layers.30.block_sparse_moe.experts.141.w1", "model.layers.30.block_sparse_moe.experts.142.w1", "model.layers.30.block_sparse_moe.experts.143.w1", "model.layers.30.block_sparse_moe.experts.144.w1", "model.layers.30.block_sparse_moe.experts.145.w1", "model.layers.30.block_sparse_moe.experts.146.w1", "model.layers.30.block_sparse_moe.experts.147.w1", "model.layers.30.block_sparse_moe.experts.148.w1", "model.layers.30.block_sparse_moe.experts.149.w1", "model.layers.30.block_sparse_moe.experts.150.w1", "model.layers.30.block_sparse_moe.experts.151.w1", "model.layers.30.block_sparse_moe.experts.152.w1", "model.layers.30.block_sparse_moe.experts.153.w1", "model.layers.30.block_sparse_moe.experts.154.w1", "model.layers.30.block_sparse_moe.experts.155.w1", "model.layers.30.block_sparse_moe.experts.156.w1", "model.layers.30.block_sparse_moe.experts.157.w1", "model.layers.30.block_sparse_moe.experts.158.w1", "model.layers.30.block_sparse_moe.experts.159.w1", "model.layers.30.block_sparse_moe.experts.160.w1", "model.layers.30.block_sparse_moe.experts.161.w1", "model.layers.30.block_sparse_moe.experts.162.w1", "model.layers.30.block_sparse_moe.experts.163.w1", "model.layers.30.block_sparse_moe.experts.164.w1", "model.layers.30.block_sparse_moe.experts.165.w1", "model.layers.30.block_sparse_moe.experts.166.w1", "model.layers.30.block_sparse_moe.experts.167.w1", "model.layers.30.block_sparse_moe.experts.168.w1", "model.layers.30.block_sparse_moe.experts.169.w1", "model.layers.30.block_sparse_moe.experts.170.w1", "model.layers.30.block_sparse_moe.experts.171.w1", "model.layers.30.block_sparse_moe.experts.172.w1", "model.layers.30.block_sparse_moe.experts.173.w1", "model.layers.30.block_sparse_moe.experts.174.w1", "model.layers.30.block_sparse_moe.experts.175.w1", "model.layers.30.block_sparse_moe.experts.176.w1", "model.layers.30.block_sparse_moe.experts.177.w1", "model.layers.30.block_sparse_moe.experts.178.w1", "model.layers.30.block_sparse_moe.experts.179.w1", "model.layers.30.block_sparse_moe.experts.180.w1", "model.layers.30.block_sparse_moe.experts.181.w1", "model.layers.30.block_sparse_moe.experts.182.w1", "model.layers.30.block_sparse_moe.experts.183.w1", "model.layers.30.block_sparse_moe.experts.184.w1", "model.layers.30.block_sparse_moe.experts.185.w1", "model.layers.30.block_sparse_moe.experts.186.w1", "model.layers.30.block_sparse_moe.experts.187.w1", "model.layers.30.block_sparse_moe.experts.188.w1", "model.layers.30.block_sparse_moe.experts.189.w1", "model.layers.30.block_sparse_moe.experts.190.w1", "model.layers.30.block_sparse_moe.experts.191.w1", "model.layers.30.block_sparse_moe.experts.192.w1", "model.layers.30.block_sparse_moe.experts.193.w1", "model.layers.30.block_sparse_moe.experts.194.w1", "model.layers.30.block_sparse_moe.experts.195.w1", "model.layers.30.block_sparse_moe.experts.196.w1", "model.layers.30.block_sparse_moe.experts.197.w1", "model.layers.30.block_sparse_moe.experts.198.w1", "model.layers.30.block_sparse_moe.experts.199.w1", "model.layers.30.block_sparse_moe.experts.200.w1", "model.layers.30.block_sparse_moe.experts.201.w1", "model.layers.30.block_sparse_moe.experts.202.w1", "model.layers.30.block_sparse_moe.experts.203.w1", "model.layers.30.block_sparse_moe.experts.204.w1", "model.layers.30.block_sparse_moe.experts.205.w1", "model.layers.30.block_sparse_moe.experts.206.w1", "model.layers.30.block_sparse_moe.experts.207.w1", "model.layers.30.block_sparse_moe.experts.208.w1", "model.layers.30.block_sparse_moe.experts.209.w1", "model.layers.30.block_sparse_moe.experts.210.w1", "model.layers.30.block_sparse_moe.experts.211.w1", "model.layers.30.block_sparse_moe.experts.212.w1", "model.layers.30.block_sparse_moe.experts.213.w1", "model.layers.30.block_sparse_moe.experts.214.w1", "model.layers.30.block_sparse_moe.experts.215.w1", "model.layers.30.block_sparse_moe.experts.216.w1", "model.layers.30.block_sparse_moe.experts.217.w1", "model.layers.30.block_sparse_moe.experts.218.w1", "model.layers.30.block_sparse_moe.experts.219.w1", "model.layers.30.block_sparse_moe.experts.220.w1", "model.layers.30.block_sparse_moe.experts.221.w1", "model.layers.30.block_sparse_moe.experts.222.w1", "model.layers.30.block_sparse_moe.experts.223.w1", "model.layers.30.block_sparse_moe.experts.224.w1", "model.layers.30.block_sparse_moe.experts.225.w1", "model.layers.30.block_sparse_moe.experts.226.w1", "model.layers.30.block_sparse_moe.experts.227.w1", "model.layers.30.block_sparse_moe.experts.228.w1", "model.layers.30.block_sparse_moe.experts.229.w1", "model.layers.30.block_sparse_moe.experts.230.w1", "model.layers.30.block_sparse_moe.experts.231.w1", "model.layers.30.block_sparse_moe.experts.232.w1", "model.layers.30.block_sparse_moe.experts.233.w1", "model.layers.30.block_sparse_moe.experts.234.w1", "model.layers.30.block_sparse_moe.experts.235.w1", "model.layers.30.block_sparse_moe.experts.236.w1", "model.layers.30.block_sparse_moe.experts.237.w1", "model.layers.30.block_sparse_moe.experts.238.w1", "model.layers.30.block_sparse_moe.experts.239.w1", "model.layers.30.block_sparse_moe.experts.240.w1", "model.layers.30.block_sparse_moe.experts.241.w1", "model.layers.30.block_sparse_moe.experts.242.w1", "model.layers.30.block_sparse_moe.experts.243.w1", "model.layers.30.block_sparse_moe.experts.244.w1", "model.layers.30.block_sparse_moe.experts.245.w1", "model.layers.30.block_sparse_moe.experts.246.w1", "model.layers.30.block_sparse_moe.experts.247.w1", "model.layers.30.block_sparse_moe.experts.248.w1", "model.layers.30.block_sparse_moe.experts.249.w1", "model.layers.30.block_sparse_moe.experts.250.w1", "model.layers.30.block_sparse_moe.experts.251.w1", "model.layers.30.block_sparse_moe.experts.252.w1", "model.layers.30.block_sparse_moe.experts.253.w1", "model.layers.30.block_sparse_moe.experts.254.w1", "model.layers.30.block_sparse_moe.experts.255.w1", "model.layers.30.block_sparse_moe.experts.0.w3", "model.layers.30.block_sparse_moe.experts.1.w3", "model.layers.30.block_sparse_moe.experts.2.w3", "model.layers.30.block_sparse_moe.experts.3.w3", "model.layers.30.block_sparse_moe.experts.4.w3", "model.layers.30.block_sparse_moe.experts.5.w3", "model.layers.30.block_sparse_moe.experts.6.w3", "model.layers.30.block_sparse_moe.experts.7.w3", "model.layers.30.block_sparse_moe.experts.8.w3", "model.layers.30.block_sparse_moe.experts.9.w3", "model.layers.30.block_sparse_moe.experts.10.w3", "model.layers.30.block_sparse_moe.experts.11.w3", "model.layers.30.block_sparse_moe.experts.12.w3", "model.layers.30.block_sparse_moe.experts.13.w3", "model.layers.30.block_sparse_moe.experts.14.w3", "model.layers.30.block_sparse_moe.experts.15.w3", "model.layers.30.block_sparse_moe.experts.16.w3", "model.layers.30.block_sparse_moe.experts.17.w3", "model.layers.30.block_sparse_moe.experts.18.w3", "model.layers.30.block_sparse_moe.experts.19.w3", "model.layers.30.block_sparse_moe.experts.20.w3", "model.layers.30.block_sparse_moe.experts.21.w3", "model.layers.30.block_sparse_moe.experts.22.w3", "model.layers.30.block_sparse_moe.experts.23.w3", "model.layers.30.block_sparse_moe.experts.24.w3", "model.layers.30.block_sparse_moe.experts.25.w3", "model.layers.30.block_sparse_moe.experts.26.w3", "model.layers.30.block_sparse_moe.experts.27.w3", "model.layers.30.block_sparse_moe.experts.28.w3", "model.layers.30.block_sparse_moe.experts.29.w3", "model.layers.30.block_sparse_moe.experts.30.w3", "model.layers.30.block_sparse_moe.experts.31.w3", "model.layers.30.block_sparse_moe.experts.32.w3", "model.layers.30.block_sparse_moe.experts.33.w3", "model.layers.30.block_sparse_moe.experts.34.w3", "model.layers.30.block_sparse_moe.experts.35.w3", "model.layers.30.block_sparse_moe.experts.36.w3", "model.layers.30.block_sparse_moe.experts.37.w3", "model.layers.30.block_sparse_moe.experts.38.w3", "model.layers.30.block_sparse_moe.experts.39.w3", "model.layers.30.block_sparse_moe.experts.40.w3", "model.layers.30.block_sparse_moe.experts.41.w3", "model.layers.30.block_sparse_moe.experts.42.w3", "model.layers.30.block_sparse_moe.experts.43.w3", "model.layers.30.block_sparse_moe.experts.44.w3", "model.layers.30.block_sparse_moe.experts.45.w3", "model.layers.30.block_sparse_moe.experts.46.w3", "model.layers.30.block_sparse_moe.experts.47.w3", "model.layers.30.block_sparse_moe.experts.48.w3", "model.layers.30.block_sparse_moe.experts.49.w3", "model.layers.30.block_sparse_moe.experts.50.w3", "model.layers.30.block_sparse_moe.experts.51.w3", "model.layers.30.block_sparse_moe.experts.52.w3", "model.layers.30.block_sparse_moe.experts.53.w3", "model.layers.30.block_sparse_moe.experts.54.w3", "model.layers.30.block_sparse_moe.experts.55.w3", "model.layers.30.block_sparse_moe.experts.56.w3", "model.layers.30.block_sparse_moe.experts.57.w3", "model.layers.30.block_sparse_moe.experts.58.w3", "model.layers.30.block_sparse_moe.experts.59.w3", "model.layers.30.block_sparse_moe.experts.60.w3", "model.layers.30.block_sparse_moe.experts.61.w3", "model.layers.30.block_sparse_moe.experts.62.w3", "model.layers.30.block_sparse_moe.experts.63.w3", "model.layers.30.block_sparse_moe.experts.64.w3", "model.layers.30.block_sparse_moe.experts.65.w3", "model.layers.30.block_sparse_moe.experts.66.w3", "model.layers.30.block_sparse_moe.experts.67.w3", "model.layers.30.block_sparse_moe.experts.68.w3", "model.layers.30.block_sparse_moe.experts.69.w3", "model.layers.30.block_sparse_moe.experts.70.w3", "model.layers.30.block_sparse_moe.experts.71.w3", "model.layers.30.block_sparse_moe.experts.72.w3", "model.layers.30.block_sparse_moe.experts.73.w3", "model.layers.30.block_sparse_moe.experts.74.w3", "model.layers.30.block_sparse_moe.experts.75.w3", "model.layers.30.block_sparse_moe.experts.76.w3", "model.layers.30.block_sparse_moe.experts.77.w3", "model.layers.30.block_sparse_moe.experts.78.w3", "model.layers.30.block_sparse_moe.experts.79.w3", "model.layers.30.block_sparse_moe.experts.80.w3", "model.layers.30.block_sparse_moe.experts.81.w3", "model.layers.30.block_sparse_moe.experts.82.w3", "model.layers.30.block_sparse_moe.experts.83.w3", "model.layers.30.block_sparse_moe.experts.84.w3", "model.layers.30.block_sparse_moe.experts.85.w3", "model.layers.30.block_sparse_moe.experts.86.w3", "model.layers.30.block_sparse_moe.experts.87.w3", "model.layers.30.block_sparse_moe.experts.88.w3", "model.layers.30.block_sparse_moe.experts.89.w3", "model.layers.30.block_sparse_moe.experts.90.w3", "model.layers.30.block_sparse_moe.experts.91.w3", "model.layers.30.block_sparse_moe.experts.92.w3", "model.layers.30.block_sparse_moe.experts.93.w3", "model.layers.30.block_sparse_moe.experts.94.w3", "model.layers.30.block_sparse_moe.experts.95.w3", "model.layers.30.block_sparse_moe.experts.96.w3", "model.layers.30.block_sparse_moe.experts.97.w3", "model.layers.30.block_sparse_moe.experts.98.w3", "model.layers.30.block_sparse_moe.experts.99.w3", "model.layers.30.block_sparse_moe.experts.100.w3", "model.layers.30.block_sparse_moe.experts.101.w3", "model.layers.30.block_sparse_moe.experts.102.w3", "model.layers.30.block_sparse_moe.experts.103.w3", "model.layers.30.block_sparse_moe.experts.104.w3", "model.layers.30.block_sparse_moe.experts.105.w3", "model.layers.30.block_sparse_moe.experts.106.w3", "model.layers.30.block_sparse_moe.experts.107.w3", "model.layers.30.block_sparse_moe.experts.108.w3", "model.layers.30.block_sparse_moe.experts.109.w3", "model.layers.30.block_sparse_moe.experts.110.w3", "model.layers.30.block_sparse_moe.experts.111.w3", "model.layers.30.block_sparse_moe.experts.112.w3", "model.layers.30.block_sparse_moe.experts.113.w3", "model.layers.30.block_sparse_moe.experts.114.w3", "model.layers.30.block_sparse_moe.experts.115.w3", "model.layers.30.block_sparse_moe.experts.116.w3", "model.layers.30.block_sparse_moe.experts.117.w3", "model.layers.30.block_sparse_moe.experts.118.w3", "model.layers.30.block_sparse_moe.experts.119.w3", "model.layers.30.block_sparse_moe.experts.120.w3", "model.layers.30.block_sparse_moe.experts.121.w3", "model.layers.30.block_sparse_moe.experts.122.w3", "model.layers.30.block_sparse_moe.experts.123.w3", "model.layers.30.block_sparse_moe.experts.124.w3", "model.layers.30.block_sparse_moe.experts.125.w3", "model.layers.30.block_sparse_moe.experts.126.w3", "model.layers.30.block_sparse_moe.experts.127.w3", "model.layers.30.block_sparse_moe.experts.128.w3", "model.layers.30.block_sparse_moe.experts.129.w3", "model.layers.30.block_sparse_moe.experts.130.w3", "model.layers.30.block_sparse_moe.experts.131.w3", "model.layers.30.block_sparse_moe.experts.132.w3", "model.layers.30.block_sparse_moe.experts.133.w3", "model.layers.30.block_sparse_moe.experts.134.w3", "model.layers.30.block_sparse_moe.experts.135.w3", "model.layers.30.block_sparse_moe.experts.136.w3", "model.layers.30.block_sparse_moe.experts.137.w3", "model.layers.30.block_sparse_moe.experts.138.w3", "model.layers.30.block_sparse_moe.experts.139.w3", "model.layers.30.block_sparse_moe.experts.140.w3", "model.layers.30.block_sparse_moe.experts.141.w3", "model.layers.30.block_sparse_moe.experts.142.w3", "model.layers.30.block_sparse_moe.experts.143.w3", "model.layers.30.block_sparse_moe.experts.144.w3", "model.layers.30.block_sparse_moe.experts.145.w3", "model.layers.30.block_sparse_moe.experts.146.w3", "model.layers.30.block_sparse_moe.experts.147.w3", "model.layers.30.block_sparse_moe.experts.148.w3", "model.layers.30.block_sparse_moe.experts.149.w3", "model.layers.30.block_sparse_moe.experts.150.w3", "model.layers.30.block_sparse_moe.experts.151.w3", "model.layers.30.block_sparse_moe.experts.152.w3", "model.layers.30.block_sparse_moe.experts.153.w3", "model.layers.30.block_sparse_moe.experts.154.w3", "model.layers.30.block_sparse_moe.experts.155.w3", "model.layers.30.block_sparse_moe.experts.156.w3", "model.layers.30.block_sparse_moe.experts.157.w3", "model.layers.30.block_sparse_moe.experts.158.w3", "model.layers.30.block_sparse_moe.experts.159.w3", "model.layers.30.block_sparse_moe.experts.160.w3", "model.layers.30.block_sparse_moe.experts.161.w3", "model.layers.30.block_sparse_moe.experts.162.w3", "model.layers.30.block_sparse_moe.experts.163.w3", "model.layers.30.block_sparse_moe.experts.164.w3", "model.layers.30.block_sparse_moe.experts.165.w3", "model.layers.30.block_sparse_moe.experts.166.w3", "model.layers.30.block_sparse_moe.experts.167.w3", "model.layers.30.block_sparse_moe.experts.168.w3", "model.layers.30.block_sparse_moe.experts.169.w3", "model.layers.30.block_sparse_moe.experts.170.w3", "model.layers.30.block_sparse_moe.experts.171.w3", "model.layers.30.block_sparse_moe.experts.172.w3", "model.layers.30.block_sparse_moe.experts.173.w3", "model.layers.30.block_sparse_moe.experts.174.w3", "model.layers.30.block_sparse_moe.experts.175.w3", "model.layers.30.block_sparse_moe.experts.176.w3", "model.layers.30.block_sparse_moe.experts.177.w3", "model.layers.30.block_sparse_moe.experts.178.w3", "model.layers.30.block_sparse_moe.experts.179.w3", "model.layers.30.block_sparse_moe.experts.180.w3", "model.layers.30.block_sparse_moe.experts.181.w3", "model.layers.30.block_sparse_moe.experts.182.w3", "model.layers.30.block_sparse_moe.experts.183.w3", "model.layers.30.block_sparse_moe.experts.184.w3", "model.layers.30.block_sparse_moe.experts.185.w3", "model.layers.30.block_sparse_moe.experts.186.w3", "model.layers.30.block_sparse_moe.experts.187.w3", "model.layers.30.block_sparse_moe.experts.188.w3", "model.layers.30.block_sparse_moe.experts.189.w3", "model.layers.30.block_sparse_moe.experts.190.w3", "model.layers.30.block_sparse_moe.experts.191.w3", "model.layers.30.block_sparse_moe.experts.192.w3", "model.layers.30.block_sparse_moe.experts.193.w3", "model.layers.30.block_sparse_moe.experts.194.w3", "model.layers.30.block_sparse_moe.experts.195.w3", "model.layers.30.block_sparse_moe.experts.196.w3", "model.layers.30.block_sparse_moe.experts.197.w3", "model.layers.30.block_sparse_moe.experts.198.w3", "model.layers.30.block_sparse_moe.experts.199.w3", "model.layers.30.block_sparse_moe.experts.200.w3", "model.layers.30.block_sparse_moe.experts.201.w3", "model.layers.30.block_sparse_moe.experts.202.w3", "model.layers.30.block_sparse_moe.experts.203.w3", "model.layers.30.block_sparse_moe.experts.204.w3", "model.layers.30.block_sparse_moe.experts.205.w3", "model.layers.30.block_sparse_moe.experts.206.w3", "model.layers.30.block_sparse_moe.experts.207.w3", "model.layers.30.block_sparse_moe.experts.208.w3", "model.layers.30.block_sparse_moe.experts.209.w3", "model.layers.30.block_sparse_moe.experts.210.w3", "model.layers.30.block_sparse_moe.experts.211.w3", "model.layers.30.block_sparse_moe.experts.212.w3", "model.layers.30.block_sparse_moe.experts.213.w3", "model.layers.30.block_sparse_moe.experts.214.w3", "model.layers.30.block_sparse_moe.experts.215.w3", "model.layers.30.block_sparse_moe.experts.216.w3", "model.layers.30.block_sparse_moe.experts.217.w3", "model.layers.30.block_sparse_moe.experts.218.w3", "model.layers.30.block_sparse_moe.experts.219.w3", "model.layers.30.block_sparse_moe.experts.220.w3", "model.layers.30.block_sparse_moe.experts.221.w3", "model.layers.30.block_sparse_moe.experts.222.w3", "model.layers.30.block_sparse_moe.experts.223.w3", "model.layers.30.block_sparse_moe.experts.224.w3", "model.layers.30.block_sparse_moe.experts.225.w3", "model.layers.30.block_sparse_moe.experts.226.w3", "model.layers.30.block_sparse_moe.experts.227.w3", "model.layers.30.block_sparse_moe.experts.228.w3", "model.layers.30.block_sparse_moe.experts.229.w3", "model.layers.30.block_sparse_moe.experts.230.w3", "model.layers.30.block_sparse_moe.experts.231.w3", "model.layers.30.block_sparse_moe.experts.232.w3", "model.layers.30.block_sparse_moe.experts.233.w3", "model.layers.30.block_sparse_moe.experts.234.w3", "model.layers.30.block_sparse_moe.experts.235.w3", "model.layers.30.block_sparse_moe.experts.236.w3", "model.layers.30.block_sparse_moe.experts.237.w3", "model.layers.30.block_sparse_moe.experts.238.w3", "model.layers.30.block_sparse_moe.experts.239.w3", "model.layers.30.block_sparse_moe.experts.240.w3", "model.layers.30.block_sparse_moe.experts.241.w3", "model.layers.30.block_sparse_moe.experts.242.w3", "model.layers.30.block_sparse_moe.experts.243.w3", "model.layers.30.block_sparse_moe.experts.244.w3", "model.layers.30.block_sparse_moe.experts.245.w3", "model.layers.30.block_sparse_moe.experts.246.w3", "model.layers.30.block_sparse_moe.experts.247.w3", "model.layers.30.block_sparse_moe.experts.248.w3", "model.layers.30.block_sparse_moe.experts.249.w3", "model.layers.30.block_sparse_moe.experts.250.w3", "model.layers.30.block_sparse_moe.experts.251.w3", "model.layers.30.block_sparse_moe.experts.252.w3", "model.layers.30.block_sparse_moe.experts.253.w3", "model.layers.30.block_sparse_moe.experts.254.w3", "model.layers.30.block_sparse_moe.experts.255.w3", "model.layers.30.block_sparse_moe.experts.0.w2", "model.layers.30.block_sparse_moe.experts.1.w2", "model.layers.30.block_sparse_moe.experts.2.w2", "model.layers.30.block_sparse_moe.experts.3.w2", "model.layers.30.block_sparse_moe.experts.4.w2", "model.layers.30.block_sparse_moe.experts.5.w2", "model.layers.30.block_sparse_moe.experts.6.w2", "model.layers.30.block_sparse_moe.experts.7.w2", "model.layers.30.block_sparse_moe.experts.8.w2", "model.layers.30.block_sparse_moe.experts.9.w2", "model.layers.30.block_sparse_moe.experts.10.w2", "model.layers.30.block_sparse_moe.experts.11.w2", "model.layers.30.block_sparse_moe.experts.12.w2", "model.layers.30.block_sparse_moe.experts.13.w2", "model.layers.30.block_sparse_moe.experts.14.w2", "model.layers.30.block_sparse_moe.experts.15.w2", "model.layers.30.block_sparse_moe.experts.16.w2", "model.layers.30.block_sparse_moe.experts.17.w2", "model.layers.30.block_sparse_moe.experts.18.w2", "model.layers.30.block_sparse_moe.experts.19.w2", "model.layers.30.block_sparse_moe.experts.20.w2", "model.layers.30.block_sparse_moe.experts.21.w2", "model.layers.30.block_sparse_moe.experts.22.w2", "model.layers.30.block_sparse_moe.experts.23.w2", "model.layers.30.block_sparse_moe.experts.24.w2", "model.layers.30.block_sparse_moe.experts.25.w2", "model.layers.30.block_sparse_moe.experts.26.w2", "model.layers.30.block_sparse_moe.experts.27.w2", "model.layers.30.block_sparse_moe.experts.28.w2", "model.layers.30.block_sparse_moe.experts.29.w2", "model.layers.30.block_sparse_moe.experts.30.w2", "model.layers.30.block_sparse_moe.experts.31.w2", "model.layers.30.block_sparse_moe.experts.32.w2", "model.layers.30.block_sparse_moe.experts.33.w2", "model.layers.30.block_sparse_moe.experts.34.w2", "model.layers.30.block_sparse_moe.experts.35.w2", "model.layers.30.block_sparse_moe.experts.36.w2", "model.layers.30.block_sparse_moe.experts.37.w2", "model.layers.30.block_sparse_moe.experts.38.w2", "model.layers.30.block_sparse_moe.experts.39.w2", "model.layers.30.block_sparse_moe.experts.40.w2", "model.layers.30.block_sparse_moe.experts.41.w2", "model.layers.30.block_sparse_moe.experts.42.w2", "model.layers.30.block_sparse_moe.experts.43.w2", "model.layers.30.block_sparse_moe.experts.44.w2", "model.layers.30.block_sparse_moe.experts.45.w2", "model.layers.30.block_sparse_moe.experts.46.w2", "model.layers.30.block_sparse_moe.experts.47.w2", "model.layers.30.block_sparse_moe.experts.48.w2", "model.layers.30.block_sparse_moe.experts.49.w2", "model.layers.30.block_sparse_moe.experts.50.w2", "model.layers.30.block_sparse_moe.experts.51.w2", "model.layers.30.block_sparse_moe.experts.52.w2", "model.layers.30.block_sparse_moe.experts.53.w2", "model.layers.30.block_sparse_moe.experts.54.w2", "model.layers.30.block_sparse_moe.experts.55.w2", "model.layers.30.block_sparse_moe.experts.56.w2", "model.layers.30.block_sparse_moe.experts.57.w2", "model.layers.30.block_sparse_moe.experts.58.w2", "model.layers.30.block_sparse_moe.experts.59.w2", "model.layers.30.block_sparse_moe.experts.60.w2", "model.layers.30.block_sparse_moe.experts.61.w2", "model.layers.30.block_sparse_moe.experts.62.w2", "model.layers.30.block_sparse_moe.experts.63.w2", "model.layers.30.block_sparse_moe.experts.64.w2", "model.layers.30.block_sparse_moe.experts.65.w2", "model.layers.30.block_sparse_moe.experts.66.w2", "model.layers.30.block_sparse_moe.experts.67.w2", "model.layers.30.block_sparse_moe.experts.68.w2", "model.layers.30.block_sparse_moe.experts.69.w2", "model.layers.30.block_sparse_moe.experts.70.w2", "model.layers.30.block_sparse_moe.experts.71.w2", "model.layers.30.block_sparse_moe.experts.72.w2", "model.layers.30.block_sparse_moe.experts.73.w2", "model.layers.30.block_sparse_moe.experts.74.w2", "model.layers.30.block_sparse_moe.experts.75.w2", "model.layers.30.block_sparse_moe.experts.76.w2", "model.layers.30.block_sparse_moe.experts.77.w2", "model.layers.30.block_sparse_moe.experts.78.w2", "model.layers.30.block_sparse_moe.experts.79.w2", "model.layers.30.block_sparse_moe.experts.80.w2", "model.layers.30.block_sparse_moe.experts.81.w2", "model.layers.30.block_sparse_moe.experts.82.w2", "model.layers.30.block_sparse_moe.experts.83.w2", "model.layers.30.block_sparse_moe.experts.84.w2", "model.layers.30.block_sparse_moe.experts.85.w2", "model.layers.30.block_sparse_moe.experts.86.w2", "model.layers.30.block_sparse_moe.experts.87.w2", "model.layers.30.block_sparse_moe.experts.88.w2", "model.layers.30.block_sparse_moe.experts.89.w2", "model.layers.30.block_sparse_moe.experts.90.w2", "model.layers.30.block_sparse_moe.experts.91.w2", "model.layers.30.block_sparse_moe.experts.92.w2", "model.layers.30.block_sparse_moe.experts.93.w2", "model.layers.30.block_sparse_moe.experts.94.w2", "model.layers.30.block_sparse_moe.experts.95.w2", "model.layers.30.block_sparse_moe.experts.96.w2", "model.layers.30.block_sparse_moe.experts.97.w2", "model.layers.30.block_sparse_moe.experts.98.w2", "model.layers.30.block_sparse_moe.experts.99.w2", "model.layers.30.block_sparse_moe.experts.100.w2", "model.layers.30.block_sparse_moe.experts.101.w2", "model.layers.30.block_sparse_moe.experts.102.w2", "model.layers.30.block_sparse_moe.experts.103.w2", "model.layers.30.block_sparse_moe.experts.104.w2", "model.layers.30.block_sparse_moe.experts.105.w2", "model.layers.30.block_sparse_moe.experts.106.w2", "model.layers.30.block_sparse_moe.experts.107.w2", "model.layers.30.block_sparse_moe.experts.108.w2", "model.layers.30.block_sparse_moe.experts.109.w2", "model.layers.30.block_sparse_moe.experts.110.w2", "model.layers.30.block_sparse_moe.experts.111.w2", "model.layers.30.block_sparse_moe.experts.112.w2", "model.layers.30.block_sparse_moe.experts.113.w2", "model.layers.30.block_sparse_moe.experts.114.w2", "model.layers.30.block_sparse_moe.experts.115.w2", "model.layers.30.block_sparse_moe.experts.116.w2", "model.layers.30.block_sparse_moe.experts.117.w2", "model.layers.30.block_sparse_moe.experts.118.w2", "model.layers.30.block_sparse_moe.experts.119.w2", "model.layers.30.block_sparse_moe.experts.120.w2", "model.layers.30.block_sparse_moe.experts.121.w2", "model.layers.30.block_sparse_moe.experts.122.w2", "model.layers.30.block_sparse_moe.experts.123.w2", "model.layers.30.block_sparse_moe.experts.124.w2", "model.layers.30.block_sparse_moe.experts.125.w2", "model.layers.30.block_sparse_moe.experts.126.w2", "model.layers.30.block_sparse_moe.experts.127.w2", "model.layers.30.block_sparse_moe.experts.128.w2", "model.layers.30.block_sparse_moe.experts.129.w2", "model.layers.30.block_sparse_moe.experts.130.w2", "model.layers.30.block_sparse_moe.experts.131.w2", "model.layers.30.block_sparse_moe.experts.132.w2", "model.layers.30.block_sparse_moe.experts.133.w2", "model.layers.30.block_sparse_moe.experts.134.w2", "model.layers.30.block_sparse_moe.experts.135.w2", "model.layers.30.block_sparse_moe.experts.136.w2", "model.layers.30.block_sparse_moe.experts.137.w2", "model.layers.30.block_sparse_moe.experts.138.w2", "model.layers.30.block_sparse_moe.experts.139.w2", "model.layers.30.block_sparse_moe.experts.140.w2", "model.layers.30.block_sparse_moe.experts.141.w2", "model.layers.30.block_sparse_moe.experts.142.w2", "model.layers.30.block_sparse_moe.experts.143.w2", "model.layers.30.block_sparse_moe.experts.144.w2", "model.layers.30.block_sparse_moe.experts.145.w2", "model.layers.30.block_sparse_moe.experts.146.w2", "model.layers.30.block_sparse_moe.experts.147.w2", "model.layers.30.block_sparse_moe.experts.148.w2", "model.layers.30.block_sparse_moe.experts.149.w2", "model.layers.30.block_sparse_moe.experts.150.w2", "model.layers.30.block_sparse_moe.experts.151.w2", "model.layers.30.block_sparse_moe.experts.152.w2", "model.layers.30.block_sparse_moe.experts.153.w2", "model.layers.30.block_sparse_moe.experts.154.w2", "model.layers.30.block_sparse_moe.experts.155.w2", "model.layers.30.block_sparse_moe.experts.156.w2", "model.layers.30.block_sparse_moe.experts.157.w2", "model.layers.30.block_sparse_moe.experts.158.w2", "model.layers.30.block_sparse_moe.experts.159.w2", "model.layers.30.block_sparse_moe.experts.160.w2", "model.layers.30.block_sparse_moe.experts.161.w2", "model.layers.30.block_sparse_moe.experts.162.w2", "model.layers.30.block_sparse_moe.experts.163.w2", "model.layers.30.block_sparse_moe.experts.164.w2", "model.layers.30.block_sparse_moe.experts.165.w2", "model.layers.30.block_sparse_moe.experts.166.w2", "model.layers.30.block_sparse_moe.experts.167.w2", "model.layers.30.block_sparse_moe.experts.168.w2", "model.layers.30.block_sparse_moe.experts.169.w2", "model.layers.30.block_sparse_moe.experts.170.w2", "model.layers.30.block_sparse_moe.experts.171.w2", "model.layers.30.block_sparse_moe.experts.172.w2", "model.layers.30.block_sparse_moe.experts.173.w2", "model.layers.30.block_sparse_moe.experts.174.w2", "model.layers.30.block_sparse_moe.experts.175.w2", "model.layers.30.block_sparse_moe.experts.176.w2", "model.layers.30.block_sparse_moe.experts.177.w2", "model.layers.30.block_sparse_moe.experts.178.w2", "model.layers.30.block_sparse_moe.experts.179.w2", "model.layers.30.block_sparse_moe.experts.180.w2", "model.layers.30.block_sparse_moe.experts.181.w2", "model.layers.30.block_sparse_moe.experts.182.w2", "model.layers.30.block_sparse_moe.experts.183.w2", "model.layers.30.block_sparse_moe.experts.184.w2", "model.layers.30.block_sparse_moe.experts.185.w2", "model.layers.30.block_sparse_moe.experts.186.w2", "model.layers.30.block_sparse_moe.experts.187.w2", "model.layers.30.block_sparse_moe.experts.188.w2", "model.layers.30.block_sparse_moe.experts.189.w2", "model.layers.30.block_sparse_moe.experts.190.w2", "model.layers.30.block_sparse_moe.experts.191.w2", "model.layers.30.block_sparse_moe.experts.192.w2", "model.layers.30.block_sparse_moe.experts.193.w2", "model.layers.30.block_sparse_moe.experts.194.w2", "model.layers.30.block_sparse_moe.experts.195.w2", "model.layers.30.block_sparse_moe.experts.196.w2", "model.layers.30.block_sparse_moe.experts.197.w2", "model.layers.30.block_sparse_moe.experts.198.w2", "model.layers.30.block_sparse_moe.experts.199.w2", "model.layers.30.block_sparse_moe.experts.200.w2", "model.layers.30.block_sparse_moe.experts.201.w2", "model.layers.30.block_sparse_moe.experts.202.w2", "model.layers.30.block_sparse_moe.experts.203.w2", "model.layers.30.block_sparse_moe.experts.204.w2", "model.layers.30.block_sparse_moe.experts.205.w2", "model.layers.30.block_sparse_moe.experts.206.w2", "model.layers.30.block_sparse_moe.experts.207.w2", "model.layers.30.block_sparse_moe.experts.208.w2", "model.layers.30.block_sparse_moe.experts.209.w2", "model.layers.30.block_sparse_moe.experts.210.w2", "model.layers.30.block_sparse_moe.experts.211.w2", "model.layers.30.block_sparse_moe.experts.212.w2", "model.layers.30.block_sparse_moe.experts.213.w2", "model.layers.30.block_sparse_moe.experts.214.w2", "model.layers.30.block_sparse_moe.experts.215.w2", "model.layers.30.block_sparse_moe.experts.216.w2", "model.layers.30.block_sparse_moe.experts.217.w2", "model.layers.30.block_sparse_moe.experts.218.w2", "model.layers.30.block_sparse_moe.experts.219.w2", "model.layers.30.block_sparse_moe.experts.220.w2", "model.layers.30.block_sparse_moe.experts.221.w2", "model.layers.30.block_sparse_moe.experts.222.w2", "model.layers.30.block_sparse_moe.experts.223.w2", "model.layers.30.block_sparse_moe.experts.224.w2", "model.layers.30.block_sparse_moe.experts.225.w2", "model.layers.30.block_sparse_moe.experts.226.w2", "model.layers.30.block_sparse_moe.experts.227.w2", "model.layers.30.block_sparse_moe.experts.228.w2", "model.layers.30.block_sparse_moe.experts.229.w2", "model.layers.30.block_sparse_moe.experts.230.w2", "model.layers.30.block_sparse_moe.experts.231.w2", "model.layers.30.block_sparse_moe.experts.232.w2", "model.layers.30.block_sparse_moe.experts.233.w2", "model.layers.30.block_sparse_moe.experts.234.w2", "model.layers.30.block_sparse_moe.experts.235.w2", "model.layers.30.block_sparse_moe.experts.236.w2", "model.layers.30.block_sparse_moe.experts.237.w2", "model.layers.30.block_sparse_moe.experts.238.w2", "model.layers.30.block_sparse_moe.experts.239.w2", "model.layers.30.block_sparse_moe.experts.240.w2", "model.layers.30.block_sparse_moe.experts.241.w2", "model.layers.30.block_sparse_moe.experts.242.w2", "model.layers.30.block_sparse_moe.experts.243.w2", "model.layers.30.block_sparse_moe.experts.244.w2", "model.layers.30.block_sparse_moe.experts.245.w2", "model.layers.30.block_sparse_moe.experts.246.w2", "model.layers.30.block_sparse_moe.experts.247.w2", "model.layers.30.block_sparse_moe.experts.248.w2", "model.layers.30.block_sparse_moe.experts.249.w2", "model.layers.30.block_sparse_moe.experts.250.w2", "model.layers.30.block_sparse_moe.experts.251.w2", "model.layers.30.block_sparse_moe.experts.252.w2", "model.layers.30.block_sparse_moe.experts.253.w2", "model.layers.30.block_sparse_moe.experts.254.w2", "model.layers.30.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0009779151529074048, "dbits": 3623878656 } ] }, { "idx": 62, "layers": [ "model.layers.31.self_attn.q_proj", "model.layers.31.self_attn.k_proj", "model.layers.31.self_attn.v_proj", "model.layers.31.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0013589147478341723, "dbits": 44040192 } ] }, { "idx": 63, "layers": [ "model.layers.31.block_sparse_moe.experts.0.w1", "model.layers.31.block_sparse_moe.experts.1.w1", "model.layers.31.block_sparse_moe.experts.2.w1", "model.layers.31.block_sparse_moe.experts.3.w1", "model.layers.31.block_sparse_moe.experts.4.w1", "model.layers.31.block_sparse_moe.experts.5.w1", "model.layers.31.block_sparse_moe.experts.6.w1", "model.layers.31.block_sparse_moe.experts.7.w1", "model.layers.31.block_sparse_moe.experts.8.w1", "model.layers.31.block_sparse_moe.experts.9.w1", "model.layers.31.block_sparse_moe.experts.10.w1", "model.layers.31.block_sparse_moe.experts.11.w1", "model.layers.31.block_sparse_moe.experts.12.w1", "model.layers.31.block_sparse_moe.experts.13.w1", "model.layers.31.block_sparse_moe.experts.14.w1", "model.layers.31.block_sparse_moe.experts.15.w1", "model.layers.31.block_sparse_moe.experts.16.w1", "model.layers.31.block_sparse_moe.experts.17.w1", "model.layers.31.block_sparse_moe.experts.18.w1", "model.layers.31.block_sparse_moe.experts.19.w1", "model.layers.31.block_sparse_moe.experts.20.w1", "model.layers.31.block_sparse_moe.experts.21.w1", "model.layers.31.block_sparse_moe.experts.22.w1", "model.layers.31.block_sparse_moe.experts.23.w1", "model.layers.31.block_sparse_moe.experts.24.w1", "model.layers.31.block_sparse_moe.experts.25.w1", "model.layers.31.block_sparse_moe.experts.26.w1", "model.layers.31.block_sparse_moe.experts.27.w1", "model.layers.31.block_sparse_moe.experts.28.w1", "model.layers.31.block_sparse_moe.experts.29.w1", "model.layers.31.block_sparse_moe.experts.30.w1", "model.layers.31.block_sparse_moe.experts.31.w1", "model.layers.31.block_sparse_moe.experts.32.w1", "model.layers.31.block_sparse_moe.experts.33.w1", "model.layers.31.block_sparse_moe.experts.34.w1", "model.layers.31.block_sparse_moe.experts.35.w1", "model.layers.31.block_sparse_moe.experts.36.w1", "model.layers.31.block_sparse_moe.experts.37.w1", "model.layers.31.block_sparse_moe.experts.38.w1", "model.layers.31.block_sparse_moe.experts.39.w1", "model.layers.31.block_sparse_moe.experts.40.w1", "model.layers.31.block_sparse_moe.experts.41.w1", "model.layers.31.block_sparse_moe.experts.42.w1", "model.layers.31.block_sparse_moe.experts.43.w1", "model.layers.31.block_sparse_moe.experts.44.w1", "model.layers.31.block_sparse_moe.experts.45.w1", "model.layers.31.block_sparse_moe.experts.46.w1", "model.layers.31.block_sparse_moe.experts.47.w1", "model.layers.31.block_sparse_moe.experts.48.w1", "model.layers.31.block_sparse_moe.experts.49.w1", "model.layers.31.block_sparse_moe.experts.50.w1", "model.layers.31.block_sparse_moe.experts.51.w1", "model.layers.31.block_sparse_moe.experts.52.w1", "model.layers.31.block_sparse_moe.experts.53.w1", "model.layers.31.block_sparse_moe.experts.54.w1", "model.layers.31.block_sparse_moe.experts.55.w1", "model.layers.31.block_sparse_moe.experts.56.w1", "model.layers.31.block_sparse_moe.experts.57.w1", "model.layers.31.block_sparse_moe.experts.58.w1", "model.layers.31.block_sparse_moe.experts.59.w1", "model.layers.31.block_sparse_moe.experts.60.w1", "model.layers.31.block_sparse_moe.experts.61.w1", "model.layers.31.block_sparse_moe.experts.62.w1", "model.layers.31.block_sparse_moe.experts.63.w1", "model.layers.31.block_sparse_moe.experts.64.w1", "model.layers.31.block_sparse_moe.experts.65.w1", "model.layers.31.block_sparse_moe.experts.66.w1", "model.layers.31.block_sparse_moe.experts.67.w1", "model.layers.31.block_sparse_moe.experts.68.w1", "model.layers.31.block_sparse_moe.experts.69.w1", "model.layers.31.block_sparse_moe.experts.70.w1", "model.layers.31.block_sparse_moe.experts.71.w1", "model.layers.31.block_sparse_moe.experts.72.w1", "model.layers.31.block_sparse_moe.experts.73.w1", "model.layers.31.block_sparse_moe.experts.74.w1", "model.layers.31.block_sparse_moe.experts.75.w1", "model.layers.31.block_sparse_moe.experts.76.w1", "model.layers.31.block_sparse_moe.experts.77.w1", "model.layers.31.block_sparse_moe.experts.78.w1", "model.layers.31.block_sparse_moe.experts.79.w1", "model.layers.31.block_sparse_moe.experts.80.w1", "model.layers.31.block_sparse_moe.experts.81.w1", "model.layers.31.block_sparse_moe.experts.82.w1", "model.layers.31.block_sparse_moe.experts.83.w1", "model.layers.31.block_sparse_moe.experts.84.w1", "model.layers.31.block_sparse_moe.experts.85.w1", "model.layers.31.block_sparse_moe.experts.86.w1", "model.layers.31.block_sparse_moe.experts.87.w1", "model.layers.31.block_sparse_moe.experts.88.w1", "model.layers.31.block_sparse_moe.experts.89.w1", "model.layers.31.block_sparse_moe.experts.90.w1", "model.layers.31.block_sparse_moe.experts.91.w1", "model.layers.31.block_sparse_moe.experts.92.w1", "model.layers.31.block_sparse_moe.experts.93.w1", "model.layers.31.block_sparse_moe.experts.94.w1", "model.layers.31.block_sparse_moe.experts.95.w1", "model.layers.31.block_sparse_moe.experts.96.w1", "model.layers.31.block_sparse_moe.experts.97.w1", "model.layers.31.block_sparse_moe.experts.98.w1", "model.layers.31.block_sparse_moe.experts.99.w1", "model.layers.31.block_sparse_moe.experts.100.w1", "model.layers.31.block_sparse_moe.experts.101.w1", "model.layers.31.block_sparse_moe.experts.102.w1", "model.layers.31.block_sparse_moe.experts.103.w1", "model.layers.31.block_sparse_moe.experts.104.w1", "model.layers.31.block_sparse_moe.experts.105.w1", "model.layers.31.block_sparse_moe.experts.106.w1", "model.layers.31.block_sparse_moe.experts.107.w1", "model.layers.31.block_sparse_moe.experts.108.w1", "model.layers.31.block_sparse_moe.experts.109.w1", "model.layers.31.block_sparse_moe.experts.110.w1", "model.layers.31.block_sparse_moe.experts.111.w1", "model.layers.31.block_sparse_moe.experts.112.w1", "model.layers.31.block_sparse_moe.experts.113.w1", "model.layers.31.block_sparse_moe.experts.114.w1", "model.layers.31.block_sparse_moe.experts.115.w1", "model.layers.31.block_sparse_moe.experts.116.w1", "model.layers.31.block_sparse_moe.experts.117.w1", "model.layers.31.block_sparse_moe.experts.118.w1", "model.layers.31.block_sparse_moe.experts.119.w1", "model.layers.31.block_sparse_moe.experts.120.w1", "model.layers.31.block_sparse_moe.experts.121.w1", "model.layers.31.block_sparse_moe.experts.122.w1", "model.layers.31.block_sparse_moe.experts.123.w1", "model.layers.31.block_sparse_moe.experts.124.w1", "model.layers.31.block_sparse_moe.experts.125.w1", "model.layers.31.block_sparse_moe.experts.126.w1", "model.layers.31.block_sparse_moe.experts.127.w1", "model.layers.31.block_sparse_moe.experts.128.w1", "model.layers.31.block_sparse_moe.experts.129.w1", "model.layers.31.block_sparse_moe.experts.130.w1", "model.layers.31.block_sparse_moe.experts.131.w1", "model.layers.31.block_sparse_moe.experts.132.w1", "model.layers.31.block_sparse_moe.experts.133.w1", "model.layers.31.block_sparse_moe.experts.134.w1", "model.layers.31.block_sparse_moe.experts.135.w1", "model.layers.31.block_sparse_moe.experts.136.w1", "model.layers.31.block_sparse_moe.experts.137.w1", "model.layers.31.block_sparse_moe.experts.138.w1", "model.layers.31.block_sparse_moe.experts.139.w1", "model.layers.31.block_sparse_moe.experts.140.w1", "model.layers.31.block_sparse_moe.experts.141.w1", "model.layers.31.block_sparse_moe.experts.142.w1", "model.layers.31.block_sparse_moe.experts.143.w1", "model.layers.31.block_sparse_moe.experts.144.w1", "model.layers.31.block_sparse_moe.experts.145.w1", "model.layers.31.block_sparse_moe.experts.146.w1", "model.layers.31.block_sparse_moe.experts.147.w1", "model.layers.31.block_sparse_moe.experts.148.w1", "model.layers.31.block_sparse_moe.experts.149.w1", "model.layers.31.block_sparse_moe.experts.150.w1", "model.layers.31.block_sparse_moe.experts.151.w1", "model.layers.31.block_sparse_moe.experts.152.w1", "model.layers.31.block_sparse_moe.experts.153.w1", "model.layers.31.block_sparse_moe.experts.154.w1", "model.layers.31.block_sparse_moe.experts.155.w1", "model.layers.31.block_sparse_moe.experts.156.w1", "model.layers.31.block_sparse_moe.experts.157.w1", "model.layers.31.block_sparse_moe.experts.158.w1", "model.layers.31.block_sparse_moe.experts.159.w1", "model.layers.31.block_sparse_moe.experts.160.w1", "model.layers.31.block_sparse_moe.experts.161.w1", "model.layers.31.block_sparse_moe.experts.162.w1", "model.layers.31.block_sparse_moe.experts.163.w1", "model.layers.31.block_sparse_moe.experts.164.w1", "model.layers.31.block_sparse_moe.experts.165.w1", "model.layers.31.block_sparse_moe.experts.166.w1", "model.layers.31.block_sparse_moe.experts.167.w1", "model.layers.31.block_sparse_moe.experts.168.w1", "model.layers.31.block_sparse_moe.experts.169.w1", "model.layers.31.block_sparse_moe.experts.170.w1", "model.layers.31.block_sparse_moe.experts.171.w1", "model.layers.31.block_sparse_moe.experts.172.w1", "model.layers.31.block_sparse_moe.experts.173.w1", "model.layers.31.block_sparse_moe.experts.174.w1", "model.layers.31.block_sparse_moe.experts.175.w1", "model.layers.31.block_sparse_moe.experts.176.w1", "model.layers.31.block_sparse_moe.experts.177.w1", "model.layers.31.block_sparse_moe.experts.178.w1", "model.layers.31.block_sparse_moe.experts.179.w1", "model.layers.31.block_sparse_moe.experts.180.w1", "model.layers.31.block_sparse_moe.experts.181.w1", "model.layers.31.block_sparse_moe.experts.182.w1", "model.layers.31.block_sparse_moe.experts.183.w1", "model.layers.31.block_sparse_moe.experts.184.w1", "model.layers.31.block_sparse_moe.experts.185.w1", "model.layers.31.block_sparse_moe.experts.186.w1", "model.layers.31.block_sparse_moe.experts.187.w1", "model.layers.31.block_sparse_moe.experts.188.w1", "model.layers.31.block_sparse_moe.experts.189.w1", "model.layers.31.block_sparse_moe.experts.190.w1", "model.layers.31.block_sparse_moe.experts.191.w1", "model.layers.31.block_sparse_moe.experts.192.w1", "model.layers.31.block_sparse_moe.experts.193.w1", "model.layers.31.block_sparse_moe.experts.194.w1", "model.layers.31.block_sparse_moe.experts.195.w1", "model.layers.31.block_sparse_moe.experts.196.w1", "model.layers.31.block_sparse_moe.experts.197.w1", "model.layers.31.block_sparse_moe.experts.198.w1", "model.layers.31.block_sparse_moe.experts.199.w1", "model.layers.31.block_sparse_moe.experts.200.w1", "model.layers.31.block_sparse_moe.experts.201.w1", "model.layers.31.block_sparse_moe.experts.202.w1", "model.layers.31.block_sparse_moe.experts.203.w1", "model.layers.31.block_sparse_moe.experts.204.w1", "model.layers.31.block_sparse_moe.experts.205.w1", "model.layers.31.block_sparse_moe.experts.206.w1", "model.layers.31.block_sparse_moe.experts.207.w1", "model.layers.31.block_sparse_moe.experts.208.w1", "model.layers.31.block_sparse_moe.experts.209.w1", "model.layers.31.block_sparse_moe.experts.210.w1", "model.layers.31.block_sparse_moe.experts.211.w1", "model.layers.31.block_sparse_moe.experts.212.w1", "model.layers.31.block_sparse_moe.experts.213.w1", "model.layers.31.block_sparse_moe.experts.214.w1", "model.layers.31.block_sparse_moe.experts.215.w1", "model.layers.31.block_sparse_moe.experts.216.w1", "model.layers.31.block_sparse_moe.experts.217.w1", "model.layers.31.block_sparse_moe.experts.218.w1", "model.layers.31.block_sparse_moe.experts.219.w1", "model.layers.31.block_sparse_moe.experts.220.w1", "model.layers.31.block_sparse_moe.experts.221.w1", "model.layers.31.block_sparse_moe.experts.222.w1", "model.layers.31.block_sparse_moe.experts.223.w1", "model.layers.31.block_sparse_moe.experts.224.w1", "model.layers.31.block_sparse_moe.experts.225.w1", "model.layers.31.block_sparse_moe.experts.226.w1", "model.layers.31.block_sparse_moe.experts.227.w1", "model.layers.31.block_sparse_moe.experts.228.w1", "model.layers.31.block_sparse_moe.experts.229.w1", "model.layers.31.block_sparse_moe.experts.230.w1", "model.layers.31.block_sparse_moe.experts.231.w1", "model.layers.31.block_sparse_moe.experts.232.w1", "model.layers.31.block_sparse_moe.experts.233.w1", "model.layers.31.block_sparse_moe.experts.234.w1", "model.layers.31.block_sparse_moe.experts.235.w1", "model.layers.31.block_sparse_moe.experts.236.w1", "model.layers.31.block_sparse_moe.experts.237.w1", "model.layers.31.block_sparse_moe.experts.238.w1", "model.layers.31.block_sparse_moe.experts.239.w1", "model.layers.31.block_sparse_moe.experts.240.w1", "model.layers.31.block_sparse_moe.experts.241.w1", "model.layers.31.block_sparse_moe.experts.242.w1", "model.layers.31.block_sparse_moe.experts.243.w1", "model.layers.31.block_sparse_moe.experts.244.w1", "model.layers.31.block_sparse_moe.experts.245.w1", "model.layers.31.block_sparse_moe.experts.246.w1", "model.layers.31.block_sparse_moe.experts.247.w1", "model.layers.31.block_sparse_moe.experts.248.w1", "model.layers.31.block_sparse_moe.experts.249.w1", "model.layers.31.block_sparse_moe.experts.250.w1", "model.layers.31.block_sparse_moe.experts.251.w1", "model.layers.31.block_sparse_moe.experts.252.w1", "model.layers.31.block_sparse_moe.experts.253.w1", "model.layers.31.block_sparse_moe.experts.254.w1", "model.layers.31.block_sparse_moe.experts.255.w1", "model.layers.31.block_sparse_moe.experts.0.w3", "model.layers.31.block_sparse_moe.experts.1.w3", "model.layers.31.block_sparse_moe.experts.2.w3", "model.layers.31.block_sparse_moe.experts.3.w3", "model.layers.31.block_sparse_moe.experts.4.w3", "model.layers.31.block_sparse_moe.experts.5.w3", "model.layers.31.block_sparse_moe.experts.6.w3", "model.layers.31.block_sparse_moe.experts.7.w3", "model.layers.31.block_sparse_moe.experts.8.w3", "model.layers.31.block_sparse_moe.experts.9.w3", "model.layers.31.block_sparse_moe.experts.10.w3", "model.layers.31.block_sparse_moe.experts.11.w3", "model.layers.31.block_sparse_moe.experts.12.w3", "model.layers.31.block_sparse_moe.experts.13.w3", "model.layers.31.block_sparse_moe.experts.14.w3", "model.layers.31.block_sparse_moe.experts.15.w3", "model.layers.31.block_sparse_moe.experts.16.w3", "model.layers.31.block_sparse_moe.experts.17.w3", "model.layers.31.block_sparse_moe.experts.18.w3", "model.layers.31.block_sparse_moe.experts.19.w3", "model.layers.31.block_sparse_moe.experts.20.w3", "model.layers.31.block_sparse_moe.experts.21.w3", "model.layers.31.block_sparse_moe.experts.22.w3", "model.layers.31.block_sparse_moe.experts.23.w3", "model.layers.31.block_sparse_moe.experts.24.w3", "model.layers.31.block_sparse_moe.experts.25.w3", "model.layers.31.block_sparse_moe.experts.26.w3", "model.layers.31.block_sparse_moe.experts.27.w3", "model.layers.31.block_sparse_moe.experts.28.w3", "model.layers.31.block_sparse_moe.experts.29.w3", "model.layers.31.block_sparse_moe.experts.30.w3", "model.layers.31.block_sparse_moe.experts.31.w3", "model.layers.31.block_sparse_moe.experts.32.w3", "model.layers.31.block_sparse_moe.experts.33.w3", "model.layers.31.block_sparse_moe.experts.34.w3", "model.layers.31.block_sparse_moe.experts.35.w3", "model.layers.31.block_sparse_moe.experts.36.w3", "model.layers.31.block_sparse_moe.experts.37.w3", "model.layers.31.block_sparse_moe.experts.38.w3", "model.layers.31.block_sparse_moe.experts.39.w3", "model.layers.31.block_sparse_moe.experts.40.w3", "model.layers.31.block_sparse_moe.experts.41.w3", "model.layers.31.block_sparse_moe.experts.42.w3", "model.layers.31.block_sparse_moe.experts.43.w3", "model.layers.31.block_sparse_moe.experts.44.w3", "model.layers.31.block_sparse_moe.experts.45.w3", "model.layers.31.block_sparse_moe.experts.46.w3", "model.layers.31.block_sparse_moe.experts.47.w3", "model.layers.31.block_sparse_moe.experts.48.w3", "model.layers.31.block_sparse_moe.experts.49.w3", "model.layers.31.block_sparse_moe.experts.50.w3", "model.layers.31.block_sparse_moe.experts.51.w3", "model.layers.31.block_sparse_moe.experts.52.w3", "model.layers.31.block_sparse_moe.experts.53.w3", "model.layers.31.block_sparse_moe.experts.54.w3", "model.layers.31.block_sparse_moe.experts.55.w3", "model.layers.31.block_sparse_moe.experts.56.w3", "model.layers.31.block_sparse_moe.experts.57.w3", "model.layers.31.block_sparse_moe.experts.58.w3", "model.layers.31.block_sparse_moe.experts.59.w3", "model.layers.31.block_sparse_moe.experts.60.w3", "model.layers.31.block_sparse_moe.experts.61.w3", "model.layers.31.block_sparse_moe.experts.62.w3", "model.layers.31.block_sparse_moe.experts.63.w3", "model.layers.31.block_sparse_moe.experts.64.w3", "model.layers.31.block_sparse_moe.experts.65.w3", "model.layers.31.block_sparse_moe.experts.66.w3", "model.layers.31.block_sparse_moe.experts.67.w3", "model.layers.31.block_sparse_moe.experts.68.w3", "model.layers.31.block_sparse_moe.experts.69.w3", "model.layers.31.block_sparse_moe.experts.70.w3", "model.layers.31.block_sparse_moe.experts.71.w3", "model.layers.31.block_sparse_moe.experts.72.w3", "model.layers.31.block_sparse_moe.experts.73.w3", "model.layers.31.block_sparse_moe.experts.74.w3", "model.layers.31.block_sparse_moe.experts.75.w3", "model.layers.31.block_sparse_moe.experts.76.w3", "model.layers.31.block_sparse_moe.experts.77.w3", "model.layers.31.block_sparse_moe.experts.78.w3", "model.layers.31.block_sparse_moe.experts.79.w3", "model.layers.31.block_sparse_moe.experts.80.w3", "model.layers.31.block_sparse_moe.experts.81.w3", "model.layers.31.block_sparse_moe.experts.82.w3", "model.layers.31.block_sparse_moe.experts.83.w3", "model.layers.31.block_sparse_moe.experts.84.w3", "model.layers.31.block_sparse_moe.experts.85.w3", "model.layers.31.block_sparse_moe.experts.86.w3", "model.layers.31.block_sparse_moe.experts.87.w3", "model.layers.31.block_sparse_moe.experts.88.w3", "model.layers.31.block_sparse_moe.experts.89.w3", "model.layers.31.block_sparse_moe.experts.90.w3", "model.layers.31.block_sparse_moe.experts.91.w3", "model.layers.31.block_sparse_moe.experts.92.w3", "model.layers.31.block_sparse_moe.experts.93.w3", "model.layers.31.block_sparse_moe.experts.94.w3", "model.layers.31.block_sparse_moe.experts.95.w3", "model.layers.31.block_sparse_moe.experts.96.w3", "model.layers.31.block_sparse_moe.experts.97.w3", "model.layers.31.block_sparse_moe.experts.98.w3", "model.layers.31.block_sparse_moe.experts.99.w3", "model.layers.31.block_sparse_moe.experts.100.w3", "model.layers.31.block_sparse_moe.experts.101.w3", "model.layers.31.block_sparse_moe.experts.102.w3", "model.layers.31.block_sparse_moe.experts.103.w3", "model.layers.31.block_sparse_moe.experts.104.w3", "model.layers.31.block_sparse_moe.experts.105.w3", "model.layers.31.block_sparse_moe.experts.106.w3", "model.layers.31.block_sparse_moe.experts.107.w3", "model.layers.31.block_sparse_moe.experts.108.w3", "model.layers.31.block_sparse_moe.experts.109.w3", "model.layers.31.block_sparse_moe.experts.110.w3", "model.layers.31.block_sparse_moe.experts.111.w3", "model.layers.31.block_sparse_moe.experts.112.w3", "model.layers.31.block_sparse_moe.experts.113.w3", "model.layers.31.block_sparse_moe.experts.114.w3", "model.layers.31.block_sparse_moe.experts.115.w3", "model.layers.31.block_sparse_moe.experts.116.w3", "model.layers.31.block_sparse_moe.experts.117.w3", "model.layers.31.block_sparse_moe.experts.118.w3", "model.layers.31.block_sparse_moe.experts.119.w3", "model.layers.31.block_sparse_moe.experts.120.w3", "model.layers.31.block_sparse_moe.experts.121.w3", "model.layers.31.block_sparse_moe.experts.122.w3", "model.layers.31.block_sparse_moe.experts.123.w3", "model.layers.31.block_sparse_moe.experts.124.w3", "model.layers.31.block_sparse_moe.experts.125.w3", "model.layers.31.block_sparse_moe.experts.126.w3", "model.layers.31.block_sparse_moe.experts.127.w3", "model.layers.31.block_sparse_moe.experts.128.w3", "model.layers.31.block_sparse_moe.experts.129.w3", "model.layers.31.block_sparse_moe.experts.130.w3", "model.layers.31.block_sparse_moe.experts.131.w3", "model.layers.31.block_sparse_moe.experts.132.w3", "model.layers.31.block_sparse_moe.experts.133.w3", "model.layers.31.block_sparse_moe.experts.134.w3", "model.layers.31.block_sparse_moe.experts.135.w3", "model.layers.31.block_sparse_moe.experts.136.w3", "model.layers.31.block_sparse_moe.experts.137.w3", "model.layers.31.block_sparse_moe.experts.138.w3", "model.layers.31.block_sparse_moe.experts.139.w3", "model.layers.31.block_sparse_moe.experts.140.w3", "model.layers.31.block_sparse_moe.experts.141.w3", "model.layers.31.block_sparse_moe.experts.142.w3", "model.layers.31.block_sparse_moe.experts.143.w3", "model.layers.31.block_sparse_moe.experts.144.w3", "model.layers.31.block_sparse_moe.experts.145.w3", "model.layers.31.block_sparse_moe.experts.146.w3", "model.layers.31.block_sparse_moe.experts.147.w3", "model.layers.31.block_sparse_moe.experts.148.w3", "model.layers.31.block_sparse_moe.experts.149.w3", "model.layers.31.block_sparse_moe.experts.150.w3", "model.layers.31.block_sparse_moe.experts.151.w3", "model.layers.31.block_sparse_moe.experts.152.w3", "model.layers.31.block_sparse_moe.experts.153.w3", "model.layers.31.block_sparse_moe.experts.154.w3", "model.layers.31.block_sparse_moe.experts.155.w3", "model.layers.31.block_sparse_moe.experts.156.w3", "model.layers.31.block_sparse_moe.experts.157.w3", "model.layers.31.block_sparse_moe.experts.158.w3", "model.layers.31.block_sparse_moe.experts.159.w3", "model.layers.31.block_sparse_moe.experts.160.w3", "model.layers.31.block_sparse_moe.experts.161.w3", "model.layers.31.block_sparse_moe.experts.162.w3", "model.layers.31.block_sparse_moe.experts.163.w3", "model.layers.31.block_sparse_moe.experts.164.w3", "model.layers.31.block_sparse_moe.experts.165.w3", "model.layers.31.block_sparse_moe.experts.166.w3", "model.layers.31.block_sparse_moe.experts.167.w3", "model.layers.31.block_sparse_moe.experts.168.w3", "model.layers.31.block_sparse_moe.experts.169.w3", "model.layers.31.block_sparse_moe.experts.170.w3", "model.layers.31.block_sparse_moe.experts.171.w3", "model.layers.31.block_sparse_moe.experts.172.w3", "model.layers.31.block_sparse_moe.experts.173.w3", "model.layers.31.block_sparse_moe.experts.174.w3", "model.layers.31.block_sparse_moe.experts.175.w3", "model.layers.31.block_sparse_moe.experts.176.w3", "model.layers.31.block_sparse_moe.experts.177.w3", "model.layers.31.block_sparse_moe.experts.178.w3", "model.layers.31.block_sparse_moe.experts.179.w3", "model.layers.31.block_sparse_moe.experts.180.w3", "model.layers.31.block_sparse_moe.experts.181.w3", "model.layers.31.block_sparse_moe.experts.182.w3", "model.layers.31.block_sparse_moe.experts.183.w3", "model.layers.31.block_sparse_moe.experts.184.w3", "model.layers.31.block_sparse_moe.experts.185.w3", "model.layers.31.block_sparse_moe.experts.186.w3", "model.layers.31.block_sparse_moe.experts.187.w3", "model.layers.31.block_sparse_moe.experts.188.w3", "model.layers.31.block_sparse_moe.experts.189.w3", "model.layers.31.block_sparse_moe.experts.190.w3", "model.layers.31.block_sparse_moe.experts.191.w3", "model.layers.31.block_sparse_moe.experts.192.w3", "model.layers.31.block_sparse_moe.experts.193.w3", "model.layers.31.block_sparse_moe.experts.194.w3", "model.layers.31.block_sparse_moe.experts.195.w3", "model.layers.31.block_sparse_moe.experts.196.w3", "model.layers.31.block_sparse_moe.experts.197.w3", "model.layers.31.block_sparse_moe.experts.198.w3", "model.layers.31.block_sparse_moe.experts.199.w3", "model.layers.31.block_sparse_moe.experts.200.w3", "model.layers.31.block_sparse_moe.experts.201.w3", "model.layers.31.block_sparse_moe.experts.202.w3", "model.layers.31.block_sparse_moe.experts.203.w3", "model.layers.31.block_sparse_moe.experts.204.w3", "model.layers.31.block_sparse_moe.experts.205.w3", "model.layers.31.block_sparse_moe.experts.206.w3", "model.layers.31.block_sparse_moe.experts.207.w3", "model.layers.31.block_sparse_moe.experts.208.w3", "model.layers.31.block_sparse_moe.experts.209.w3", "model.layers.31.block_sparse_moe.experts.210.w3", "model.layers.31.block_sparse_moe.experts.211.w3", "model.layers.31.block_sparse_moe.experts.212.w3", "model.layers.31.block_sparse_moe.experts.213.w3", "model.layers.31.block_sparse_moe.experts.214.w3", "model.layers.31.block_sparse_moe.experts.215.w3", "model.layers.31.block_sparse_moe.experts.216.w3", "model.layers.31.block_sparse_moe.experts.217.w3", "model.layers.31.block_sparse_moe.experts.218.w3", "model.layers.31.block_sparse_moe.experts.219.w3", "model.layers.31.block_sparse_moe.experts.220.w3", "model.layers.31.block_sparse_moe.experts.221.w3", "model.layers.31.block_sparse_moe.experts.222.w3", "model.layers.31.block_sparse_moe.experts.223.w3", "model.layers.31.block_sparse_moe.experts.224.w3", "model.layers.31.block_sparse_moe.experts.225.w3", "model.layers.31.block_sparse_moe.experts.226.w3", "model.layers.31.block_sparse_moe.experts.227.w3", "model.layers.31.block_sparse_moe.experts.228.w3", "model.layers.31.block_sparse_moe.experts.229.w3", "model.layers.31.block_sparse_moe.experts.230.w3", "model.layers.31.block_sparse_moe.experts.231.w3", "model.layers.31.block_sparse_moe.experts.232.w3", "model.layers.31.block_sparse_moe.experts.233.w3", "model.layers.31.block_sparse_moe.experts.234.w3", "model.layers.31.block_sparse_moe.experts.235.w3", "model.layers.31.block_sparse_moe.experts.236.w3", "model.layers.31.block_sparse_moe.experts.237.w3", "model.layers.31.block_sparse_moe.experts.238.w3", "model.layers.31.block_sparse_moe.experts.239.w3", "model.layers.31.block_sparse_moe.experts.240.w3", "model.layers.31.block_sparse_moe.experts.241.w3", "model.layers.31.block_sparse_moe.experts.242.w3", "model.layers.31.block_sparse_moe.experts.243.w3", "model.layers.31.block_sparse_moe.experts.244.w3", "model.layers.31.block_sparse_moe.experts.245.w3", "model.layers.31.block_sparse_moe.experts.246.w3", "model.layers.31.block_sparse_moe.experts.247.w3", "model.layers.31.block_sparse_moe.experts.248.w3", "model.layers.31.block_sparse_moe.experts.249.w3", "model.layers.31.block_sparse_moe.experts.250.w3", "model.layers.31.block_sparse_moe.experts.251.w3", "model.layers.31.block_sparse_moe.experts.252.w3", "model.layers.31.block_sparse_moe.experts.253.w3", "model.layers.31.block_sparse_moe.experts.254.w3", "model.layers.31.block_sparse_moe.experts.255.w3", "model.layers.31.block_sparse_moe.experts.0.w2", "model.layers.31.block_sparse_moe.experts.1.w2", "model.layers.31.block_sparse_moe.experts.2.w2", "model.layers.31.block_sparse_moe.experts.3.w2", "model.layers.31.block_sparse_moe.experts.4.w2", "model.layers.31.block_sparse_moe.experts.5.w2", "model.layers.31.block_sparse_moe.experts.6.w2", "model.layers.31.block_sparse_moe.experts.7.w2", "model.layers.31.block_sparse_moe.experts.8.w2", "model.layers.31.block_sparse_moe.experts.9.w2", "model.layers.31.block_sparse_moe.experts.10.w2", "model.layers.31.block_sparse_moe.experts.11.w2", "model.layers.31.block_sparse_moe.experts.12.w2", "model.layers.31.block_sparse_moe.experts.13.w2", "model.layers.31.block_sparse_moe.experts.14.w2", "model.layers.31.block_sparse_moe.experts.15.w2", "model.layers.31.block_sparse_moe.experts.16.w2", "model.layers.31.block_sparse_moe.experts.17.w2", "model.layers.31.block_sparse_moe.experts.18.w2", "model.layers.31.block_sparse_moe.experts.19.w2", "model.layers.31.block_sparse_moe.experts.20.w2", "model.layers.31.block_sparse_moe.experts.21.w2", "model.layers.31.block_sparse_moe.experts.22.w2", "model.layers.31.block_sparse_moe.experts.23.w2", "model.layers.31.block_sparse_moe.experts.24.w2", "model.layers.31.block_sparse_moe.experts.25.w2", "model.layers.31.block_sparse_moe.experts.26.w2", "model.layers.31.block_sparse_moe.experts.27.w2", "model.layers.31.block_sparse_moe.experts.28.w2", "model.layers.31.block_sparse_moe.experts.29.w2", "model.layers.31.block_sparse_moe.experts.30.w2", "model.layers.31.block_sparse_moe.experts.31.w2", "model.layers.31.block_sparse_moe.experts.32.w2", "model.layers.31.block_sparse_moe.experts.33.w2", "model.layers.31.block_sparse_moe.experts.34.w2", "model.layers.31.block_sparse_moe.experts.35.w2", "model.layers.31.block_sparse_moe.experts.36.w2", "model.layers.31.block_sparse_moe.experts.37.w2", "model.layers.31.block_sparse_moe.experts.38.w2", "model.layers.31.block_sparse_moe.experts.39.w2", "model.layers.31.block_sparse_moe.experts.40.w2", "model.layers.31.block_sparse_moe.experts.41.w2", "model.layers.31.block_sparse_moe.experts.42.w2", "model.layers.31.block_sparse_moe.experts.43.w2", "model.layers.31.block_sparse_moe.experts.44.w2", "model.layers.31.block_sparse_moe.experts.45.w2", "model.layers.31.block_sparse_moe.experts.46.w2", "model.layers.31.block_sparse_moe.experts.47.w2", "model.layers.31.block_sparse_moe.experts.48.w2", "model.layers.31.block_sparse_moe.experts.49.w2", "model.layers.31.block_sparse_moe.experts.50.w2", "model.layers.31.block_sparse_moe.experts.51.w2", "model.layers.31.block_sparse_moe.experts.52.w2", "model.layers.31.block_sparse_moe.experts.53.w2", "model.layers.31.block_sparse_moe.experts.54.w2", "model.layers.31.block_sparse_moe.experts.55.w2", "model.layers.31.block_sparse_moe.experts.56.w2", "model.layers.31.block_sparse_moe.experts.57.w2", "model.layers.31.block_sparse_moe.experts.58.w2", "model.layers.31.block_sparse_moe.experts.59.w2", "model.layers.31.block_sparse_moe.experts.60.w2", "model.layers.31.block_sparse_moe.experts.61.w2", "model.layers.31.block_sparse_moe.experts.62.w2", "model.layers.31.block_sparse_moe.experts.63.w2", "model.layers.31.block_sparse_moe.experts.64.w2", "model.layers.31.block_sparse_moe.experts.65.w2", "model.layers.31.block_sparse_moe.experts.66.w2", "model.layers.31.block_sparse_moe.experts.67.w2", "model.layers.31.block_sparse_moe.experts.68.w2", "model.layers.31.block_sparse_moe.experts.69.w2", "model.layers.31.block_sparse_moe.experts.70.w2", "model.layers.31.block_sparse_moe.experts.71.w2", "model.layers.31.block_sparse_moe.experts.72.w2", "model.layers.31.block_sparse_moe.experts.73.w2", "model.layers.31.block_sparse_moe.experts.74.w2", "model.layers.31.block_sparse_moe.experts.75.w2", "model.layers.31.block_sparse_moe.experts.76.w2", "model.layers.31.block_sparse_moe.experts.77.w2", "model.layers.31.block_sparse_moe.experts.78.w2", "model.layers.31.block_sparse_moe.experts.79.w2", "model.layers.31.block_sparse_moe.experts.80.w2", "model.layers.31.block_sparse_moe.experts.81.w2", "model.layers.31.block_sparse_moe.experts.82.w2", "model.layers.31.block_sparse_moe.experts.83.w2", "model.layers.31.block_sparse_moe.experts.84.w2", "model.layers.31.block_sparse_moe.experts.85.w2", "model.layers.31.block_sparse_moe.experts.86.w2", "model.layers.31.block_sparse_moe.experts.87.w2", "model.layers.31.block_sparse_moe.experts.88.w2", "model.layers.31.block_sparse_moe.experts.89.w2", "model.layers.31.block_sparse_moe.experts.90.w2", "model.layers.31.block_sparse_moe.experts.91.w2", "model.layers.31.block_sparse_moe.experts.92.w2", "model.layers.31.block_sparse_moe.experts.93.w2", "model.layers.31.block_sparse_moe.experts.94.w2", "model.layers.31.block_sparse_moe.experts.95.w2", "model.layers.31.block_sparse_moe.experts.96.w2", "model.layers.31.block_sparse_moe.experts.97.w2", "model.layers.31.block_sparse_moe.experts.98.w2", "model.layers.31.block_sparse_moe.experts.99.w2", "model.layers.31.block_sparse_moe.experts.100.w2", "model.layers.31.block_sparse_moe.experts.101.w2", "model.layers.31.block_sparse_moe.experts.102.w2", "model.layers.31.block_sparse_moe.experts.103.w2", "model.layers.31.block_sparse_moe.experts.104.w2", "model.layers.31.block_sparse_moe.experts.105.w2", "model.layers.31.block_sparse_moe.experts.106.w2", "model.layers.31.block_sparse_moe.experts.107.w2", "model.layers.31.block_sparse_moe.experts.108.w2", "model.layers.31.block_sparse_moe.experts.109.w2", "model.layers.31.block_sparse_moe.experts.110.w2", "model.layers.31.block_sparse_moe.experts.111.w2", "model.layers.31.block_sparse_moe.experts.112.w2", "model.layers.31.block_sparse_moe.experts.113.w2", "model.layers.31.block_sparse_moe.experts.114.w2", "model.layers.31.block_sparse_moe.experts.115.w2", "model.layers.31.block_sparse_moe.experts.116.w2", "model.layers.31.block_sparse_moe.experts.117.w2", "model.layers.31.block_sparse_moe.experts.118.w2", "model.layers.31.block_sparse_moe.experts.119.w2", "model.layers.31.block_sparse_moe.experts.120.w2", "model.layers.31.block_sparse_moe.experts.121.w2", "model.layers.31.block_sparse_moe.experts.122.w2", "model.layers.31.block_sparse_moe.experts.123.w2", "model.layers.31.block_sparse_moe.experts.124.w2", "model.layers.31.block_sparse_moe.experts.125.w2", "model.layers.31.block_sparse_moe.experts.126.w2", "model.layers.31.block_sparse_moe.experts.127.w2", "model.layers.31.block_sparse_moe.experts.128.w2", "model.layers.31.block_sparse_moe.experts.129.w2", "model.layers.31.block_sparse_moe.experts.130.w2", "model.layers.31.block_sparse_moe.experts.131.w2", "model.layers.31.block_sparse_moe.experts.132.w2", "model.layers.31.block_sparse_moe.experts.133.w2", "model.layers.31.block_sparse_moe.experts.134.w2", "model.layers.31.block_sparse_moe.experts.135.w2", "model.layers.31.block_sparse_moe.experts.136.w2", "model.layers.31.block_sparse_moe.experts.137.w2", "model.layers.31.block_sparse_moe.experts.138.w2", "model.layers.31.block_sparse_moe.experts.139.w2", "model.layers.31.block_sparse_moe.experts.140.w2", "model.layers.31.block_sparse_moe.experts.141.w2", "model.layers.31.block_sparse_moe.experts.142.w2", "model.layers.31.block_sparse_moe.experts.143.w2", "model.layers.31.block_sparse_moe.experts.144.w2", "model.layers.31.block_sparse_moe.experts.145.w2", "model.layers.31.block_sparse_moe.experts.146.w2", "model.layers.31.block_sparse_moe.experts.147.w2", "model.layers.31.block_sparse_moe.experts.148.w2", "model.layers.31.block_sparse_moe.experts.149.w2", "model.layers.31.block_sparse_moe.experts.150.w2", "model.layers.31.block_sparse_moe.experts.151.w2", "model.layers.31.block_sparse_moe.experts.152.w2", "model.layers.31.block_sparse_moe.experts.153.w2", "model.layers.31.block_sparse_moe.experts.154.w2", "model.layers.31.block_sparse_moe.experts.155.w2", "model.layers.31.block_sparse_moe.experts.156.w2", "model.layers.31.block_sparse_moe.experts.157.w2", "model.layers.31.block_sparse_moe.experts.158.w2", "model.layers.31.block_sparse_moe.experts.159.w2", "model.layers.31.block_sparse_moe.experts.160.w2", "model.layers.31.block_sparse_moe.experts.161.w2", "model.layers.31.block_sparse_moe.experts.162.w2", "model.layers.31.block_sparse_moe.experts.163.w2", "model.layers.31.block_sparse_moe.experts.164.w2", "model.layers.31.block_sparse_moe.experts.165.w2", "model.layers.31.block_sparse_moe.experts.166.w2", "model.layers.31.block_sparse_moe.experts.167.w2", "model.layers.31.block_sparse_moe.experts.168.w2", "model.layers.31.block_sparse_moe.experts.169.w2", "model.layers.31.block_sparse_moe.experts.170.w2", "model.layers.31.block_sparse_moe.experts.171.w2", "model.layers.31.block_sparse_moe.experts.172.w2", "model.layers.31.block_sparse_moe.experts.173.w2", "model.layers.31.block_sparse_moe.experts.174.w2", "model.layers.31.block_sparse_moe.experts.175.w2", "model.layers.31.block_sparse_moe.experts.176.w2", "model.layers.31.block_sparse_moe.experts.177.w2", "model.layers.31.block_sparse_moe.experts.178.w2", "model.layers.31.block_sparse_moe.experts.179.w2", "model.layers.31.block_sparse_moe.experts.180.w2", "model.layers.31.block_sparse_moe.experts.181.w2", "model.layers.31.block_sparse_moe.experts.182.w2", "model.layers.31.block_sparse_moe.experts.183.w2", "model.layers.31.block_sparse_moe.experts.184.w2", "model.layers.31.block_sparse_moe.experts.185.w2", "model.layers.31.block_sparse_moe.experts.186.w2", "model.layers.31.block_sparse_moe.experts.187.w2", "model.layers.31.block_sparse_moe.experts.188.w2", "model.layers.31.block_sparse_moe.experts.189.w2", "model.layers.31.block_sparse_moe.experts.190.w2", "model.layers.31.block_sparse_moe.experts.191.w2", "model.layers.31.block_sparse_moe.experts.192.w2", "model.layers.31.block_sparse_moe.experts.193.w2", "model.layers.31.block_sparse_moe.experts.194.w2", "model.layers.31.block_sparse_moe.experts.195.w2", "model.layers.31.block_sparse_moe.experts.196.w2", "model.layers.31.block_sparse_moe.experts.197.w2", "model.layers.31.block_sparse_moe.experts.198.w2", "model.layers.31.block_sparse_moe.experts.199.w2", "model.layers.31.block_sparse_moe.experts.200.w2", "model.layers.31.block_sparse_moe.experts.201.w2", "model.layers.31.block_sparse_moe.experts.202.w2", "model.layers.31.block_sparse_moe.experts.203.w2", "model.layers.31.block_sparse_moe.experts.204.w2", "model.layers.31.block_sparse_moe.experts.205.w2", "model.layers.31.block_sparse_moe.experts.206.w2", "model.layers.31.block_sparse_moe.experts.207.w2", "model.layers.31.block_sparse_moe.experts.208.w2", "model.layers.31.block_sparse_moe.experts.209.w2", "model.layers.31.block_sparse_moe.experts.210.w2", "model.layers.31.block_sparse_moe.experts.211.w2", "model.layers.31.block_sparse_moe.experts.212.w2", "model.layers.31.block_sparse_moe.experts.213.w2", "model.layers.31.block_sparse_moe.experts.214.w2", "model.layers.31.block_sparse_moe.experts.215.w2", "model.layers.31.block_sparse_moe.experts.216.w2", "model.layers.31.block_sparse_moe.experts.217.w2", "model.layers.31.block_sparse_moe.experts.218.w2", "model.layers.31.block_sparse_moe.experts.219.w2", "model.layers.31.block_sparse_moe.experts.220.w2", "model.layers.31.block_sparse_moe.experts.221.w2", "model.layers.31.block_sparse_moe.experts.222.w2", "model.layers.31.block_sparse_moe.experts.223.w2", "model.layers.31.block_sparse_moe.experts.224.w2", "model.layers.31.block_sparse_moe.experts.225.w2", "model.layers.31.block_sparse_moe.experts.226.w2", "model.layers.31.block_sparse_moe.experts.227.w2", "model.layers.31.block_sparse_moe.experts.228.w2", "model.layers.31.block_sparse_moe.experts.229.w2", "model.layers.31.block_sparse_moe.experts.230.w2", "model.layers.31.block_sparse_moe.experts.231.w2", "model.layers.31.block_sparse_moe.experts.232.w2", "model.layers.31.block_sparse_moe.experts.233.w2", "model.layers.31.block_sparse_moe.experts.234.w2", "model.layers.31.block_sparse_moe.experts.235.w2", "model.layers.31.block_sparse_moe.experts.236.w2", "model.layers.31.block_sparse_moe.experts.237.w2", "model.layers.31.block_sparse_moe.experts.238.w2", "model.layers.31.block_sparse_moe.experts.239.w2", "model.layers.31.block_sparse_moe.experts.240.w2", "model.layers.31.block_sparse_moe.experts.241.w2", "model.layers.31.block_sparse_moe.experts.242.w2", "model.layers.31.block_sparse_moe.experts.243.w2", "model.layers.31.block_sparse_moe.experts.244.w2", "model.layers.31.block_sparse_moe.experts.245.w2", "model.layers.31.block_sparse_moe.experts.246.w2", "model.layers.31.block_sparse_moe.experts.247.w2", "model.layers.31.block_sparse_moe.experts.248.w2", "model.layers.31.block_sparse_moe.experts.249.w2", "model.layers.31.block_sparse_moe.experts.250.w2", "model.layers.31.block_sparse_moe.experts.251.w2", "model.layers.31.block_sparse_moe.experts.252.w2", "model.layers.31.block_sparse_moe.experts.253.w2", "model.layers.31.block_sparse_moe.experts.254.w2", "model.layers.31.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.001337856426835038, "dbits": 3623878656 } ] }, { "idx": 64, "layers": [ "model.layers.32.self_attn.q_proj", "model.layers.32.self_attn.k_proj", "model.layers.32.self_attn.v_proj", "model.layers.32.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0073462940752506145, "dbits": 44040192 } ] }, { "idx": 65, "layers": [ "model.layers.32.block_sparse_moe.experts.0.w1", "model.layers.32.block_sparse_moe.experts.1.w1", "model.layers.32.block_sparse_moe.experts.2.w1", "model.layers.32.block_sparse_moe.experts.3.w1", "model.layers.32.block_sparse_moe.experts.4.w1", "model.layers.32.block_sparse_moe.experts.5.w1", "model.layers.32.block_sparse_moe.experts.6.w1", "model.layers.32.block_sparse_moe.experts.7.w1", "model.layers.32.block_sparse_moe.experts.8.w1", "model.layers.32.block_sparse_moe.experts.9.w1", "model.layers.32.block_sparse_moe.experts.10.w1", "model.layers.32.block_sparse_moe.experts.11.w1", "model.layers.32.block_sparse_moe.experts.12.w1", "model.layers.32.block_sparse_moe.experts.13.w1", "model.layers.32.block_sparse_moe.experts.14.w1", "model.layers.32.block_sparse_moe.experts.15.w1", "model.layers.32.block_sparse_moe.experts.16.w1", "model.layers.32.block_sparse_moe.experts.17.w1", "model.layers.32.block_sparse_moe.experts.18.w1", "model.layers.32.block_sparse_moe.experts.19.w1", "model.layers.32.block_sparse_moe.experts.20.w1", "model.layers.32.block_sparse_moe.experts.21.w1", "model.layers.32.block_sparse_moe.experts.22.w1", "model.layers.32.block_sparse_moe.experts.23.w1", "model.layers.32.block_sparse_moe.experts.24.w1", "model.layers.32.block_sparse_moe.experts.25.w1", "model.layers.32.block_sparse_moe.experts.26.w1", "model.layers.32.block_sparse_moe.experts.27.w1", "model.layers.32.block_sparse_moe.experts.28.w1", "model.layers.32.block_sparse_moe.experts.29.w1", "model.layers.32.block_sparse_moe.experts.30.w1", "model.layers.32.block_sparse_moe.experts.31.w1", "model.layers.32.block_sparse_moe.experts.32.w1", "model.layers.32.block_sparse_moe.experts.33.w1", "model.layers.32.block_sparse_moe.experts.34.w1", "model.layers.32.block_sparse_moe.experts.35.w1", "model.layers.32.block_sparse_moe.experts.36.w1", "model.layers.32.block_sparse_moe.experts.37.w1", "model.layers.32.block_sparse_moe.experts.38.w1", "model.layers.32.block_sparse_moe.experts.39.w1", "model.layers.32.block_sparse_moe.experts.40.w1", "model.layers.32.block_sparse_moe.experts.41.w1", "model.layers.32.block_sparse_moe.experts.42.w1", "model.layers.32.block_sparse_moe.experts.43.w1", "model.layers.32.block_sparse_moe.experts.44.w1", "model.layers.32.block_sparse_moe.experts.45.w1", "model.layers.32.block_sparse_moe.experts.46.w1", "model.layers.32.block_sparse_moe.experts.47.w1", "model.layers.32.block_sparse_moe.experts.48.w1", "model.layers.32.block_sparse_moe.experts.49.w1", "model.layers.32.block_sparse_moe.experts.50.w1", "model.layers.32.block_sparse_moe.experts.51.w1", "model.layers.32.block_sparse_moe.experts.52.w1", "model.layers.32.block_sparse_moe.experts.53.w1", "model.layers.32.block_sparse_moe.experts.54.w1", "model.layers.32.block_sparse_moe.experts.55.w1", "model.layers.32.block_sparse_moe.experts.56.w1", "model.layers.32.block_sparse_moe.experts.57.w1", "model.layers.32.block_sparse_moe.experts.58.w1", "model.layers.32.block_sparse_moe.experts.59.w1", "model.layers.32.block_sparse_moe.experts.60.w1", "model.layers.32.block_sparse_moe.experts.61.w1", "model.layers.32.block_sparse_moe.experts.62.w1", "model.layers.32.block_sparse_moe.experts.63.w1", "model.layers.32.block_sparse_moe.experts.64.w1", "model.layers.32.block_sparse_moe.experts.65.w1", "model.layers.32.block_sparse_moe.experts.66.w1", "model.layers.32.block_sparse_moe.experts.67.w1", "model.layers.32.block_sparse_moe.experts.68.w1", "model.layers.32.block_sparse_moe.experts.69.w1", "model.layers.32.block_sparse_moe.experts.70.w1", "model.layers.32.block_sparse_moe.experts.71.w1", "model.layers.32.block_sparse_moe.experts.72.w1", "model.layers.32.block_sparse_moe.experts.73.w1", "model.layers.32.block_sparse_moe.experts.74.w1", "model.layers.32.block_sparse_moe.experts.75.w1", "model.layers.32.block_sparse_moe.experts.76.w1", "model.layers.32.block_sparse_moe.experts.77.w1", "model.layers.32.block_sparse_moe.experts.78.w1", "model.layers.32.block_sparse_moe.experts.79.w1", "model.layers.32.block_sparse_moe.experts.80.w1", "model.layers.32.block_sparse_moe.experts.81.w1", "model.layers.32.block_sparse_moe.experts.82.w1", "model.layers.32.block_sparse_moe.experts.83.w1", "model.layers.32.block_sparse_moe.experts.84.w1", "model.layers.32.block_sparse_moe.experts.85.w1", "model.layers.32.block_sparse_moe.experts.86.w1", "model.layers.32.block_sparse_moe.experts.87.w1", "model.layers.32.block_sparse_moe.experts.88.w1", "model.layers.32.block_sparse_moe.experts.89.w1", "model.layers.32.block_sparse_moe.experts.90.w1", "model.layers.32.block_sparse_moe.experts.91.w1", "model.layers.32.block_sparse_moe.experts.92.w1", "model.layers.32.block_sparse_moe.experts.93.w1", "model.layers.32.block_sparse_moe.experts.94.w1", "model.layers.32.block_sparse_moe.experts.95.w1", "model.layers.32.block_sparse_moe.experts.96.w1", "model.layers.32.block_sparse_moe.experts.97.w1", "model.layers.32.block_sparse_moe.experts.98.w1", "model.layers.32.block_sparse_moe.experts.99.w1", "model.layers.32.block_sparse_moe.experts.100.w1", "model.layers.32.block_sparse_moe.experts.101.w1", "model.layers.32.block_sparse_moe.experts.102.w1", "model.layers.32.block_sparse_moe.experts.103.w1", "model.layers.32.block_sparse_moe.experts.104.w1", "model.layers.32.block_sparse_moe.experts.105.w1", "model.layers.32.block_sparse_moe.experts.106.w1", "model.layers.32.block_sparse_moe.experts.107.w1", "model.layers.32.block_sparse_moe.experts.108.w1", "model.layers.32.block_sparse_moe.experts.109.w1", "model.layers.32.block_sparse_moe.experts.110.w1", "model.layers.32.block_sparse_moe.experts.111.w1", "model.layers.32.block_sparse_moe.experts.112.w1", "model.layers.32.block_sparse_moe.experts.113.w1", "model.layers.32.block_sparse_moe.experts.114.w1", "model.layers.32.block_sparse_moe.experts.115.w1", "model.layers.32.block_sparse_moe.experts.116.w1", "model.layers.32.block_sparse_moe.experts.117.w1", "model.layers.32.block_sparse_moe.experts.118.w1", "model.layers.32.block_sparse_moe.experts.119.w1", "model.layers.32.block_sparse_moe.experts.120.w1", "model.layers.32.block_sparse_moe.experts.121.w1", "model.layers.32.block_sparse_moe.experts.122.w1", "model.layers.32.block_sparse_moe.experts.123.w1", "model.layers.32.block_sparse_moe.experts.124.w1", "model.layers.32.block_sparse_moe.experts.125.w1", "model.layers.32.block_sparse_moe.experts.126.w1", "model.layers.32.block_sparse_moe.experts.127.w1", "model.layers.32.block_sparse_moe.experts.128.w1", "model.layers.32.block_sparse_moe.experts.129.w1", "model.layers.32.block_sparse_moe.experts.130.w1", "model.layers.32.block_sparse_moe.experts.131.w1", "model.layers.32.block_sparse_moe.experts.132.w1", "model.layers.32.block_sparse_moe.experts.133.w1", "model.layers.32.block_sparse_moe.experts.134.w1", "model.layers.32.block_sparse_moe.experts.135.w1", "model.layers.32.block_sparse_moe.experts.136.w1", "model.layers.32.block_sparse_moe.experts.137.w1", "model.layers.32.block_sparse_moe.experts.138.w1", "model.layers.32.block_sparse_moe.experts.139.w1", "model.layers.32.block_sparse_moe.experts.140.w1", "model.layers.32.block_sparse_moe.experts.141.w1", "model.layers.32.block_sparse_moe.experts.142.w1", "model.layers.32.block_sparse_moe.experts.143.w1", "model.layers.32.block_sparse_moe.experts.144.w1", "model.layers.32.block_sparse_moe.experts.145.w1", "model.layers.32.block_sparse_moe.experts.146.w1", "model.layers.32.block_sparse_moe.experts.147.w1", "model.layers.32.block_sparse_moe.experts.148.w1", "model.layers.32.block_sparse_moe.experts.149.w1", "model.layers.32.block_sparse_moe.experts.150.w1", "model.layers.32.block_sparse_moe.experts.151.w1", "model.layers.32.block_sparse_moe.experts.152.w1", "model.layers.32.block_sparse_moe.experts.153.w1", "model.layers.32.block_sparse_moe.experts.154.w1", "model.layers.32.block_sparse_moe.experts.155.w1", "model.layers.32.block_sparse_moe.experts.156.w1", "model.layers.32.block_sparse_moe.experts.157.w1", "model.layers.32.block_sparse_moe.experts.158.w1", "model.layers.32.block_sparse_moe.experts.159.w1", "model.layers.32.block_sparse_moe.experts.160.w1", "model.layers.32.block_sparse_moe.experts.161.w1", "model.layers.32.block_sparse_moe.experts.162.w1", "model.layers.32.block_sparse_moe.experts.163.w1", "model.layers.32.block_sparse_moe.experts.164.w1", "model.layers.32.block_sparse_moe.experts.165.w1", "model.layers.32.block_sparse_moe.experts.166.w1", "model.layers.32.block_sparse_moe.experts.167.w1", "model.layers.32.block_sparse_moe.experts.168.w1", "model.layers.32.block_sparse_moe.experts.169.w1", "model.layers.32.block_sparse_moe.experts.170.w1", "model.layers.32.block_sparse_moe.experts.171.w1", "model.layers.32.block_sparse_moe.experts.172.w1", "model.layers.32.block_sparse_moe.experts.173.w1", "model.layers.32.block_sparse_moe.experts.174.w1", "model.layers.32.block_sparse_moe.experts.175.w1", "model.layers.32.block_sparse_moe.experts.176.w1", "model.layers.32.block_sparse_moe.experts.177.w1", "model.layers.32.block_sparse_moe.experts.178.w1", "model.layers.32.block_sparse_moe.experts.179.w1", "model.layers.32.block_sparse_moe.experts.180.w1", "model.layers.32.block_sparse_moe.experts.181.w1", "model.layers.32.block_sparse_moe.experts.182.w1", "model.layers.32.block_sparse_moe.experts.183.w1", "model.layers.32.block_sparse_moe.experts.184.w1", "model.layers.32.block_sparse_moe.experts.185.w1", "model.layers.32.block_sparse_moe.experts.186.w1", "model.layers.32.block_sparse_moe.experts.187.w1", "model.layers.32.block_sparse_moe.experts.188.w1", "model.layers.32.block_sparse_moe.experts.189.w1", "model.layers.32.block_sparse_moe.experts.190.w1", "model.layers.32.block_sparse_moe.experts.191.w1", "model.layers.32.block_sparse_moe.experts.192.w1", "model.layers.32.block_sparse_moe.experts.193.w1", "model.layers.32.block_sparse_moe.experts.194.w1", "model.layers.32.block_sparse_moe.experts.195.w1", "model.layers.32.block_sparse_moe.experts.196.w1", "model.layers.32.block_sparse_moe.experts.197.w1", "model.layers.32.block_sparse_moe.experts.198.w1", "model.layers.32.block_sparse_moe.experts.199.w1", "model.layers.32.block_sparse_moe.experts.200.w1", "model.layers.32.block_sparse_moe.experts.201.w1", "model.layers.32.block_sparse_moe.experts.202.w1", "model.layers.32.block_sparse_moe.experts.203.w1", "model.layers.32.block_sparse_moe.experts.204.w1", "model.layers.32.block_sparse_moe.experts.205.w1", "model.layers.32.block_sparse_moe.experts.206.w1", "model.layers.32.block_sparse_moe.experts.207.w1", "model.layers.32.block_sparse_moe.experts.208.w1", "model.layers.32.block_sparse_moe.experts.209.w1", "model.layers.32.block_sparse_moe.experts.210.w1", "model.layers.32.block_sparse_moe.experts.211.w1", "model.layers.32.block_sparse_moe.experts.212.w1", "model.layers.32.block_sparse_moe.experts.213.w1", "model.layers.32.block_sparse_moe.experts.214.w1", "model.layers.32.block_sparse_moe.experts.215.w1", "model.layers.32.block_sparse_moe.experts.216.w1", "model.layers.32.block_sparse_moe.experts.217.w1", "model.layers.32.block_sparse_moe.experts.218.w1", "model.layers.32.block_sparse_moe.experts.219.w1", "model.layers.32.block_sparse_moe.experts.220.w1", "model.layers.32.block_sparse_moe.experts.221.w1", "model.layers.32.block_sparse_moe.experts.222.w1", "model.layers.32.block_sparse_moe.experts.223.w1", "model.layers.32.block_sparse_moe.experts.224.w1", "model.layers.32.block_sparse_moe.experts.225.w1", "model.layers.32.block_sparse_moe.experts.226.w1", "model.layers.32.block_sparse_moe.experts.227.w1", "model.layers.32.block_sparse_moe.experts.228.w1", "model.layers.32.block_sparse_moe.experts.229.w1", "model.layers.32.block_sparse_moe.experts.230.w1", "model.layers.32.block_sparse_moe.experts.231.w1", "model.layers.32.block_sparse_moe.experts.232.w1", "model.layers.32.block_sparse_moe.experts.233.w1", "model.layers.32.block_sparse_moe.experts.234.w1", "model.layers.32.block_sparse_moe.experts.235.w1", "model.layers.32.block_sparse_moe.experts.236.w1", "model.layers.32.block_sparse_moe.experts.237.w1", "model.layers.32.block_sparse_moe.experts.238.w1", "model.layers.32.block_sparse_moe.experts.239.w1", "model.layers.32.block_sparse_moe.experts.240.w1", "model.layers.32.block_sparse_moe.experts.241.w1", "model.layers.32.block_sparse_moe.experts.242.w1", "model.layers.32.block_sparse_moe.experts.243.w1", "model.layers.32.block_sparse_moe.experts.244.w1", "model.layers.32.block_sparse_moe.experts.245.w1", "model.layers.32.block_sparse_moe.experts.246.w1", "model.layers.32.block_sparse_moe.experts.247.w1", "model.layers.32.block_sparse_moe.experts.248.w1", "model.layers.32.block_sparse_moe.experts.249.w1", "model.layers.32.block_sparse_moe.experts.250.w1", "model.layers.32.block_sparse_moe.experts.251.w1", "model.layers.32.block_sparse_moe.experts.252.w1", "model.layers.32.block_sparse_moe.experts.253.w1", "model.layers.32.block_sparse_moe.experts.254.w1", "model.layers.32.block_sparse_moe.experts.255.w1", "model.layers.32.block_sparse_moe.experts.0.w3", "model.layers.32.block_sparse_moe.experts.1.w3", "model.layers.32.block_sparse_moe.experts.2.w3", "model.layers.32.block_sparse_moe.experts.3.w3", "model.layers.32.block_sparse_moe.experts.4.w3", "model.layers.32.block_sparse_moe.experts.5.w3", "model.layers.32.block_sparse_moe.experts.6.w3", "model.layers.32.block_sparse_moe.experts.7.w3", "model.layers.32.block_sparse_moe.experts.8.w3", "model.layers.32.block_sparse_moe.experts.9.w3", "model.layers.32.block_sparse_moe.experts.10.w3", "model.layers.32.block_sparse_moe.experts.11.w3", "model.layers.32.block_sparse_moe.experts.12.w3", "model.layers.32.block_sparse_moe.experts.13.w3", "model.layers.32.block_sparse_moe.experts.14.w3", "model.layers.32.block_sparse_moe.experts.15.w3", "model.layers.32.block_sparse_moe.experts.16.w3", "model.layers.32.block_sparse_moe.experts.17.w3", "model.layers.32.block_sparse_moe.experts.18.w3", "model.layers.32.block_sparse_moe.experts.19.w3", "model.layers.32.block_sparse_moe.experts.20.w3", "model.layers.32.block_sparse_moe.experts.21.w3", "model.layers.32.block_sparse_moe.experts.22.w3", "model.layers.32.block_sparse_moe.experts.23.w3", "model.layers.32.block_sparse_moe.experts.24.w3", "model.layers.32.block_sparse_moe.experts.25.w3", "model.layers.32.block_sparse_moe.experts.26.w3", "model.layers.32.block_sparse_moe.experts.27.w3", "model.layers.32.block_sparse_moe.experts.28.w3", "model.layers.32.block_sparse_moe.experts.29.w3", "model.layers.32.block_sparse_moe.experts.30.w3", "model.layers.32.block_sparse_moe.experts.31.w3", "model.layers.32.block_sparse_moe.experts.32.w3", "model.layers.32.block_sparse_moe.experts.33.w3", "model.layers.32.block_sparse_moe.experts.34.w3", "model.layers.32.block_sparse_moe.experts.35.w3", "model.layers.32.block_sparse_moe.experts.36.w3", "model.layers.32.block_sparse_moe.experts.37.w3", "model.layers.32.block_sparse_moe.experts.38.w3", "model.layers.32.block_sparse_moe.experts.39.w3", "model.layers.32.block_sparse_moe.experts.40.w3", "model.layers.32.block_sparse_moe.experts.41.w3", "model.layers.32.block_sparse_moe.experts.42.w3", "model.layers.32.block_sparse_moe.experts.43.w3", "model.layers.32.block_sparse_moe.experts.44.w3", "model.layers.32.block_sparse_moe.experts.45.w3", "model.layers.32.block_sparse_moe.experts.46.w3", "model.layers.32.block_sparse_moe.experts.47.w3", "model.layers.32.block_sparse_moe.experts.48.w3", "model.layers.32.block_sparse_moe.experts.49.w3", "model.layers.32.block_sparse_moe.experts.50.w3", "model.layers.32.block_sparse_moe.experts.51.w3", "model.layers.32.block_sparse_moe.experts.52.w3", "model.layers.32.block_sparse_moe.experts.53.w3", "model.layers.32.block_sparse_moe.experts.54.w3", "model.layers.32.block_sparse_moe.experts.55.w3", "model.layers.32.block_sparse_moe.experts.56.w3", "model.layers.32.block_sparse_moe.experts.57.w3", "model.layers.32.block_sparse_moe.experts.58.w3", "model.layers.32.block_sparse_moe.experts.59.w3", "model.layers.32.block_sparse_moe.experts.60.w3", "model.layers.32.block_sparse_moe.experts.61.w3", "model.layers.32.block_sparse_moe.experts.62.w3", "model.layers.32.block_sparse_moe.experts.63.w3", "model.layers.32.block_sparse_moe.experts.64.w3", "model.layers.32.block_sparse_moe.experts.65.w3", "model.layers.32.block_sparse_moe.experts.66.w3", "model.layers.32.block_sparse_moe.experts.67.w3", "model.layers.32.block_sparse_moe.experts.68.w3", "model.layers.32.block_sparse_moe.experts.69.w3", "model.layers.32.block_sparse_moe.experts.70.w3", "model.layers.32.block_sparse_moe.experts.71.w3", "model.layers.32.block_sparse_moe.experts.72.w3", "model.layers.32.block_sparse_moe.experts.73.w3", "model.layers.32.block_sparse_moe.experts.74.w3", "model.layers.32.block_sparse_moe.experts.75.w3", "model.layers.32.block_sparse_moe.experts.76.w3", "model.layers.32.block_sparse_moe.experts.77.w3", "model.layers.32.block_sparse_moe.experts.78.w3", "model.layers.32.block_sparse_moe.experts.79.w3", "model.layers.32.block_sparse_moe.experts.80.w3", "model.layers.32.block_sparse_moe.experts.81.w3", "model.layers.32.block_sparse_moe.experts.82.w3", "model.layers.32.block_sparse_moe.experts.83.w3", "model.layers.32.block_sparse_moe.experts.84.w3", "model.layers.32.block_sparse_moe.experts.85.w3", "model.layers.32.block_sparse_moe.experts.86.w3", "model.layers.32.block_sparse_moe.experts.87.w3", "model.layers.32.block_sparse_moe.experts.88.w3", "model.layers.32.block_sparse_moe.experts.89.w3", "model.layers.32.block_sparse_moe.experts.90.w3", "model.layers.32.block_sparse_moe.experts.91.w3", "model.layers.32.block_sparse_moe.experts.92.w3", "model.layers.32.block_sparse_moe.experts.93.w3", "model.layers.32.block_sparse_moe.experts.94.w3", "model.layers.32.block_sparse_moe.experts.95.w3", "model.layers.32.block_sparse_moe.experts.96.w3", "model.layers.32.block_sparse_moe.experts.97.w3", "model.layers.32.block_sparse_moe.experts.98.w3", "model.layers.32.block_sparse_moe.experts.99.w3", "model.layers.32.block_sparse_moe.experts.100.w3", "model.layers.32.block_sparse_moe.experts.101.w3", "model.layers.32.block_sparse_moe.experts.102.w3", "model.layers.32.block_sparse_moe.experts.103.w3", "model.layers.32.block_sparse_moe.experts.104.w3", "model.layers.32.block_sparse_moe.experts.105.w3", "model.layers.32.block_sparse_moe.experts.106.w3", "model.layers.32.block_sparse_moe.experts.107.w3", "model.layers.32.block_sparse_moe.experts.108.w3", "model.layers.32.block_sparse_moe.experts.109.w3", "model.layers.32.block_sparse_moe.experts.110.w3", "model.layers.32.block_sparse_moe.experts.111.w3", "model.layers.32.block_sparse_moe.experts.112.w3", "model.layers.32.block_sparse_moe.experts.113.w3", "model.layers.32.block_sparse_moe.experts.114.w3", "model.layers.32.block_sparse_moe.experts.115.w3", "model.layers.32.block_sparse_moe.experts.116.w3", "model.layers.32.block_sparse_moe.experts.117.w3", "model.layers.32.block_sparse_moe.experts.118.w3", "model.layers.32.block_sparse_moe.experts.119.w3", "model.layers.32.block_sparse_moe.experts.120.w3", "model.layers.32.block_sparse_moe.experts.121.w3", "model.layers.32.block_sparse_moe.experts.122.w3", "model.layers.32.block_sparse_moe.experts.123.w3", "model.layers.32.block_sparse_moe.experts.124.w3", "model.layers.32.block_sparse_moe.experts.125.w3", "model.layers.32.block_sparse_moe.experts.126.w3", "model.layers.32.block_sparse_moe.experts.127.w3", "model.layers.32.block_sparse_moe.experts.128.w3", "model.layers.32.block_sparse_moe.experts.129.w3", "model.layers.32.block_sparse_moe.experts.130.w3", "model.layers.32.block_sparse_moe.experts.131.w3", "model.layers.32.block_sparse_moe.experts.132.w3", "model.layers.32.block_sparse_moe.experts.133.w3", "model.layers.32.block_sparse_moe.experts.134.w3", "model.layers.32.block_sparse_moe.experts.135.w3", "model.layers.32.block_sparse_moe.experts.136.w3", "model.layers.32.block_sparse_moe.experts.137.w3", "model.layers.32.block_sparse_moe.experts.138.w3", "model.layers.32.block_sparse_moe.experts.139.w3", "model.layers.32.block_sparse_moe.experts.140.w3", "model.layers.32.block_sparse_moe.experts.141.w3", "model.layers.32.block_sparse_moe.experts.142.w3", "model.layers.32.block_sparse_moe.experts.143.w3", "model.layers.32.block_sparse_moe.experts.144.w3", "model.layers.32.block_sparse_moe.experts.145.w3", "model.layers.32.block_sparse_moe.experts.146.w3", "model.layers.32.block_sparse_moe.experts.147.w3", "model.layers.32.block_sparse_moe.experts.148.w3", "model.layers.32.block_sparse_moe.experts.149.w3", "model.layers.32.block_sparse_moe.experts.150.w3", "model.layers.32.block_sparse_moe.experts.151.w3", "model.layers.32.block_sparse_moe.experts.152.w3", "model.layers.32.block_sparse_moe.experts.153.w3", "model.layers.32.block_sparse_moe.experts.154.w3", "model.layers.32.block_sparse_moe.experts.155.w3", "model.layers.32.block_sparse_moe.experts.156.w3", "model.layers.32.block_sparse_moe.experts.157.w3", "model.layers.32.block_sparse_moe.experts.158.w3", "model.layers.32.block_sparse_moe.experts.159.w3", "model.layers.32.block_sparse_moe.experts.160.w3", "model.layers.32.block_sparse_moe.experts.161.w3", "model.layers.32.block_sparse_moe.experts.162.w3", "model.layers.32.block_sparse_moe.experts.163.w3", "model.layers.32.block_sparse_moe.experts.164.w3", "model.layers.32.block_sparse_moe.experts.165.w3", "model.layers.32.block_sparse_moe.experts.166.w3", "model.layers.32.block_sparse_moe.experts.167.w3", "model.layers.32.block_sparse_moe.experts.168.w3", "model.layers.32.block_sparse_moe.experts.169.w3", "model.layers.32.block_sparse_moe.experts.170.w3", "model.layers.32.block_sparse_moe.experts.171.w3", "model.layers.32.block_sparse_moe.experts.172.w3", "model.layers.32.block_sparse_moe.experts.173.w3", "model.layers.32.block_sparse_moe.experts.174.w3", "model.layers.32.block_sparse_moe.experts.175.w3", "model.layers.32.block_sparse_moe.experts.176.w3", "model.layers.32.block_sparse_moe.experts.177.w3", "model.layers.32.block_sparse_moe.experts.178.w3", "model.layers.32.block_sparse_moe.experts.179.w3", "model.layers.32.block_sparse_moe.experts.180.w3", "model.layers.32.block_sparse_moe.experts.181.w3", "model.layers.32.block_sparse_moe.experts.182.w3", "model.layers.32.block_sparse_moe.experts.183.w3", "model.layers.32.block_sparse_moe.experts.184.w3", "model.layers.32.block_sparse_moe.experts.185.w3", "model.layers.32.block_sparse_moe.experts.186.w3", "model.layers.32.block_sparse_moe.experts.187.w3", "model.layers.32.block_sparse_moe.experts.188.w3", "model.layers.32.block_sparse_moe.experts.189.w3", "model.layers.32.block_sparse_moe.experts.190.w3", "model.layers.32.block_sparse_moe.experts.191.w3", "model.layers.32.block_sparse_moe.experts.192.w3", "model.layers.32.block_sparse_moe.experts.193.w3", "model.layers.32.block_sparse_moe.experts.194.w3", "model.layers.32.block_sparse_moe.experts.195.w3", "model.layers.32.block_sparse_moe.experts.196.w3", "model.layers.32.block_sparse_moe.experts.197.w3", "model.layers.32.block_sparse_moe.experts.198.w3", "model.layers.32.block_sparse_moe.experts.199.w3", "model.layers.32.block_sparse_moe.experts.200.w3", "model.layers.32.block_sparse_moe.experts.201.w3", "model.layers.32.block_sparse_moe.experts.202.w3", "model.layers.32.block_sparse_moe.experts.203.w3", "model.layers.32.block_sparse_moe.experts.204.w3", "model.layers.32.block_sparse_moe.experts.205.w3", "model.layers.32.block_sparse_moe.experts.206.w3", "model.layers.32.block_sparse_moe.experts.207.w3", "model.layers.32.block_sparse_moe.experts.208.w3", "model.layers.32.block_sparse_moe.experts.209.w3", "model.layers.32.block_sparse_moe.experts.210.w3", "model.layers.32.block_sparse_moe.experts.211.w3", "model.layers.32.block_sparse_moe.experts.212.w3", "model.layers.32.block_sparse_moe.experts.213.w3", "model.layers.32.block_sparse_moe.experts.214.w3", "model.layers.32.block_sparse_moe.experts.215.w3", "model.layers.32.block_sparse_moe.experts.216.w3", "model.layers.32.block_sparse_moe.experts.217.w3", "model.layers.32.block_sparse_moe.experts.218.w3", "model.layers.32.block_sparse_moe.experts.219.w3", "model.layers.32.block_sparse_moe.experts.220.w3", "model.layers.32.block_sparse_moe.experts.221.w3", "model.layers.32.block_sparse_moe.experts.222.w3", "model.layers.32.block_sparse_moe.experts.223.w3", "model.layers.32.block_sparse_moe.experts.224.w3", "model.layers.32.block_sparse_moe.experts.225.w3", "model.layers.32.block_sparse_moe.experts.226.w3", "model.layers.32.block_sparse_moe.experts.227.w3", "model.layers.32.block_sparse_moe.experts.228.w3", "model.layers.32.block_sparse_moe.experts.229.w3", "model.layers.32.block_sparse_moe.experts.230.w3", "model.layers.32.block_sparse_moe.experts.231.w3", "model.layers.32.block_sparse_moe.experts.232.w3", "model.layers.32.block_sparse_moe.experts.233.w3", "model.layers.32.block_sparse_moe.experts.234.w3", "model.layers.32.block_sparse_moe.experts.235.w3", "model.layers.32.block_sparse_moe.experts.236.w3", "model.layers.32.block_sparse_moe.experts.237.w3", "model.layers.32.block_sparse_moe.experts.238.w3", "model.layers.32.block_sparse_moe.experts.239.w3", "model.layers.32.block_sparse_moe.experts.240.w3", "model.layers.32.block_sparse_moe.experts.241.w3", "model.layers.32.block_sparse_moe.experts.242.w3", "model.layers.32.block_sparse_moe.experts.243.w3", "model.layers.32.block_sparse_moe.experts.244.w3", "model.layers.32.block_sparse_moe.experts.245.w3", "model.layers.32.block_sparse_moe.experts.246.w3", "model.layers.32.block_sparse_moe.experts.247.w3", "model.layers.32.block_sparse_moe.experts.248.w3", "model.layers.32.block_sparse_moe.experts.249.w3", "model.layers.32.block_sparse_moe.experts.250.w3", "model.layers.32.block_sparse_moe.experts.251.w3", "model.layers.32.block_sparse_moe.experts.252.w3", "model.layers.32.block_sparse_moe.experts.253.w3", "model.layers.32.block_sparse_moe.experts.254.w3", "model.layers.32.block_sparse_moe.experts.255.w3", "model.layers.32.block_sparse_moe.experts.0.w2", "model.layers.32.block_sparse_moe.experts.1.w2", "model.layers.32.block_sparse_moe.experts.2.w2", "model.layers.32.block_sparse_moe.experts.3.w2", "model.layers.32.block_sparse_moe.experts.4.w2", "model.layers.32.block_sparse_moe.experts.5.w2", "model.layers.32.block_sparse_moe.experts.6.w2", "model.layers.32.block_sparse_moe.experts.7.w2", "model.layers.32.block_sparse_moe.experts.8.w2", "model.layers.32.block_sparse_moe.experts.9.w2", "model.layers.32.block_sparse_moe.experts.10.w2", "model.layers.32.block_sparse_moe.experts.11.w2", "model.layers.32.block_sparse_moe.experts.12.w2", "model.layers.32.block_sparse_moe.experts.13.w2", "model.layers.32.block_sparse_moe.experts.14.w2", "model.layers.32.block_sparse_moe.experts.15.w2", "model.layers.32.block_sparse_moe.experts.16.w2", "model.layers.32.block_sparse_moe.experts.17.w2", "model.layers.32.block_sparse_moe.experts.18.w2", "model.layers.32.block_sparse_moe.experts.19.w2", "model.layers.32.block_sparse_moe.experts.20.w2", "model.layers.32.block_sparse_moe.experts.21.w2", "model.layers.32.block_sparse_moe.experts.22.w2", "model.layers.32.block_sparse_moe.experts.23.w2", "model.layers.32.block_sparse_moe.experts.24.w2", "model.layers.32.block_sparse_moe.experts.25.w2", "model.layers.32.block_sparse_moe.experts.26.w2", "model.layers.32.block_sparse_moe.experts.27.w2", "model.layers.32.block_sparse_moe.experts.28.w2", "model.layers.32.block_sparse_moe.experts.29.w2", "model.layers.32.block_sparse_moe.experts.30.w2", "model.layers.32.block_sparse_moe.experts.31.w2", "model.layers.32.block_sparse_moe.experts.32.w2", "model.layers.32.block_sparse_moe.experts.33.w2", "model.layers.32.block_sparse_moe.experts.34.w2", "model.layers.32.block_sparse_moe.experts.35.w2", "model.layers.32.block_sparse_moe.experts.36.w2", "model.layers.32.block_sparse_moe.experts.37.w2", "model.layers.32.block_sparse_moe.experts.38.w2", "model.layers.32.block_sparse_moe.experts.39.w2", "model.layers.32.block_sparse_moe.experts.40.w2", "model.layers.32.block_sparse_moe.experts.41.w2", "model.layers.32.block_sparse_moe.experts.42.w2", "model.layers.32.block_sparse_moe.experts.43.w2", "model.layers.32.block_sparse_moe.experts.44.w2", "model.layers.32.block_sparse_moe.experts.45.w2", "model.layers.32.block_sparse_moe.experts.46.w2", "model.layers.32.block_sparse_moe.experts.47.w2", "model.layers.32.block_sparse_moe.experts.48.w2", "model.layers.32.block_sparse_moe.experts.49.w2", "model.layers.32.block_sparse_moe.experts.50.w2", "model.layers.32.block_sparse_moe.experts.51.w2", "model.layers.32.block_sparse_moe.experts.52.w2", "model.layers.32.block_sparse_moe.experts.53.w2", "model.layers.32.block_sparse_moe.experts.54.w2", "model.layers.32.block_sparse_moe.experts.55.w2", "model.layers.32.block_sparse_moe.experts.56.w2", "model.layers.32.block_sparse_moe.experts.57.w2", "model.layers.32.block_sparse_moe.experts.58.w2", "model.layers.32.block_sparse_moe.experts.59.w2", "model.layers.32.block_sparse_moe.experts.60.w2", "model.layers.32.block_sparse_moe.experts.61.w2", "model.layers.32.block_sparse_moe.experts.62.w2", "model.layers.32.block_sparse_moe.experts.63.w2", "model.layers.32.block_sparse_moe.experts.64.w2", "model.layers.32.block_sparse_moe.experts.65.w2", "model.layers.32.block_sparse_moe.experts.66.w2", "model.layers.32.block_sparse_moe.experts.67.w2", "model.layers.32.block_sparse_moe.experts.68.w2", "model.layers.32.block_sparse_moe.experts.69.w2", "model.layers.32.block_sparse_moe.experts.70.w2", "model.layers.32.block_sparse_moe.experts.71.w2", "model.layers.32.block_sparse_moe.experts.72.w2", "model.layers.32.block_sparse_moe.experts.73.w2", "model.layers.32.block_sparse_moe.experts.74.w2", "model.layers.32.block_sparse_moe.experts.75.w2", "model.layers.32.block_sparse_moe.experts.76.w2", "model.layers.32.block_sparse_moe.experts.77.w2", "model.layers.32.block_sparse_moe.experts.78.w2", "model.layers.32.block_sparse_moe.experts.79.w2", "model.layers.32.block_sparse_moe.experts.80.w2", "model.layers.32.block_sparse_moe.experts.81.w2", "model.layers.32.block_sparse_moe.experts.82.w2", "model.layers.32.block_sparse_moe.experts.83.w2", "model.layers.32.block_sparse_moe.experts.84.w2", "model.layers.32.block_sparse_moe.experts.85.w2", "model.layers.32.block_sparse_moe.experts.86.w2", "model.layers.32.block_sparse_moe.experts.87.w2", "model.layers.32.block_sparse_moe.experts.88.w2", "model.layers.32.block_sparse_moe.experts.89.w2", "model.layers.32.block_sparse_moe.experts.90.w2", "model.layers.32.block_sparse_moe.experts.91.w2", "model.layers.32.block_sparse_moe.experts.92.w2", "model.layers.32.block_sparse_moe.experts.93.w2", "model.layers.32.block_sparse_moe.experts.94.w2", "model.layers.32.block_sparse_moe.experts.95.w2", "model.layers.32.block_sparse_moe.experts.96.w2", "model.layers.32.block_sparse_moe.experts.97.w2", "model.layers.32.block_sparse_moe.experts.98.w2", "model.layers.32.block_sparse_moe.experts.99.w2", "model.layers.32.block_sparse_moe.experts.100.w2", "model.layers.32.block_sparse_moe.experts.101.w2", "model.layers.32.block_sparse_moe.experts.102.w2", "model.layers.32.block_sparse_moe.experts.103.w2", "model.layers.32.block_sparse_moe.experts.104.w2", "model.layers.32.block_sparse_moe.experts.105.w2", "model.layers.32.block_sparse_moe.experts.106.w2", "model.layers.32.block_sparse_moe.experts.107.w2", "model.layers.32.block_sparse_moe.experts.108.w2", "model.layers.32.block_sparse_moe.experts.109.w2", "model.layers.32.block_sparse_moe.experts.110.w2", "model.layers.32.block_sparse_moe.experts.111.w2", "model.layers.32.block_sparse_moe.experts.112.w2", "model.layers.32.block_sparse_moe.experts.113.w2", "model.layers.32.block_sparse_moe.experts.114.w2", "model.layers.32.block_sparse_moe.experts.115.w2", "model.layers.32.block_sparse_moe.experts.116.w2", "model.layers.32.block_sparse_moe.experts.117.w2", "model.layers.32.block_sparse_moe.experts.118.w2", "model.layers.32.block_sparse_moe.experts.119.w2", "model.layers.32.block_sparse_moe.experts.120.w2", "model.layers.32.block_sparse_moe.experts.121.w2", "model.layers.32.block_sparse_moe.experts.122.w2", "model.layers.32.block_sparse_moe.experts.123.w2", "model.layers.32.block_sparse_moe.experts.124.w2", "model.layers.32.block_sparse_moe.experts.125.w2", "model.layers.32.block_sparse_moe.experts.126.w2", "model.layers.32.block_sparse_moe.experts.127.w2", "model.layers.32.block_sparse_moe.experts.128.w2", "model.layers.32.block_sparse_moe.experts.129.w2", "model.layers.32.block_sparse_moe.experts.130.w2", "model.layers.32.block_sparse_moe.experts.131.w2", "model.layers.32.block_sparse_moe.experts.132.w2", "model.layers.32.block_sparse_moe.experts.133.w2", "model.layers.32.block_sparse_moe.experts.134.w2", "model.layers.32.block_sparse_moe.experts.135.w2", "model.layers.32.block_sparse_moe.experts.136.w2", "model.layers.32.block_sparse_moe.experts.137.w2", "model.layers.32.block_sparse_moe.experts.138.w2", "model.layers.32.block_sparse_moe.experts.139.w2", "model.layers.32.block_sparse_moe.experts.140.w2", "model.layers.32.block_sparse_moe.experts.141.w2", "model.layers.32.block_sparse_moe.experts.142.w2", "model.layers.32.block_sparse_moe.experts.143.w2", "model.layers.32.block_sparse_moe.experts.144.w2", "model.layers.32.block_sparse_moe.experts.145.w2", "model.layers.32.block_sparse_moe.experts.146.w2", "model.layers.32.block_sparse_moe.experts.147.w2", "model.layers.32.block_sparse_moe.experts.148.w2", "model.layers.32.block_sparse_moe.experts.149.w2", "model.layers.32.block_sparse_moe.experts.150.w2", "model.layers.32.block_sparse_moe.experts.151.w2", "model.layers.32.block_sparse_moe.experts.152.w2", "model.layers.32.block_sparse_moe.experts.153.w2", "model.layers.32.block_sparse_moe.experts.154.w2", "model.layers.32.block_sparse_moe.experts.155.w2", "model.layers.32.block_sparse_moe.experts.156.w2", "model.layers.32.block_sparse_moe.experts.157.w2", "model.layers.32.block_sparse_moe.experts.158.w2", "model.layers.32.block_sparse_moe.experts.159.w2", "model.layers.32.block_sparse_moe.experts.160.w2", "model.layers.32.block_sparse_moe.experts.161.w2", "model.layers.32.block_sparse_moe.experts.162.w2", "model.layers.32.block_sparse_moe.experts.163.w2", "model.layers.32.block_sparse_moe.experts.164.w2", "model.layers.32.block_sparse_moe.experts.165.w2", "model.layers.32.block_sparse_moe.experts.166.w2", "model.layers.32.block_sparse_moe.experts.167.w2", "model.layers.32.block_sparse_moe.experts.168.w2", "model.layers.32.block_sparse_moe.experts.169.w2", "model.layers.32.block_sparse_moe.experts.170.w2", "model.layers.32.block_sparse_moe.experts.171.w2", "model.layers.32.block_sparse_moe.experts.172.w2", "model.layers.32.block_sparse_moe.experts.173.w2", "model.layers.32.block_sparse_moe.experts.174.w2", "model.layers.32.block_sparse_moe.experts.175.w2", "model.layers.32.block_sparse_moe.experts.176.w2", "model.layers.32.block_sparse_moe.experts.177.w2", "model.layers.32.block_sparse_moe.experts.178.w2", "model.layers.32.block_sparse_moe.experts.179.w2", "model.layers.32.block_sparse_moe.experts.180.w2", "model.layers.32.block_sparse_moe.experts.181.w2", "model.layers.32.block_sparse_moe.experts.182.w2", "model.layers.32.block_sparse_moe.experts.183.w2", "model.layers.32.block_sparse_moe.experts.184.w2", "model.layers.32.block_sparse_moe.experts.185.w2", "model.layers.32.block_sparse_moe.experts.186.w2", "model.layers.32.block_sparse_moe.experts.187.w2", "model.layers.32.block_sparse_moe.experts.188.w2", "model.layers.32.block_sparse_moe.experts.189.w2", "model.layers.32.block_sparse_moe.experts.190.w2", "model.layers.32.block_sparse_moe.experts.191.w2", "model.layers.32.block_sparse_moe.experts.192.w2", "model.layers.32.block_sparse_moe.experts.193.w2", "model.layers.32.block_sparse_moe.experts.194.w2", "model.layers.32.block_sparse_moe.experts.195.w2", "model.layers.32.block_sparse_moe.experts.196.w2", "model.layers.32.block_sparse_moe.experts.197.w2", "model.layers.32.block_sparse_moe.experts.198.w2", "model.layers.32.block_sparse_moe.experts.199.w2", "model.layers.32.block_sparse_moe.experts.200.w2", "model.layers.32.block_sparse_moe.experts.201.w2", "model.layers.32.block_sparse_moe.experts.202.w2", "model.layers.32.block_sparse_moe.experts.203.w2", "model.layers.32.block_sparse_moe.experts.204.w2", "model.layers.32.block_sparse_moe.experts.205.w2", "model.layers.32.block_sparse_moe.experts.206.w2", "model.layers.32.block_sparse_moe.experts.207.w2", "model.layers.32.block_sparse_moe.experts.208.w2", "model.layers.32.block_sparse_moe.experts.209.w2", "model.layers.32.block_sparse_moe.experts.210.w2", "model.layers.32.block_sparse_moe.experts.211.w2", "model.layers.32.block_sparse_moe.experts.212.w2", "model.layers.32.block_sparse_moe.experts.213.w2", "model.layers.32.block_sparse_moe.experts.214.w2", "model.layers.32.block_sparse_moe.experts.215.w2", "model.layers.32.block_sparse_moe.experts.216.w2", "model.layers.32.block_sparse_moe.experts.217.w2", "model.layers.32.block_sparse_moe.experts.218.w2", "model.layers.32.block_sparse_moe.experts.219.w2", "model.layers.32.block_sparse_moe.experts.220.w2", "model.layers.32.block_sparse_moe.experts.221.w2", "model.layers.32.block_sparse_moe.experts.222.w2", "model.layers.32.block_sparse_moe.experts.223.w2", "model.layers.32.block_sparse_moe.experts.224.w2", "model.layers.32.block_sparse_moe.experts.225.w2", "model.layers.32.block_sparse_moe.experts.226.w2", "model.layers.32.block_sparse_moe.experts.227.w2", "model.layers.32.block_sparse_moe.experts.228.w2", "model.layers.32.block_sparse_moe.experts.229.w2", "model.layers.32.block_sparse_moe.experts.230.w2", "model.layers.32.block_sparse_moe.experts.231.w2", "model.layers.32.block_sparse_moe.experts.232.w2", "model.layers.32.block_sparse_moe.experts.233.w2", "model.layers.32.block_sparse_moe.experts.234.w2", "model.layers.32.block_sparse_moe.experts.235.w2", "model.layers.32.block_sparse_moe.experts.236.w2", "model.layers.32.block_sparse_moe.experts.237.w2", "model.layers.32.block_sparse_moe.experts.238.w2", "model.layers.32.block_sparse_moe.experts.239.w2", "model.layers.32.block_sparse_moe.experts.240.w2", "model.layers.32.block_sparse_moe.experts.241.w2", "model.layers.32.block_sparse_moe.experts.242.w2", "model.layers.32.block_sparse_moe.experts.243.w2", "model.layers.32.block_sparse_moe.experts.244.w2", "model.layers.32.block_sparse_moe.experts.245.w2", "model.layers.32.block_sparse_moe.experts.246.w2", "model.layers.32.block_sparse_moe.experts.247.w2", "model.layers.32.block_sparse_moe.experts.248.w2", "model.layers.32.block_sparse_moe.experts.249.w2", "model.layers.32.block_sparse_moe.experts.250.w2", "model.layers.32.block_sparse_moe.experts.251.w2", "model.layers.32.block_sparse_moe.experts.252.w2", "model.layers.32.block_sparse_moe.experts.253.w2", "model.layers.32.block_sparse_moe.experts.254.w2", "model.layers.32.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0003402143716811912, "dbits": 3623878656 } ] }, { "idx": 66, "layers": [ "model.layers.33.self_attn.q_proj", "model.layers.33.self_attn.k_proj", "model.layers.33.self_attn.v_proj", "model.layers.33.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0039053266867995484, "dbits": 44040192 } ] }, { "idx": 67, "layers": [ "model.layers.33.block_sparse_moe.experts.0.w1", "model.layers.33.block_sparse_moe.experts.1.w1", "model.layers.33.block_sparse_moe.experts.2.w1", "model.layers.33.block_sparse_moe.experts.3.w1", "model.layers.33.block_sparse_moe.experts.4.w1", "model.layers.33.block_sparse_moe.experts.5.w1", "model.layers.33.block_sparse_moe.experts.6.w1", "model.layers.33.block_sparse_moe.experts.7.w1", "model.layers.33.block_sparse_moe.experts.8.w1", "model.layers.33.block_sparse_moe.experts.9.w1", "model.layers.33.block_sparse_moe.experts.10.w1", "model.layers.33.block_sparse_moe.experts.11.w1", "model.layers.33.block_sparse_moe.experts.12.w1", "model.layers.33.block_sparse_moe.experts.13.w1", "model.layers.33.block_sparse_moe.experts.14.w1", "model.layers.33.block_sparse_moe.experts.15.w1", "model.layers.33.block_sparse_moe.experts.16.w1", "model.layers.33.block_sparse_moe.experts.17.w1", "model.layers.33.block_sparse_moe.experts.18.w1", "model.layers.33.block_sparse_moe.experts.19.w1", "model.layers.33.block_sparse_moe.experts.20.w1", "model.layers.33.block_sparse_moe.experts.21.w1", "model.layers.33.block_sparse_moe.experts.22.w1", "model.layers.33.block_sparse_moe.experts.23.w1", "model.layers.33.block_sparse_moe.experts.24.w1", "model.layers.33.block_sparse_moe.experts.25.w1", "model.layers.33.block_sparse_moe.experts.26.w1", "model.layers.33.block_sparse_moe.experts.27.w1", "model.layers.33.block_sparse_moe.experts.28.w1", "model.layers.33.block_sparse_moe.experts.29.w1", "model.layers.33.block_sparse_moe.experts.30.w1", "model.layers.33.block_sparse_moe.experts.31.w1", "model.layers.33.block_sparse_moe.experts.32.w1", "model.layers.33.block_sparse_moe.experts.33.w1", "model.layers.33.block_sparse_moe.experts.34.w1", "model.layers.33.block_sparse_moe.experts.35.w1", "model.layers.33.block_sparse_moe.experts.36.w1", "model.layers.33.block_sparse_moe.experts.37.w1", "model.layers.33.block_sparse_moe.experts.38.w1", "model.layers.33.block_sparse_moe.experts.39.w1", "model.layers.33.block_sparse_moe.experts.40.w1", "model.layers.33.block_sparse_moe.experts.41.w1", "model.layers.33.block_sparse_moe.experts.42.w1", "model.layers.33.block_sparse_moe.experts.43.w1", "model.layers.33.block_sparse_moe.experts.44.w1", "model.layers.33.block_sparse_moe.experts.45.w1", "model.layers.33.block_sparse_moe.experts.46.w1", "model.layers.33.block_sparse_moe.experts.47.w1", "model.layers.33.block_sparse_moe.experts.48.w1", "model.layers.33.block_sparse_moe.experts.49.w1", "model.layers.33.block_sparse_moe.experts.50.w1", "model.layers.33.block_sparse_moe.experts.51.w1", "model.layers.33.block_sparse_moe.experts.52.w1", "model.layers.33.block_sparse_moe.experts.53.w1", "model.layers.33.block_sparse_moe.experts.54.w1", "model.layers.33.block_sparse_moe.experts.55.w1", "model.layers.33.block_sparse_moe.experts.56.w1", "model.layers.33.block_sparse_moe.experts.57.w1", "model.layers.33.block_sparse_moe.experts.58.w1", "model.layers.33.block_sparse_moe.experts.59.w1", "model.layers.33.block_sparse_moe.experts.60.w1", "model.layers.33.block_sparse_moe.experts.61.w1", "model.layers.33.block_sparse_moe.experts.62.w1", "model.layers.33.block_sparse_moe.experts.63.w1", "model.layers.33.block_sparse_moe.experts.64.w1", "model.layers.33.block_sparse_moe.experts.65.w1", "model.layers.33.block_sparse_moe.experts.66.w1", "model.layers.33.block_sparse_moe.experts.67.w1", "model.layers.33.block_sparse_moe.experts.68.w1", "model.layers.33.block_sparse_moe.experts.69.w1", "model.layers.33.block_sparse_moe.experts.70.w1", "model.layers.33.block_sparse_moe.experts.71.w1", "model.layers.33.block_sparse_moe.experts.72.w1", "model.layers.33.block_sparse_moe.experts.73.w1", "model.layers.33.block_sparse_moe.experts.74.w1", "model.layers.33.block_sparse_moe.experts.75.w1", "model.layers.33.block_sparse_moe.experts.76.w1", "model.layers.33.block_sparse_moe.experts.77.w1", "model.layers.33.block_sparse_moe.experts.78.w1", "model.layers.33.block_sparse_moe.experts.79.w1", "model.layers.33.block_sparse_moe.experts.80.w1", "model.layers.33.block_sparse_moe.experts.81.w1", "model.layers.33.block_sparse_moe.experts.82.w1", "model.layers.33.block_sparse_moe.experts.83.w1", "model.layers.33.block_sparse_moe.experts.84.w1", "model.layers.33.block_sparse_moe.experts.85.w1", "model.layers.33.block_sparse_moe.experts.86.w1", "model.layers.33.block_sparse_moe.experts.87.w1", "model.layers.33.block_sparse_moe.experts.88.w1", "model.layers.33.block_sparse_moe.experts.89.w1", "model.layers.33.block_sparse_moe.experts.90.w1", "model.layers.33.block_sparse_moe.experts.91.w1", "model.layers.33.block_sparse_moe.experts.92.w1", "model.layers.33.block_sparse_moe.experts.93.w1", "model.layers.33.block_sparse_moe.experts.94.w1", "model.layers.33.block_sparse_moe.experts.95.w1", "model.layers.33.block_sparse_moe.experts.96.w1", "model.layers.33.block_sparse_moe.experts.97.w1", "model.layers.33.block_sparse_moe.experts.98.w1", "model.layers.33.block_sparse_moe.experts.99.w1", "model.layers.33.block_sparse_moe.experts.100.w1", "model.layers.33.block_sparse_moe.experts.101.w1", "model.layers.33.block_sparse_moe.experts.102.w1", "model.layers.33.block_sparse_moe.experts.103.w1", "model.layers.33.block_sparse_moe.experts.104.w1", "model.layers.33.block_sparse_moe.experts.105.w1", "model.layers.33.block_sparse_moe.experts.106.w1", "model.layers.33.block_sparse_moe.experts.107.w1", "model.layers.33.block_sparse_moe.experts.108.w1", "model.layers.33.block_sparse_moe.experts.109.w1", "model.layers.33.block_sparse_moe.experts.110.w1", "model.layers.33.block_sparse_moe.experts.111.w1", "model.layers.33.block_sparse_moe.experts.112.w1", "model.layers.33.block_sparse_moe.experts.113.w1", "model.layers.33.block_sparse_moe.experts.114.w1", "model.layers.33.block_sparse_moe.experts.115.w1", "model.layers.33.block_sparse_moe.experts.116.w1", "model.layers.33.block_sparse_moe.experts.117.w1", "model.layers.33.block_sparse_moe.experts.118.w1", "model.layers.33.block_sparse_moe.experts.119.w1", "model.layers.33.block_sparse_moe.experts.120.w1", "model.layers.33.block_sparse_moe.experts.121.w1", "model.layers.33.block_sparse_moe.experts.122.w1", "model.layers.33.block_sparse_moe.experts.123.w1", "model.layers.33.block_sparse_moe.experts.124.w1", "model.layers.33.block_sparse_moe.experts.125.w1", "model.layers.33.block_sparse_moe.experts.126.w1", "model.layers.33.block_sparse_moe.experts.127.w1", "model.layers.33.block_sparse_moe.experts.128.w1", "model.layers.33.block_sparse_moe.experts.129.w1", "model.layers.33.block_sparse_moe.experts.130.w1", "model.layers.33.block_sparse_moe.experts.131.w1", "model.layers.33.block_sparse_moe.experts.132.w1", "model.layers.33.block_sparse_moe.experts.133.w1", "model.layers.33.block_sparse_moe.experts.134.w1", "model.layers.33.block_sparse_moe.experts.135.w1", "model.layers.33.block_sparse_moe.experts.136.w1", "model.layers.33.block_sparse_moe.experts.137.w1", "model.layers.33.block_sparse_moe.experts.138.w1", "model.layers.33.block_sparse_moe.experts.139.w1", "model.layers.33.block_sparse_moe.experts.140.w1", "model.layers.33.block_sparse_moe.experts.141.w1", "model.layers.33.block_sparse_moe.experts.142.w1", "model.layers.33.block_sparse_moe.experts.143.w1", "model.layers.33.block_sparse_moe.experts.144.w1", "model.layers.33.block_sparse_moe.experts.145.w1", "model.layers.33.block_sparse_moe.experts.146.w1", "model.layers.33.block_sparse_moe.experts.147.w1", "model.layers.33.block_sparse_moe.experts.148.w1", "model.layers.33.block_sparse_moe.experts.149.w1", "model.layers.33.block_sparse_moe.experts.150.w1", "model.layers.33.block_sparse_moe.experts.151.w1", "model.layers.33.block_sparse_moe.experts.152.w1", "model.layers.33.block_sparse_moe.experts.153.w1", "model.layers.33.block_sparse_moe.experts.154.w1", "model.layers.33.block_sparse_moe.experts.155.w1", "model.layers.33.block_sparse_moe.experts.156.w1", "model.layers.33.block_sparse_moe.experts.157.w1", "model.layers.33.block_sparse_moe.experts.158.w1", "model.layers.33.block_sparse_moe.experts.159.w1", "model.layers.33.block_sparse_moe.experts.160.w1", "model.layers.33.block_sparse_moe.experts.161.w1", "model.layers.33.block_sparse_moe.experts.162.w1", "model.layers.33.block_sparse_moe.experts.163.w1", "model.layers.33.block_sparse_moe.experts.164.w1", "model.layers.33.block_sparse_moe.experts.165.w1", "model.layers.33.block_sparse_moe.experts.166.w1", "model.layers.33.block_sparse_moe.experts.167.w1", "model.layers.33.block_sparse_moe.experts.168.w1", "model.layers.33.block_sparse_moe.experts.169.w1", "model.layers.33.block_sparse_moe.experts.170.w1", "model.layers.33.block_sparse_moe.experts.171.w1", "model.layers.33.block_sparse_moe.experts.172.w1", "model.layers.33.block_sparse_moe.experts.173.w1", "model.layers.33.block_sparse_moe.experts.174.w1", "model.layers.33.block_sparse_moe.experts.175.w1", "model.layers.33.block_sparse_moe.experts.176.w1", "model.layers.33.block_sparse_moe.experts.177.w1", "model.layers.33.block_sparse_moe.experts.178.w1", "model.layers.33.block_sparse_moe.experts.179.w1", "model.layers.33.block_sparse_moe.experts.180.w1", "model.layers.33.block_sparse_moe.experts.181.w1", "model.layers.33.block_sparse_moe.experts.182.w1", "model.layers.33.block_sparse_moe.experts.183.w1", "model.layers.33.block_sparse_moe.experts.184.w1", "model.layers.33.block_sparse_moe.experts.185.w1", "model.layers.33.block_sparse_moe.experts.186.w1", "model.layers.33.block_sparse_moe.experts.187.w1", "model.layers.33.block_sparse_moe.experts.188.w1", "model.layers.33.block_sparse_moe.experts.189.w1", "model.layers.33.block_sparse_moe.experts.190.w1", "model.layers.33.block_sparse_moe.experts.191.w1", "model.layers.33.block_sparse_moe.experts.192.w1", "model.layers.33.block_sparse_moe.experts.193.w1", "model.layers.33.block_sparse_moe.experts.194.w1", "model.layers.33.block_sparse_moe.experts.195.w1", "model.layers.33.block_sparse_moe.experts.196.w1", "model.layers.33.block_sparse_moe.experts.197.w1", "model.layers.33.block_sparse_moe.experts.198.w1", "model.layers.33.block_sparse_moe.experts.199.w1", "model.layers.33.block_sparse_moe.experts.200.w1", "model.layers.33.block_sparse_moe.experts.201.w1", "model.layers.33.block_sparse_moe.experts.202.w1", "model.layers.33.block_sparse_moe.experts.203.w1", "model.layers.33.block_sparse_moe.experts.204.w1", "model.layers.33.block_sparse_moe.experts.205.w1", "model.layers.33.block_sparse_moe.experts.206.w1", "model.layers.33.block_sparse_moe.experts.207.w1", "model.layers.33.block_sparse_moe.experts.208.w1", "model.layers.33.block_sparse_moe.experts.209.w1", "model.layers.33.block_sparse_moe.experts.210.w1", "model.layers.33.block_sparse_moe.experts.211.w1", "model.layers.33.block_sparse_moe.experts.212.w1", "model.layers.33.block_sparse_moe.experts.213.w1", "model.layers.33.block_sparse_moe.experts.214.w1", "model.layers.33.block_sparse_moe.experts.215.w1", "model.layers.33.block_sparse_moe.experts.216.w1", "model.layers.33.block_sparse_moe.experts.217.w1", "model.layers.33.block_sparse_moe.experts.218.w1", "model.layers.33.block_sparse_moe.experts.219.w1", "model.layers.33.block_sparse_moe.experts.220.w1", "model.layers.33.block_sparse_moe.experts.221.w1", "model.layers.33.block_sparse_moe.experts.222.w1", "model.layers.33.block_sparse_moe.experts.223.w1", "model.layers.33.block_sparse_moe.experts.224.w1", "model.layers.33.block_sparse_moe.experts.225.w1", "model.layers.33.block_sparse_moe.experts.226.w1", "model.layers.33.block_sparse_moe.experts.227.w1", "model.layers.33.block_sparse_moe.experts.228.w1", "model.layers.33.block_sparse_moe.experts.229.w1", "model.layers.33.block_sparse_moe.experts.230.w1", "model.layers.33.block_sparse_moe.experts.231.w1", "model.layers.33.block_sparse_moe.experts.232.w1", "model.layers.33.block_sparse_moe.experts.233.w1", "model.layers.33.block_sparse_moe.experts.234.w1", "model.layers.33.block_sparse_moe.experts.235.w1", "model.layers.33.block_sparse_moe.experts.236.w1", "model.layers.33.block_sparse_moe.experts.237.w1", "model.layers.33.block_sparse_moe.experts.238.w1", "model.layers.33.block_sparse_moe.experts.239.w1", "model.layers.33.block_sparse_moe.experts.240.w1", "model.layers.33.block_sparse_moe.experts.241.w1", "model.layers.33.block_sparse_moe.experts.242.w1", "model.layers.33.block_sparse_moe.experts.243.w1", "model.layers.33.block_sparse_moe.experts.244.w1", "model.layers.33.block_sparse_moe.experts.245.w1", "model.layers.33.block_sparse_moe.experts.246.w1", "model.layers.33.block_sparse_moe.experts.247.w1", "model.layers.33.block_sparse_moe.experts.248.w1", "model.layers.33.block_sparse_moe.experts.249.w1", "model.layers.33.block_sparse_moe.experts.250.w1", "model.layers.33.block_sparse_moe.experts.251.w1", "model.layers.33.block_sparse_moe.experts.252.w1", "model.layers.33.block_sparse_moe.experts.253.w1", "model.layers.33.block_sparse_moe.experts.254.w1", "model.layers.33.block_sparse_moe.experts.255.w1", "model.layers.33.block_sparse_moe.experts.0.w3", "model.layers.33.block_sparse_moe.experts.1.w3", "model.layers.33.block_sparse_moe.experts.2.w3", "model.layers.33.block_sparse_moe.experts.3.w3", "model.layers.33.block_sparse_moe.experts.4.w3", "model.layers.33.block_sparse_moe.experts.5.w3", "model.layers.33.block_sparse_moe.experts.6.w3", "model.layers.33.block_sparse_moe.experts.7.w3", "model.layers.33.block_sparse_moe.experts.8.w3", "model.layers.33.block_sparse_moe.experts.9.w3", "model.layers.33.block_sparse_moe.experts.10.w3", "model.layers.33.block_sparse_moe.experts.11.w3", "model.layers.33.block_sparse_moe.experts.12.w3", "model.layers.33.block_sparse_moe.experts.13.w3", "model.layers.33.block_sparse_moe.experts.14.w3", "model.layers.33.block_sparse_moe.experts.15.w3", "model.layers.33.block_sparse_moe.experts.16.w3", "model.layers.33.block_sparse_moe.experts.17.w3", "model.layers.33.block_sparse_moe.experts.18.w3", "model.layers.33.block_sparse_moe.experts.19.w3", "model.layers.33.block_sparse_moe.experts.20.w3", "model.layers.33.block_sparse_moe.experts.21.w3", "model.layers.33.block_sparse_moe.experts.22.w3", "model.layers.33.block_sparse_moe.experts.23.w3", "model.layers.33.block_sparse_moe.experts.24.w3", "model.layers.33.block_sparse_moe.experts.25.w3", "model.layers.33.block_sparse_moe.experts.26.w3", "model.layers.33.block_sparse_moe.experts.27.w3", "model.layers.33.block_sparse_moe.experts.28.w3", "model.layers.33.block_sparse_moe.experts.29.w3", "model.layers.33.block_sparse_moe.experts.30.w3", "model.layers.33.block_sparse_moe.experts.31.w3", "model.layers.33.block_sparse_moe.experts.32.w3", "model.layers.33.block_sparse_moe.experts.33.w3", "model.layers.33.block_sparse_moe.experts.34.w3", "model.layers.33.block_sparse_moe.experts.35.w3", "model.layers.33.block_sparse_moe.experts.36.w3", "model.layers.33.block_sparse_moe.experts.37.w3", "model.layers.33.block_sparse_moe.experts.38.w3", "model.layers.33.block_sparse_moe.experts.39.w3", "model.layers.33.block_sparse_moe.experts.40.w3", "model.layers.33.block_sparse_moe.experts.41.w3", "model.layers.33.block_sparse_moe.experts.42.w3", "model.layers.33.block_sparse_moe.experts.43.w3", "model.layers.33.block_sparse_moe.experts.44.w3", "model.layers.33.block_sparse_moe.experts.45.w3", "model.layers.33.block_sparse_moe.experts.46.w3", "model.layers.33.block_sparse_moe.experts.47.w3", "model.layers.33.block_sparse_moe.experts.48.w3", "model.layers.33.block_sparse_moe.experts.49.w3", "model.layers.33.block_sparse_moe.experts.50.w3", "model.layers.33.block_sparse_moe.experts.51.w3", "model.layers.33.block_sparse_moe.experts.52.w3", "model.layers.33.block_sparse_moe.experts.53.w3", "model.layers.33.block_sparse_moe.experts.54.w3", "model.layers.33.block_sparse_moe.experts.55.w3", "model.layers.33.block_sparse_moe.experts.56.w3", "model.layers.33.block_sparse_moe.experts.57.w3", "model.layers.33.block_sparse_moe.experts.58.w3", "model.layers.33.block_sparse_moe.experts.59.w3", "model.layers.33.block_sparse_moe.experts.60.w3", "model.layers.33.block_sparse_moe.experts.61.w3", "model.layers.33.block_sparse_moe.experts.62.w3", "model.layers.33.block_sparse_moe.experts.63.w3", "model.layers.33.block_sparse_moe.experts.64.w3", "model.layers.33.block_sparse_moe.experts.65.w3", "model.layers.33.block_sparse_moe.experts.66.w3", "model.layers.33.block_sparse_moe.experts.67.w3", "model.layers.33.block_sparse_moe.experts.68.w3", "model.layers.33.block_sparse_moe.experts.69.w3", "model.layers.33.block_sparse_moe.experts.70.w3", "model.layers.33.block_sparse_moe.experts.71.w3", "model.layers.33.block_sparse_moe.experts.72.w3", "model.layers.33.block_sparse_moe.experts.73.w3", "model.layers.33.block_sparse_moe.experts.74.w3", "model.layers.33.block_sparse_moe.experts.75.w3", "model.layers.33.block_sparse_moe.experts.76.w3", "model.layers.33.block_sparse_moe.experts.77.w3", "model.layers.33.block_sparse_moe.experts.78.w3", "model.layers.33.block_sparse_moe.experts.79.w3", "model.layers.33.block_sparse_moe.experts.80.w3", "model.layers.33.block_sparse_moe.experts.81.w3", "model.layers.33.block_sparse_moe.experts.82.w3", "model.layers.33.block_sparse_moe.experts.83.w3", "model.layers.33.block_sparse_moe.experts.84.w3", "model.layers.33.block_sparse_moe.experts.85.w3", "model.layers.33.block_sparse_moe.experts.86.w3", "model.layers.33.block_sparse_moe.experts.87.w3", "model.layers.33.block_sparse_moe.experts.88.w3", "model.layers.33.block_sparse_moe.experts.89.w3", "model.layers.33.block_sparse_moe.experts.90.w3", "model.layers.33.block_sparse_moe.experts.91.w3", "model.layers.33.block_sparse_moe.experts.92.w3", "model.layers.33.block_sparse_moe.experts.93.w3", "model.layers.33.block_sparse_moe.experts.94.w3", "model.layers.33.block_sparse_moe.experts.95.w3", "model.layers.33.block_sparse_moe.experts.96.w3", "model.layers.33.block_sparse_moe.experts.97.w3", "model.layers.33.block_sparse_moe.experts.98.w3", "model.layers.33.block_sparse_moe.experts.99.w3", "model.layers.33.block_sparse_moe.experts.100.w3", "model.layers.33.block_sparse_moe.experts.101.w3", "model.layers.33.block_sparse_moe.experts.102.w3", "model.layers.33.block_sparse_moe.experts.103.w3", "model.layers.33.block_sparse_moe.experts.104.w3", "model.layers.33.block_sparse_moe.experts.105.w3", "model.layers.33.block_sparse_moe.experts.106.w3", "model.layers.33.block_sparse_moe.experts.107.w3", "model.layers.33.block_sparse_moe.experts.108.w3", "model.layers.33.block_sparse_moe.experts.109.w3", "model.layers.33.block_sparse_moe.experts.110.w3", "model.layers.33.block_sparse_moe.experts.111.w3", "model.layers.33.block_sparse_moe.experts.112.w3", "model.layers.33.block_sparse_moe.experts.113.w3", "model.layers.33.block_sparse_moe.experts.114.w3", "model.layers.33.block_sparse_moe.experts.115.w3", "model.layers.33.block_sparse_moe.experts.116.w3", "model.layers.33.block_sparse_moe.experts.117.w3", "model.layers.33.block_sparse_moe.experts.118.w3", "model.layers.33.block_sparse_moe.experts.119.w3", "model.layers.33.block_sparse_moe.experts.120.w3", "model.layers.33.block_sparse_moe.experts.121.w3", "model.layers.33.block_sparse_moe.experts.122.w3", "model.layers.33.block_sparse_moe.experts.123.w3", "model.layers.33.block_sparse_moe.experts.124.w3", "model.layers.33.block_sparse_moe.experts.125.w3", "model.layers.33.block_sparse_moe.experts.126.w3", "model.layers.33.block_sparse_moe.experts.127.w3", "model.layers.33.block_sparse_moe.experts.128.w3", "model.layers.33.block_sparse_moe.experts.129.w3", "model.layers.33.block_sparse_moe.experts.130.w3", "model.layers.33.block_sparse_moe.experts.131.w3", "model.layers.33.block_sparse_moe.experts.132.w3", "model.layers.33.block_sparse_moe.experts.133.w3", "model.layers.33.block_sparse_moe.experts.134.w3", "model.layers.33.block_sparse_moe.experts.135.w3", "model.layers.33.block_sparse_moe.experts.136.w3", "model.layers.33.block_sparse_moe.experts.137.w3", "model.layers.33.block_sparse_moe.experts.138.w3", "model.layers.33.block_sparse_moe.experts.139.w3", "model.layers.33.block_sparse_moe.experts.140.w3", "model.layers.33.block_sparse_moe.experts.141.w3", "model.layers.33.block_sparse_moe.experts.142.w3", "model.layers.33.block_sparse_moe.experts.143.w3", "model.layers.33.block_sparse_moe.experts.144.w3", "model.layers.33.block_sparse_moe.experts.145.w3", "model.layers.33.block_sparse_moe.experts.146.w3", "model.layers.33.block_sparse_moe.experts.147.w3", "model.layers.33.block_sparse_moe.experts.148.w3", "model.layers.33.block_sparse_moe.experts.149.w3", "model.layers.33.block_sparse_moe.experts.150.w3", "model.layers.33.block_sparse_moe.experts.151.w3", "model.layers.33.block_sparse_moe.experts.152.w3", "model.layers.33.block_sparse_moe.experts.153.w3", "model.layers.33.block_sparse_moe.experts.154.w3", "model.layers.33.block_sparse_moe.experts.155.w3", "model.layers.33.block_sparse_moe.experts.156.w3", "model.layers.33.block_sparse_moe.experts.157.w3", "model.layers.33.block_sparse_moe.experts.158.w3", "model.layers.33.block_sparse_moe.experts.159.w3", "model.layers.33.block_sparse_moe.experts.160.w3", "model.layers.33.block_sparse_moe.experts.161.w3", "model.layers.33.block_sparse_moe.experts.162.w3", "model.layers.33.block_sparse_moe.experts.163.w3", "model.layers.33.block_sparse_moe.experts.164.w3", "model.layers.33.block_sparse_moe.experts.165.w3", "model.layers.33.block_sparse_moe.experts.166.w3", "model.layers.33.block_sparse_moe.experts.167.w3", "model.layers.33.block_sparse_moe.experts.168.w3", "model.layers.33.block_sparse_moe.experts.169.w3", "model.layers.33.block_sparse_moe.experts.170.w3", "model.layers.33.block_sparse_moe.experts.171.w3", "model.layers.33.block_sparse_moe.experts.172.w3", "model.layers.33.block_sparse_moe.experts.173.w3", "model.layers.33.block_sparse_moe.experts.174.w3", "model.layers.33.block_sparse_moe.experts.175.w3", "model.layers.33.block_sparse_moe.experts.176.w3", "model.layers.33.block_sparse_moe.experts.177.w3", "model.layers.33.block_sparse_moe.experts.178.w3", "model.layers.33.block_sparse_moe.experts.179.w3", "model.layers.33.block_sparse_moe.experts.180.w3", "model.layers.33.block_sparse_moe.experts.181.w3", "model.layers.33.block_sparse_moe.experts.182.w3", "model.layers.33.block_sparse_moe.experts.183.w3", "model.layers.33.block_sparse_moe.experts.184.w3", "model.layers.33.block_sparse_moe.experts.185.w3", "model.layers.33.block_sparse_moe.experts.186.w3", "model.layers.33.block_sparse_moe.experts.187.w3", "model.layers.33.block_sparse_moe.experts.188.w3", "model.layers.33.block_sparse_moe.experts.189.w3", "model.layers.33.block_sparse_moe.experts.190.w3", "model.layers.33.block_sparse_moe.experts.191.w3", "model.layers.33.block_sparse_moe.experts.192.w3", "model.layers.33.block_sparse_moe.experts.193.w3", "model.layers.33.block_sparse_moe.experts.194.w3", "model.layers.33.block_sparse_moe.experts.195.w3", "model.layers.33.block_sparse_moe.experts.196.w3", "model.layers.33.block_sparse_moe.experts.197.w3", "model.layers.33.block_sparse_moe.experts.198.w3", "model.layers.33.block_sparse_moe.experts.199.w3", "model.layers.33.block_sparse_moe.experts.200.w3", "model.layers.33.block_sparse_moe.experts.201.w3", "model.layers.33.block_sparse_moe.experts.202.w3", "model.layers.33.block_sparse_moe.experts.203.w3", "model.layers.33.block_sparse_moe.experts.204.w3", "model.layers.33.block_sparse_moe.experts.205.w3", "model.layers.33.block_sparse_moe.experts.206.w3", "model.layers.33.block_sparse_moe.experts.207.w3", "model.layers.33.block_sparse_moe.experts.208.w3", "model.layers.33.block_sparse_moe.experts.209.w3", "model.layers.33.block_sparse_moe.experts.210.w3", "model.layers.33.block_sparse_moe.experts.211.w3", "model.layers.33.block_sparse_moe.experts.212.w3", "model.layers.33.block_sparse_moe.experts.213.w3", "model.layers.33.block_sparse_moe.experts.214.w3", "model.layers.33.block_sparse_moe.experts.215.w3", "model.layers.33.block_sparse_moe.experts.216.w3", "model.layers.33.block_sparse_moe.experts.217.w3", "model.layers.33.block_sparse_moe.experts.218.w3", "model.layers.33.block_sparse_moe.experts.219.w3", "model.layers.33.block_sparse_moe.experts.220.w3", "model.layers.33.block_sparse_moe.experts.221.w3", "model.layers.33.block_sparse_moe.experts.222.w3", "model.layers.33.block_sparse_moe.experts.223.w3", "model.layers.33.block_sparse_moe.experts.224.w3", "model.layers.33.block_sparse_moe.experts.225.w3", "model.layers.33.block_sparse_moe.experts.226.w3", "model.layers.33.block_sparse_moe.experts.227.w3", "model.layers.33.block_sparse_moe.experts.228.w3", "model.layers.33.block_sparse_moe.experts.229.w3", "model.layers.33.block_sparse_moe.experts.230.w3", "model.layers.33.block_sparse_moe.experts.231.w3", "model.layers.33.block_sparse_moe.experts.232.w3", "model.layers.33.block_sparse_moe.experts.233.w3", "model.layers.33.block_sparse_moe.experts.234.w3", "model.layers.33.block_sparse_moe.experts.235.w3", "model.layers.33.block_sparse_moe.experts.236.w3", "model.layers.33.block_sparse_moe.experts.237.w3", "model.layers.33.block_sparse_moe.experts.238.w3", "model.layers.33.block_sparse_moe.experts.239.w3", "model.layers.33.block_sparse_moe.experts.240.w3", "model.layers.33.block_sparse_moe.experts.241.w3", "model.layers.33.block_sparse_moe.experts.242.w3", "model.layers.33.block_sparse_moe.experts.243.w3", "model.layers.33.block_sparse_moe.experts.244.w3", "model.layers.33.block_sparse_moe.experts.245.w3", "model.layers.33.block_sparse_moe.experts.246.w3", "model.layers.33.block_sparse_moe.experts.247.w3", "model.layers.33.block_sparse_moe.experts.248.w3", "model.layers.33.block_sparse_moe.experts.249.w3", "model.layers.33.block_sparse_moe.experts.250.w3", "model.layers.33.block_sparse_moe.experts.251.w3", "model.layers.33.block_sparse_moe.experts.252.w3", "model.layers.33.block_sparse_moe.experts.253.w3", "model.layers.33.block_sparse_moe.experts.254.w3", "model.layers.33.block_sparse_moe.experts.255.w3", "model.layers.33.block_sparse_moe.experts.0.w2", "model.layers.33.block_sparse_moe.experts.1.w2", "model.layers.33.block_sparse_moe.experts.2.w2", "model.layers.33.block_sparse_moe.experts.3.w2", "model.layers.33.block_sparse_moe.experts.4.w2", "model.layers.33.block_sparse_moe.experts.5.w2", "model.layers.33.block_sparse_moe.experts.6.w2", "model.layers.33.block_sparse_moe.experts.7.w2", "model.layers.33.block_sparse_moe.experts.8.w2", "model.layers.33.block_sparse_moe.experts.9.w2", "model.layers.33.block_sparse_moe.experts.10.w2", "model.layers.33.block_sparse_moe.experts.11.w2", "model.layers.33.block_sparse_moe.experts.12.w2", "model.layers.33.block_sparse_moe.experts.13.w2", "model.layers.33.block_sparse_moe.experts.14.w2", "model.layers.33.block_sparse_moe.experts.15.w2", "model.layers.33.block_sparse_moe.experts.16.w2", "model.layers.33.block_sparse_moe.experts.17.w2", "model.layers.33.block_sparse_moe.experts.18.w2", "model.layers.33.block_sparse_moe.experts.19.w2", "model.layers.33.block_sparse_moe.experts.20.w2", "model.layers.33.block_sparse_moe.experts.21.w2", "model.layers.33.block_sparse_moe.experts.22.w2", "model.layers.33.block_sparse_moe.experts.23.w2", "model.layers.33.block_sparse_moe.experts.24.w2", "model.layers.33.block_sparse_moe.experts.25.w2", "model.layers.33.block_sparse_moe.experts.26.w2", "model.layers.33.block_sparse_moe.experts.27.w2", "model.layers.33.block_sparse_moe.experts.28.w2", "model.layers.33.block_sparse_moe.experts.29.w2", "model.layers.33.block_sparse_moe.experts.30.w2", "model.layers.33.block_sparse_moe.experts.31.w2", "model.layers.33.block_sparse_moe.experts.32.w2", "model.layers.33.block_sparse_moe.experts.33.w2", "model.layers.33.block_sparse_moe.experts.34.w2", "model.layers.33.block_sparse_moe.experts.35.w2", "model.layers.33.block_sparse_moe.experts.36.w2", "model.layers.33.block_sparse_moe.experts.37.w2", "model.layers.33.block_sparse_moe.experts.38.w2", "model.layers.33.block_sparse_moe.experts.39.w2", "model.layers.33.block_sparse_moe.experts.40.w2", "model.layers.33.block_sparse_moe.experts.41.w2", "model.layers.33.block_sparse_moe.experts.42.w2", "model.layers.33.block_sparse_moe.experts.43.w2", "model.layers.33.block_sparse_moe.experts.44.w2", "model.layers.33.block_sparse_moe.experts.45.w2", "model.layers.33.block_sparse_moe.experts.46.w2", "model.layers.33.block_sparse_moe.experts.47.w2", "model.layers.33.block_sparse_moe.experts.48.w2", "model.layers.33.block_sparse_moe.experts.49.w2", "model.layers.33.block_sparse_moe.experts.50.w2", "model.layers.33.block_sparse_moe.experts.51.w2", "model.layers.33.block_sparse_moe.experts.52.w2", "model.layers.33.block_sparse_moe.experts.53.w2", "model.layers.33.block_sparse_moe.experts.54.w2", "model.layers.33.block_sparse_moe.experts.55.w2", "model.layers.33.block_sparse_moe.experts.56.w2", "model.layers.33.block_sparse_moe.experts.57.w2", "model.layers.33.block_sparse_moe.experts.58.w2", "model.layers.33.block_sparse_moe.experts.59.w2", "model.layers.33.block_sparse_moe.experts.60.w2", "model.layers.33.block_sparse_moe.experts.61.w2", "model.layers.33.block_sparse_moe.experts.62.w2", "model.layers.33.block_sparse_moe.experts.63.w2", "model.layers.33.block_sparse_moe.experts.64.w2", "model.layers.33.block_sparse_moe.experts.65.w2", "model.layers.33.block_sparse_moe.experts.66.w2", "model.layers.33.block_sparse_moe.experts.67.w2", "model.layers.33.block_sparse_moe.experts.68.w2", "model.layers.33.block_sparse_moe.experts.69.w2", "model.layers.33.block_sparse_moe.experts.70.w2", "model.layers.33.block_sparse_moe.experts.71.w2", "model.layers.33.block_sparse_moe.experts.72.w2", "model.layers.33.block_sparse_moe.experts.73.w2", "model.layers.33.block_sparse_moe.experts.74.w2", "model.layers.33.block_sparse_moe.experts.75.w2", "model.layers.33.block_sparse_moe.experts.76.w2", "model.layers.33.block_sparse_moe.experts.77.w2", "model.layers.33.block_sparse_moe.experts.78.w2", "model.layers.33.block_sparse_moe.experts.79.w2", "model.layers.33.block_sparse_moe.experts.80.w2", "model.layers.33.block_sparse_moe.experts.81.w2", "model.layers.33.block_sparse_moe.experts.82.w2", "model.layers.33.block_sparse_moe.experts.83.w2", "model.layers.33.block_sparse_moe.experts.84.w2", "model.layers.33.block_sparse_moe.experts.85.w2", "model.layers.33.block_sparse_moe.experts.86.w2", "model.layers.33.block_sparse_moe.experts.87.w2", "model.layers.33.block_sparse_moe.experts.88.w2", "model.layers.33.block_sparse_moe.experts.89.w2", "model.layers.33.block_sparse_moe.experts.90.w2", "model.layers.33.block_sparse_moe.experts.91.w2", "model.layers.33.block_sparse_moe.experts.92.w2", "model.layers.33.block_sparse_moe.experts.93.w2", "model.layers.33.block_sparse_moe.experts.94.w2", "model.layers.33.block_sparse_moe.experts.95.w2", "model.layers.33.block_sparse_moe.experts.96.w2", "model.layers.33.block_sparse_moe.experts.97.w2", "model.layers.33.block_sparse_moe.experts.98.w2", "model.layers.33.block_sparse_moe.experts.99.w2", "model.layers.33.block_sparse_moe.experts.100.w2", "model.layers.33.block_sparse_moe.experts.101.w2", "model.layers.33.block_sparse_moe.experts.102.w2", "model.layers.33.block_sparse_moe.experts.103.w2", "model.layers.33.block_sparse_moe.experts.104.w2", "model.layers.33.block_sparse_moe.experts.105.w2", "model.layers.33.block_sparse_moe.experts.106.w2", "model.layers.33.block_sparse_moe.experts.107.w2", "model.layers.33.block_sparse_moe.experts.108.w2", "model.layers.33.block_sparse_moe.experts.109.w2", "model.layers.33.block_sparse_moe.experts.110.w2", "model.layers.33.block_sparse_moe.experts.111.w2", "model.layers.33.block_sparse_moe.experts.112.w2", "model.layers.33.block_sparse_moe.experts.113.w2", "model.layers.33.block_sparse_moe.experts.114.w2", "model.layers.33.block_sparse_moe.experts.115.w2", "model.layers.33.block_sparse_moe.experts.116.w2", "model.layers.33.block_sparse_moe.experts.117.w2", "model.layers.33.block_sparse_moe.experts.118.w2", "model.layers.33.block_sparse_moe.experts.119.w2", "model.layers.33.block_sparse_moe.experts.120.w2", "model.layers.33.block_sparse_moe.experts.121.w2", "model.layers.33.block_sparse_moe.experts.122.w2", "model.layers.33.block_sparse_moe.experts.123.w2", "model.layers.33.block_sparse_moe.experts.124.w2", "model.layers.33.block_sparse_moe.experts.125.w2", "model.layers.33.block_sparse_moe.experts.126.w2", "model.layers.33.block_sparse_moe.experts.127.w2", "model.layers.33.block_sparse_moe.experts.128.w2", "model.layers.33.block_sparse_moe.experts.129.w2", "model.layers.33.block_sparse_moe.experts.130.w2", "model.layers.33.block_sparse_moe.experts.131.w2", "model.layers.33.block_sparse_moe.experts.132.w2", "model.layers.33.block_sparse_moe.experts.133.w2", "model.layers.33.block_sparse_moe.experts.134.w2", "model.layers.33.block_sparse_moe.experts.135.w2", "model.layers.33.block_sparse_moe.experts.136.w2", "model.layers.33.block_sparse_moe.experts.137.w2", "model.layers.33.block_sparse_moe.experts.138.w2", "model.layers.33.block_sparse_moe.experts.139.w2", "model.layers.33.block_sparse_moe.experts.140.w2", "model.layers.33.block_sparse_moe.experts.141.w2", "model.layers.33.block_sparse_moe.experts.142.w2", "model.layers.33.block_sparse_moe.experts.143.w2", "model.layers.33.block_sparse_moe.experts.144.w2", "model.layers.33.block_sparse_moe.experts.145.w2", "model.layers.33.block_sparse_moe.experts.146.w2", "model.layers.33.block_sparse_moe.experts.147.w2", "model.layers.33.block_sparse_moe.experts.148.w2", "model.layers.33.block_sparse_moe.experts.149.w2", "model.layers.33.block_sparse_moe.experts.150.w2", "model.layers.33.block_sparse_moe.experts.151.w2", "model.layers.33.block_sparse_moe.experts.152.w2", "model.layers.33.block_sparse_moe.experts.153.w2", "model.layers.33.block_sparse_moe.experts.154.w2", "model.layers.33.block_sparse_moe.experts.155.w2", "model.layers.33.block_sparse_moe.experts.156.w2", "model.layers.33.block_sparse_moe.experts.157.w2", "model.layers.33.block_sparse_moe.experts.158.w2", "model.layers.33.block_sparse_moe.experts.159.w2", "model.layers.33.block_sparse_moe.experts.160.w2", "model.layers.33.block_sparse_moe.experts.161.w2", "model.layers.33.block_sparse_moe.experts.162.w2", "model.layers.33.block_sparse_moe.experts.163.w2", "model.layers.33.block_sparse_moe.experts.164.w2", "model.layers.33.block_sparse_moe.experts.165.w2", "model.layers.33.block_sparse_moe.experts.166.w2", "model.layers.33.block_sparse_moe.experts.167.w2", "model.layers.33.block_sparse_moe.experts.168.w2", "model.layers.33.block_sparse_moe.experts.169.w2", "model.layers.33.block_sparse_moe.experts.170.w2", "model.layers.33.block_sparse_moe.experts.171.w2", "model.layers.33.block_sparse_moe.experts.172.w2", "model.layers.33.block_sparse_moe.experts.173.w2", "model.layers.33.block_sparse_moe.experts.174.w2", "model.layers.33.block_sparse_moe.experts.175.w2", "model.layers.33.block_sparse_moe.experts.176.w2", "model.layers.33.block_sparse_moe.experts.177.w2", "model.layers.33.block_sparse_moe.experts.178.w2", "model.layers.33.block_sparse_moe.experts.179.w2", "model.layers.33.block_sparse_moe.experts.180.w2", "model.layers.33.block_sparse_moe.experts.181.w2", "model.layers.33.block_sparse_moe.experts.182.w2", "model.layers.33.block_sparse_moe.experts.183.w2", "model.layers.33.block_sparse_moe.experts.184.w2", "model.layers.33.block_sparse_moe.experts.185.w2", "model.layers.33.block_sparse_moe.experts.186.w2", "model.layers.33.block_sparse_moe.experts.187.w2", "model.layers.33.block_sparse_moe.experts.188.w2", "model.layers.33.block_sparse_moe.experts.189.w2", "model.layers.33.block_sparse_moe.experts.190.w2", "model.layers.33.block_sparse_moe.experts.191.w2", "model.layers.33.block_sparse_moe.experts.192.w2", "model.layers.33.block_sparse_moe.experts.193.w2", "model.layers.33.block_sparse_moe.experts.194.w2", "model.layers.33.block_sparse_moe.experts.195.w2", "model.layers.33.block_sparse_moe.experts.196.w2", "model.layers.33.block_sparse_moe.experts.197.w2", "model.layers.33.block_sparse_moe.experts.198.w2", "model.layers.33.block_sparse_moe.experts.199.w2", "model.layers.33.block_sparse_moe.experts.200.w2", "model.layers.33.block_sparse_moe.experts.201.w2", "model.layers.33.block_sparse_moe.experts.202.w2", "model.layers.33.block_sparse_moe.experts.203.w2", "model.layers.33.block_sparse_moe.experts.204.w2", "model.layers.33.block_sparse_moe.experts.205.w2", "model.layers.33.block_sparse_moe.experts.206.w2", "model.layers.33.block_sparse_moe.experts.207.w2", "model.layers.33.block_sparse_moe.experts.208.w2", "model.layers.33.block_sparse_moe.experts.209.w2", "model.layers.33.block_sparse_moe.experts.210.w2", "model.layers.33.block_sparse_moe.experts.211.w2", "model.layers.33.block_sparse_moe.experts.212.w2", "model.layers.33.block_sparse_moe.experts.213.w2", "model.layers.33.block_sparse_moe.experts.214.w2", "model.layers.33.block_sparse_moe.experts.215.w2", "model.layers.33.block_sparse_moe.experts.216.w2", "model.layers.33.block_sparse_moe.experts.217.w2", "model.layers.33.block_sparse_moe.experts.218.w2", "model.layers.33.block_sparse_moe.experts.219.w2", "model.layers.33.block_sparse_moe.experts.220.w2", "model.layers.33.block_sparse_moe.experts.221.w2", "model.layers.33.block_sparse_moe.experts.222.w2", "model.layers.33.block_sparse_moe.experts.223.w2", "model.layers.33.block_sparse_moe.experts.224.w2", "model.layers.33.block_sparse_moe.experts.225.w2", "model.layers.33.block_sparse_moe.experts.226.w2", "model.layers.33.block_sparse_moe.experts.227.w2", "model.layers.33.block_sparse_moe.experts.228.w2", "model.layers.33.block_sparse_moe.experts.229.w2", "model.layers.33.block_sparse_moe.experts.230.w2", "model.layers.33.block_sparse_moe.experts.231.w2", "model.layers.33.block_sparse_moe.experts.232.w2", "model.layers.33.block_sparse_moe.experts.233.w2", "model.layers.33.block_sparse_moe.experts.234.w2", "model.layers.33.block_sparse_moe.experts.235.w2", "model.layers.33.block_sparse_moe.experts.236.w2", "model.layers.33.block_sparse_moe.experts.237.w2", "model.layers.33.block_sparse_moe.experts.238.w2", "model.layers.33.block_sparse_moe.experts.239.w2", "model.layers.33.block_sparse_moe.experts.240.w2", "model.layers.33.block_sparse_moe.experts.241.w2", "model.layers.33.block_sparse_moe.experts.242.w2", "model.layers.33.block_sparse_moe.experts.243.w2", "model.layers.33.block_sparse_moe.experts.244.w2", "model.layers.33.block_sparse_moe.experts.245.w2", "model.layers.33.block_sparse_moe.experts.246.w2", "model.layers.33.block_sparse_moe.experts.247.w2", "model.layers.33.block_sparse_moe.experts.248.w2", "model.layers.33.block_sparse_moe.experts.249.w2", "model.layers.33.block_sparse_moe.experts.250.w2", "model.layers.33.block_sparse_moe.experts.251.w2", "model.layers.33.block_sparse_moe.experts.252.w2", "model.layers.33.block_sparse_moe.experts.253.w2", "model.layers.33.block_sparse_moe.experts.254.w2", "model.layers.33.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -5.518347024918713e-05, "dbits": 3623878656 } ] }, { "idx": 68, "layers": [ "model.layers.34.self_attn.q_proj", "model.layers.34.self_attn.k_proj", "model.layers.34.self_attn.v_proj", "model.layers.34.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0069501521065831184, "dbits": 44040192 } ] }, { "idx": 69, "layers": [ "model.layers.34.block_sparse_moe.experts.0.w1", "model.layers.34.block_sparse_moe.experts.1.w1", "model.layers.34.block_sparse_moe.experts.2.w1", "model.layers.34.block_sparse_moe.experts.3.w1", "model.layers.34.block_sparse_moe.experts.4.w1", "model.layers.34.block_sparse_moe.experts.5.w1", "model.layers.34.block_sparse_moe.experts.6.w1", "model.layers.34.block_sparse_moe.experts.7.w1", "model.layers.34.block_sparse_moe.experts.8.w1", "model.layers.34.block_sparse_moe.experts.9.w1", "model.layers.34.block_sparse_moe.experts.10.w1", "model.layers.34.block_sparse_moe.experts.11.w1", "model.layers.34.block_sparse_moe.experts.12.w1", "model.layers.34.block_sparse_moe.experts.13.w1", "model.layers.34.block_sparse_moe.experts.14.w1", "model.layers.34.block_sparse_moe.experts.15.w1", "model.layers.34.block_sparse_moe.experts.16.w1", "model.layers.34.block_sparse_moe.experts.17.w1", "model.layers.34.block_sparse_moe.experts.18.w1", "model.layers.34.block_sparse_moe.experts.19.w1", "model.layers.34.block_sparse_moe.experts.20.w1", "model.layers.34.block_sparse_moe.experts.21.w1", "model.layers.34.block_sparse_moe.experts.22.w1", "model.layers.34.block_sparse_moe.experts.23.w1", "model.layers.34.block_sparse_moe.experts.24.w1", "model.layers.34.block_sparse_moe.experts.25.w1", "model.layers.34.block_sparse_moe.experts.26.w1", "model.layers.34.block_sparse_moe.experts.27.w1", "model.layers.34.block_sparse_moe.experts.28.w1", "model.layers.34.block_sparse_moe.experts.29.w1", "model.layers.34.block_sparse_moe.experts.30.w1", "model.layers.34.block_sparse_moe.experts.31.w1", "model.layers.34.block_sparse_moe.experts.32.w1", "model.layers.34.block_sparse_moe.experts.33.w1", "model.layers.34.block_sparse_moe.experts.34.w1", "model.layers.34.block_sparse_moe.experts.35.w1", "model.layers.34.block_sparse_moe.experts.36.w1", "model.layers.34.block_sparse_moe.experts.37.w1", "model.layers.34.block_sparse_moe.experts.38.w1", "model.layers.34.block_sparse_moe.experts.39.w1", "model.layers.34.block_sparse_moe.experts.40.w1", "model.layers.34.block_sparse_moe.experts.41.w1", "model.layers.34.block_sparse_moe.experts.42.w1", "model.layers.34.block_sparse_moe.experts.43.w1", "model.layers.34.block_sparse_moe.experts.44.w1", "model.layers.34.block_sparse_moe.experts.45.w1", "model.layers.34.block_sparse_moe.experts.46.w1", "model.layers.34.block_sparse_moe.experts.47.w1", "model.layers.34.block_sparse_moe.experts.48.w1", "model.layers.34.block_sparse_moe.experts.49.w1", "model.layers.34.block_sparse_moe.experts.50.w1", "model.layers.34.block_sparse_moe.experts.51.w1", "model.layers.34.block_sparse_moe.experts.52.w1", "model.layers.34.block_sparse_moe.experts.53.w1", "model.layers.34.block_sparse_moe.experts.54.w1", "model.layers.34.block_sparse_moe.experts.55.w1", "model.layers.34.block_sparse_moe.experts.56.w1", "model.layers.34.block_sparse_moe.experts.57.w1", "model.layers.34.block_sparse_moe.experts.58.w1", "model.layers.34.block_sparse_moe.experts.59.w1", "model.layers.34.block_sparse_moe.experts.60.w1", "model.layers.34.block_sparse_moe.experts.61.w1", "model.layers.34.block_sparse_moe.experts.62.w1", "model.layers.34.block_sparse_moe.experts.63.w1", "model.layers.34.block_sparse_moe.experts.64.w1", "model.layers.34.block_sparse_moe.experts.65.w1", "model.layers.34.block_sparse_moe.experts.66.w1", "model.layers.34.block_sparse_moe.experts.67.w1", "model.layers.34.block_sparse_moe.experts.68.w1", "model.layers.34.block_sparse_moe.experts.69.w1", "model.layers.34.block_sparse_moe.experts.70.w1", "model.layers.34.block_sparse_moe.experts.71.w1", "model.layers.34.block_sparse_moe.experts.72.w1", "model.layers.34.block_sparse_moe.experts.73.w1", "model.layers.34.block_sparse_moe.experts.74.w1", "model.layers.34.block_sparse_moe.experts.75.w1", "model.layers.34.block_sparse_moe.experts.76.w1", "model.layers.34.block_sparse_moe.experts.77.w1", "model.layers.34.block_sparse_moe.experts.78.w1", "model.layers.34.block_sparse_moe.experts.79.w1", "model.layers.34.block_sparse_moe.experts.80.w1", "model.layers.34.block_sparse_moe.experts.81.w1", "model.layers.34.block_sparse_moe.experts.82.w1", "model.layers.34.block_sparse_moe.experts.83.w1", "model.layers.34.block_sparse_moe.experts.84.w1", "model.layers.34.block_sparse_moe.experts.85.w1", "model.layers.34.block_sparse_moe.experts.86.w1", "model.layers.34.block_sparse_moe.experts.87.w1", "model.layers.34.block_sparse_moe.experts.88.w1", "model.layers.34.block_sparse_moe.experts.89.w1", "model.layers.34.block_sparse_moe.experts.90.w1", "model.layers.34.block_sparse_moe.experts.91.w1", "model.layers.34.block_sparse_moe.experts.92.w1", "model.layers.34.block_sparse_moe.experts.93.w1", "model.layers.34.block_sparse_moe.experts.94.w1", "model.layers.34.block_sparse_moe.experts.95.w1", "model.layers.34.block_sparse_moe.experts.96.w1", "model.layers.34.block_sparse_moe.experts.97.w1", "model.layers.34.block_sparse_moe.experts.98.w1", "model.layers.34.block_sparse_moe.experts.99.w1", "model.layers.34.block_sparse_moe.experts.100.w1", "model.layers.34.block_sparse_moe.experts.101.w1", "model.layers.34.block_sparse_moe.experts.102.w1", "model.layers.34.block_sparse_moe.experts.103.w1", "model.layers.34.block_sparse_moe.experts.104.w1", "model.layers.34.block_sparse_moe.experts.105.w1", "model.layers.34.block_sparse_moe.experts.106.w1", "model.layers.34.block_sparse_moe.experts.107.w1", "model.layers.34.block_sparse_moe.experts.108.w1", "model.layers.34.block_sparse_moe.experts.109.w1", "model.layers.34.block_sparse_moe.experts.110.w1", "model.layers.34.block_sparse_moe.experts.111.w1", "model.layers.34.block_sparse_moe.experts.112.w1", "model.layers.34.block_sparse_moe.experts.113.w1", "model.layers.34.block_sparse_moe.experts.114.w1", "model.layers.34.block_sparse_moe.experts.115.w1", "model.layers.34.block_sparse_moe.experts.116.w1", "model.layers.34.block_sparse_moe.experts.117.w1", "model.layers.34.block_sparse_moe.experts.118.w1", "model.layers.34.block_sparse_moe.experts.119.w1", "model.layers.34.block_sparse_moe.experts.120.w1", "model.layers.34.block_sparse_moe.experts.121.w1", "model.layers.34.block_sparse_moe.experts.122.w1", "model.layers.34.block_sparse_moe.experts.123.w1", "model.layers.34.block_sparse_moe.experts.124.w1", "model.layers.34.block_sparse_moe.experts.125.w1", "model.layers.34.block_sparse_moe.experts.126.w1", "model.layers.34.block_sparse_moe.experts.127.w1", "model.layers.34.block_sparse_moe.experts.128.w1", "model.layers.34.block_sparse_moe.experts.129.w1", "model.layers.34.block_sparse_moe.experts.130.w1", "model.layers.34.block_sparse_moe.experts.131.w1", "model.layers.34.block_sparse_moe.experts.132.w1", "model.layers.34.block_sparse_moe.experts.133.w1", "model.layers.34.block_sparse_moe.experts.134.w1", "model.layers.34.block_sparse_moe.experts.135.w1", "model.layers.34.block_sparse_moe.experts.136.w1", "model.layers.34.block_sparse_moe.experts.137.w1", "model.layers.34.block_sparse_moe.experts.138.w1", "model.layers.34.block_sparse_moe.experts.139.w1", "model.layers.34.block_sparse_moe.experts.140.w1", "model.layers.34.block_sparse_moe.experts.141.w1", "model.layers.34.block_sparse_moe.experts.142.w1", "model.layers.34.block_sparse_moe.experts.143.w1", "model.layers.34.block_sparse_moe.experts.144.w1", "model.layers.34.block_sparse_moe.experts.145.w1", "model.layers.34.block_sparse_moe.experts.146.w1", "model.layers.34.block_sparse_moe.experts.147.w1", "model.layers.34.block_sparse_moe.experts.148.w1", "model.layers.34.block_sparse_moe.experts.149.w1", "model.layers.34.block_sparse_moe.experts.150.w1", "model.layers.34.block_sparse_moe.experts.151.w1", "model.layers.34.block_sparse_moe.experts.152.w1", "model.layers.34.block_sparse_moe.experts.153.w1", "model.layers.34.block_sparse_moe.experts.154.w1", "model.layers.34.block_sparse_moe.experts.155.w1", "model.layers.34.block_sparse_moe.experts.156.w1", "model.layers.34.block_sparse_moe.experts.157.w1", "model.layers.34.block_sparse_moe.experts.158.w1", "model.layers.34.block_sparse_moe.experts.159.w1", "model.layers.34.block_sparse_moe.experts.160.w1", "model.layers.34.block_sparse_moe.experts.161.w1", "model.layers.34.block_sparse_moe.experts.162.w1", "model.layers.34.block_sparse_moe.experts.163.w1", "model.layers.34.block_sparse_moe.experts.164.w1", "model.layers.34.block_sparse_moe.experts.165.w1", "model.layers.34.block_sparse_moe.experts.166.w1", "model.layers.34.block_sparse_moe.experts.167.w1", "model.layers.34.block_sparse_moe.experts.168.w1", "model.layers.34.block_sparse_moe.experts.169.w1", "model.layers.34.block_sparse_moe.experts.170.w1", "model.layers.34.block_sparse_moe.experts.171.w1", "model.layers.34.block_sparse_moe.experts.172.w1", "model.layers.34.block_sparse_moe.experts.173.w1", "model.layers.34.block_sparse_moe.experts.174.w1", "model.layers.34.block_sparse_moe.experts.175.w1", "model.layers.34.block_sparse_moe.experts.176.w1", "model.layers.34.block_sparse_moe.experts.177.w1", "model.layers.34.block_sparse_moe.experts.178.w1", "model.layers.34.block_sparse_moe.experts.179.w1", "model.layers.34.block_sparse_moe.experts.180.w1", "model.layers.34.block_sparse_moe.experts.181.w1", "model.layers.34.block_sparse_moe.experts.182.w1", "model.layers.34.block_sparse_moe.experts.183.w1", "model.layers.34.block_sparse_moe.experts.184.w1", "model.layers.34.block_sparse_moe.experts.185.w1", "model.layers.34.block_sparse_moe.experts.186.w1", "model.layers.34.block_sparse_moe.experts.187.w1", "model.layers.34.block_sparse_moe.experts.188.w1", "model.layers.34.block_sparse_moe.experts.189.w1", "model.layers.34.block_sparse_moe.experts.190.w1", "model.layers.34.block_sparse_moe.experts.191.w1", "model.layers.34.block_sparse_moe.experts.192.w1", "model.layers.34.block_sparse_moe.experts.193.w1", "model.layers.34.block_sparse_moe.experts.194.w1", "model.layers.34.block_sparse_moe.experts.195.w1", "model.layers.34.block_sparse_moe.experts.196.w1", "model.layers.34.block_sparse_moe.experts.197.w1", "model.layers.34.block_sparse_moe.experts.198.w1", "model.layers.34.block_sparse_moe.experts.199.w1", "model.layers.34.block_sparse_moe.experts.200.w1", "model.layers.34.block_sparse_moe.experts.201.w1", "model.layers.34.block_sparse_moe.experts.202.w1", "model.layers.34.block_sparse_moe.experts.203.w1", "model.layers.34.block_sparse_moe.experts.204.w1", "model.layers.34.block_sparse_moe.experts.205.w1", "model.layers.34.block_sparse_moe.experts.206.w1", "model.layers.34.block_sparse_moe.experts.207.w1", "model.layers.34.block_sparse_moe.experts.208.w1", "model.layers.34.block_sparse_moe.experts.209.w1", "model.layers.34.block_sparse_moe.experts.210.w1", "model.layers.34.block_sparse_moe.experts.211.w1", "model.layers.34.block_sparse_moe.experts.212.w1", "model.layers.34.block_sparse_moe.experts.213.w1", "model.layers.34.block_sparse_moe.experts.214.w1", "model.layers.34.block_sparse_moe.experts.215.w1", "model.layers.34.block_sparse_moe.experts.216.w1", "model.layers.34.block_sparse_moe.experts.217.w1", "model.layers.34.block_sparse_moe.experts.218.w1", "model.layers.34.block_sparse_moe.experts.219.w1", "model.layers.34.block_sparse_moe.experts.220.w1", "model.layers.34.block_sparse_moe.experts.221.w1", "model.layers.34.block_sparse_moe.experts.222.w1", "model.layers.34.block_sparse_moe.experts.223.w1", "model.layers.34.block_sparse_moe.experts.224.w1", "model.layers.34.block_sparse_moe.experts.225.w1", "model.layers.34.block_sparse_moe.experts.226.w1", "model.layers.34.block_sparse_moe.experts.227.w1", "model.layers.34.block_sparse_moe.experts.228.w1", "model.layers.34.block_sparse_moe.experts.229.w1", "model.layers.34.block_sparse_moe.experts.230.w1", "model.layers.34.block_sparse_moe.experts.231.w1", "model.layers.34.block_sparse_moe.experts.232.w1", "model.layers.34.block_sparse_moe.experts.233.w1", "model.layers.34.block_sparse_moe.experts.234.w1", "model.layers.34.block_sparse_moe.experts.235.w1", "model.layers.34.block_sparse_moe.experts.236.w1", "model.layers.34.block_sparse_moe.experts.237.w1", "model.layers.34.block_sparse_moe.experts.238.w1", "model.layers.34.block_sparse_moe.experts.239.w1", "model.layers.34.block_sparse_moe.experts.240.w1", "model.layers.34.block_sparse_moe.experts.241.w1", "model.layers.34.block_sparse_moe.experts.242.w1", "model.layers.34.block_sparse_moe.experts.243.w1", "model.layers.34.block_sparse_moe.experts.244.w1", "model.layers.34.block_sparse_moe.experts.245.w1", "model.layers.34.block_sparse_moe.experts.246.w1", "model.layers.34.block_sparse_moe.experts.247.w1", "model.layers.34.block_sparse_moe.experts.248.w1", "model.layers.34.block_sparse_moe.experts.249.w1", "model.layers.34.block_sparse_moe.experts.250.w1", "model.layers.34.block_sparse_moe.experts.251.w1", "model.layers.34.block_sparse_moe.experts.252.w1", "model.layers.34.block_sparse_moe.experts.253.w1", "model.layers.34.block_sparse_moe.experts.254.w1", "model.layers.34.block_sparse_moe.experts.255.w1", "model.layers.34.block_sparse_moe.experts.0.w3", "model.layers.34.block_sparse_moe.experts.1.w3", "model.layers.34.block_sparse_moe.experts.2.w3", "model.layers.34.block_sparse_moe.experts.3.w3", "model.layers.34.block_sparse_moe.experts.4.w3", "model.layers.34.block_sparse_moe.experts.5.w3", "model.layers.34.block_sparse_moe.experts.6.w3", "model.layers.34.block_sparse_moe.experts.7.w3", "model.layers.34.block_sparse_moe.experts.8.w3", "model.layers.34.block_sparse_moe.experts.9.w3", "model.layers.34.block_sparse_moe.experts.10.w3", "model.layers.34.block_sparse_moe.experts.11.w3", "model.layers.34.block_sparse_moe.experts.12.w3", "model.layers.34.block_sparse_moe.experts.13.w3", "model.layers.34.block_sparse_moe.experts.14.w3", "model.layers.34.block_sparse_moe.experts.15.w3", "model.layers.34.block_sparse_moe.experts.16.w3", "model.layers.34.block_sparse_moe.experts.17.w3", "model.layers.34.block_sparse_moe.experts.18.w3", "model.layers.34.block_sparse_moe.experts.19.w3", "model.layers.34.block_sparse_moe.experts.20.w3", "model.layers.34.block_sparse_moe.experts.21.w3", "model.layers.34.block_sparse_moe.experts.22.w3", "model.layers.34.block_sparse_moe.experts.23.w3", "model.layers.34.block_sparse_moe.experts.24.w3", "model.layers.34.block_sparse_moe.experts.25.w3", "model.layers.34.block_sparse_moe.experts.26.w3", "model.layers.34.block_sparse_moe.experts.27.w3", "model.layers.34.block_sparse_moe.experts.28.w3", "model.layers.34.block_sparse_moe.experts.29.w3", "model.layers.34.block_sparse_moe.experts.30.w3", "model.layers.34.block_sparse_moe.experts.31.w3", "model.layers.34.block_sparse_moe.experts.32.w3", "model.layers.34.block_sparse_moe.experts.33.w3", "model.layers.34.block_sparse_moe.experts.34.w3", "model.layers.34.block_sparse_moe.experts.35.w3", "model.layers.34.block_sparse_moe.experts.36.w3", "model.layers.34.block_sparse_moe.experts.37.w3", "model.layers.34.block_sparse_moe.experts.38.w3", "model.layers.34.block_sparse_moe.experts.39.w3", "model.layers.34.block_sparse_moe.experts.40.w3", "model.layers.34.block_sparse_moe.experts.41.w3", "model.layers.34.block_sparse_moe.experts.42.w3", "model.layers.34.block_sparse_moe.experts.43.w3", "model.layers.34.block_sparse_moe.experts.44.w3", "model.layers.34.block_sparse_moe.experts.45.w3", "model.layers.34.block_sparse_moe.experts.46.w3", "model.layers.34.block_sparse_moe.experts.47.w3", "model.layers.34.block_sparse_moe.experts.48.w3", "model.layers.34.block_sparse_moe.experts.49.w3", "model.layers.34.block_sparse_moe.experts.50.w3", "model.layers.34.block_sparse_moe.experts.51.w3", "model.layers.34.block_sparse_moe.experts.52.w3", "model.layers.34.block_sparse_moe.experts.53.w3", "model.layers.34.block_sparse_moe.experts.54.w3", "model.layers.34.block_sparse_moe.experts.55.w3", "model.layers.34.block_sparse_moe.experts.56.w3", "model.layers.34.block_sparse_moe.experts.57.w3", "model.layers.34.block_sparse_moe.experts.58.w3", "model.layers.34.block_sparse_moe.experts.59.w3", "model.layers.34.block_sparse_moe.experts.60.w3", "model.layers.34.block_sparse_moe.experts.61.w3", "model.layers.34.block_sparse_moe.experts.62.w3", "model.layers.34.block_sparse_moe.experts.63.w3", "model.layers.34.block_sparse_moe.experts.64.w3", "model.layers.34.block_sparse_moe.experts.65.w3", "model.layers.34.block_sparse_moe.experts.66.w3", "model.layers.34.block_sparse_moe.experts.67.w3", "model.layers.34.block_sparse_moe.experts.68.w3", "model.layers.34.block_sparse_moe.experts.69.w3", "model.layers.34.block_sparse_moe.experts.70.w3", "model.layers.34.block_sparse_moe.experts.71.w3", "model.layers.34.block_sparse_moe.experts.72.w3", "model.layers.34.block_sparse_moe.experts.73.w3", "model.layers.34.block_sparse_moe.experts.74.w3", "model.layers.34.block_sparse_moe.experts.75.w3", "model.layers.34.block_sparse_moe.experts.76.w3", "model.layers.34.block_sparse_moe.experts.77.w3", "model.layers.34.block_sparse_moe.experts.78.w3", "model.layers.34.block_sparse_moe.experts.79.w3", "model.layers.34.block_sparse_moe.experts.80.w3", "model.layers.34.block_sparse_moe.experts.81.w3", "model.layers.34.block_sparse_moe.experts.82.w3", "model.layers.34.block_sparse_moe.experts.83.w3", "model.layers.34.block_sparse_moe.experts.84.w3", "model.layers.34.block_sparse_moe.experts.85.w3", "model.layers.34.block_sparse_moe.experts.86.w3", "model.layers.34.block_sparse_moe.experts.87.w3", "model.layers.34.block_sparse_moe.experts.88.w3", "model.layers.34.block_sparse_moe.experts.89.w3", "model.layers.34.block_sparse_moe.experts.90.w3", "model.layers.34.block_sparse_moe.experts.91.w3", "model.layers.34.block_sparse_moe.experts.92.w3", "model.layers.34.block_sparse_moe.experts.93.w3", "model.layers.34.block_sparse_moe.experts.94.w3", "model.layers.34.block_sparse_moe.experts.95.w3", "model.layers.34.block_sparse_moe.experts.96.w3", "model.layers.34.block_sparse_moe.experts.97.w3", "model.layers.34.block_sparse_moe.experts.98.w3", "model.layers.34.block_sparse_moe.experts.99.w3", "model.layers.34.block_sparse_moe.experts.100.w3", "model.layers.34.block_sparse_moe.experts.101.w3", "model.layers.34.block_sparse_moe.experts.102.w3", "model.layers.34.block_sparse_moe.experts.103.w3", "model.layers.34.block_sparse_moe.experts.104.w3", "model.layers.34.block_sparse_moe.experts.105.w3", "model.layers.34.block_sparse_moe.experts.106.w3", "model.layers.34.block_sparse_moe.experts.107.w3", "model.layers.34.block_sparse_moe.experts.108.w3", "model.layers.34.block_sparse_moe.experts.109.w3", "model.layers.34.block_sparse_moe.experts.110.w3", "model.layers.34.block_sparse_moe.experts.111.w3", "model.layers.34.block_sparse_moe.experts.112.w3", "model.layers.34.block_sparse_moe.experts.113.w3", "model.layers.34.block_sparse_moe.experts.114.w3", "model.layers.34.block_sparse_moe.experts.115.w3", "model.layers.34.block_sparse_moe.experts.116.w3", "model.layers.34.block_sparse_moe.experts.117.w3", "model.layers.34.block_sparse_moe.experts.118.w3", "model.layers.34.block_sparse_moe.experts.119.w3", "model.layers.34.block_sparse_moe.experts.120.w3", "model.layers.34.block_sparse_moe.experts.121.w3", "model.layers.34.block_sparse_moe.experts.122.w3", "model.layers.34.block_sparse_moe.experts.123.w3", "model.layers.34.block_sparse_moe.experts.124.w3", "model.layers.34.block_sparse_moe.experts.125.w3", "model.layers.34.block_sparse_moe.experts.126.w3", "model.layers.34.block_sparse_moe.experts.127.w3", "model.layers.34.block_sparse_moe.experts.128.w3", "model.layers.34.block_sparse_moe.experts.129.w3", "model.layers.34.block_sparse_moe.experts.130.w3", "model.layers.34.block_sparse_moe.experts.131.w3", "model.layers.34.block_sparse_moe.experts.132.w3", "model.layers.34.block_sparse_moe.experts.133.w3", "model.layers.34.block_sparse_moe.experts.134.w3", "model.layers.34.block_sparse_moe.experts.135.w3", "model.layers.34.block_sparse_moe.experts.136.w3", "model.layers.34.block_sparse_moe.experts.137.w3", "model.layers.34.block_sparse_moe.experts.138.w3", "model.layers.34.block_sparse_moe.experts.139.w3", "model.layers.34.block_sparse_moe.experts.140.w3", "model.layers.34.block_sparse_moe.experts.141.w3", "model.layers.34.block_sparse_moe.experts.142.w3", "model.layers.34.block_sparse_moe.experts.143.w3", "model.layers.34.block_sparse_moe.experts.144.w3", "model.layers.34.block_sparse_moe.experts.145.w3", "model.layers.34.block_sparse_moe.experts.146.w3", "model.layers.34.block_sparse_moe.experts.147.w3", "model.layers.34.block_sparse_moe.experts.148.w3", "model.layers.34.block_sparse_moe.experts.149.w3", "model.layers.34.block_sparse_moe.experts.150.w3", "model.layers.34.block_sparse_moe.experts.151.w3", "model.layers.34.block_sparse_moe.experts.152.w3", "model.layers.34.block_sparse_moe.experts.153.w3", "model.layers.34.block_sparse_moe.experts.154.w3", "model.layers.34.block_sparse_moe.experts.155.w3", "model.layers.34.block_sparse_moe.experts.156.w3", "model.layers.34.block_sparse_moe.experts.157.w3", "model.layers.34.block_sparse_moe.experts.158.w3", "model.layers.34.block_sparse_moe.experts.159.w3", "model.layers.34.block_sparse_moe.experts.160.w3", "model.layers.34.block_sparse_moe.experts.161.w3", "model.layers.34.block_sparse_moe.experts.162.w3", "model.layers.34.block_sparse_moe.experts.163.w3", "model.layers.34.block_sparse_moe.experts.164.w3", "model.layers.34.block_sparse_moe.experts.165.w3", "model.layers.34.block_sparse_moe.experts.166.w3", "model.layers.34.block_sparse_moe.experts.167.w3", "model.layers.34.block_sparse_moe.experts.168.w3", "model.layers.34.block_sparse_moe.experts.169.w3", "model.layers.34.block_sparse_moe.experts.170.w3", "model.layers.34.block_sparse_moe.experts.171.w3", "model.layers.34.block_sparse_moe.experts.172.w3", "model.layers.34.block_sparse_moe.experts.173.w3", "model.layers.34.block_sparse_moe.experts.174.w3", "model.layers.34.block_sparse_moe.experts.175.w3", "model.layers.34.block_sparse_moe.experts.176.w3", "model.layers.34.block_sparse_moe.experts.177.w3", "model.layers.34.block_sparse_moe.experts.178.w3", "model.layers.34.block_sparse_moe.experts.179.w3", "model.layers.34.block_sparse_moe.experts.180.w3", "model.layers.34.block_sparse_moe.experts.181.w3", "model.layers.34.block_sparse_moe.experts.182.w3", "model.layers.34.block_sparse_moe.experts.183.w3", "model.layers.34.block_sparse_moe.experts.184.w3", "model.layers.34.block_sparse_moe.experts.185.w3", "model.layers.34.block_sparse_moe.experts.186.w3", "model.layers.34.block_sparse_moe.experts.187.w3", "model.layers.34.block_sparse_moe.experts.188.w3", "model.layers.34.block_sparse_moe.experts.189.w3", "model.layers.34.block_sparse_moe.experts.190.w3", "model.layers.34.block_sparse_moe.experts.191.w3", "model.layers.34.block_sparse_moe.experts.192.w3", "model.layers.34.block_sparse_moe.experts.193.w3", "model.layers.34.block_sparse_moe.experts.194.w3", "model.layers.34.block_sparse_moe.experts.195.w3", "model.layers.34.block_sparse_moe.experts.196.w3", "model.layers.34.block_sparse_moe.experts.197.w3", "model.layers.34.block_sparse_moe.experts.198.w3", "model.layers.34.block_sparse_moe.experts.199.w3", "model.layers.34.block_sparse_moe.experts.200.w3", "model.layers.34.block_sparse_moe.experts.201.w3", "model.layers.34.block_sparse_moe.experts.202.w3", "model.layers.34.block_sparse_moe.experts.203.w3", "model.layers.34.block_sparse_moe.experts.204.w3", "model.layers.34.block_sparse_moe.experts.205.w3", "model.layers.34.block_sparse_moe.experts.206.w3", "model.layers.34.block_sparse_moe.experts.207.w3", "model.layers.34.block_sparse_moe.experts.208.w3", "model.layers.34.block_sparse_moe.experts.209.w3", "model.layers.34.block_sparse_moe.experts.210.w3", "model.layers.34.block_sparse_moe.experts.211.w3", "model.layers.34.block_sparse_moe.experts.212.w3", "model.layers.34.block_sparse_moe.experts.213.w3", "model.layers.34.block_sparse_moe.experts.214.w3", "model.layers.34.block_sparse_moe.experts.215.w3", "model.layers.34.block_sparse_moe.experts.216.w3", "model.layers.34.block_sparse_moe.experts.217.w3", "model.layers.34.block_sparse_moe.experts.218.w3", "model.layers.34.block_sparse_moe.experts.219.w3", "model.layers.34.block_sparse_moe.experts.220.w3", "model.layers.34.block_sparse_moe.experts.221.w3", "model.layers.34.block_sparse_moe.experts.222.w3", "model.layers.34.block_sparse_moe.experts.223.w3", "model.layers.34.block_sparse_moe.experts.224.w3", "model.layers.34.block_sparse_moe.experts.225.w3", "model.layers.34.block_sparse_moe.experts.226.w3", "model.layers.34.block_sparse_moe.experts.227.w3", "model.layers.34.block_sparse_moe.experts.228.w3", "model.layers.34.block_sparse_moe.experts.229.w3", "model.layers.34.block_sparse_moe.experts.230.w3", "model.layers.34.block_sparse_moe.experts.231.w3", "model.layers.34.block_sparse_moe.experts.232.w3", "model.layers.34.block_sparse_moe.experts.233.w3", "model.layers.34.block_sparse_moe.experts.234.w3", "model.layers.34.block_sparse_moe.experts.235.w3", "model.layers.34.block_sparse_moe.experts.236.w3", "model.layers.34.block_sparse_moe.experts.237.w3", "model.layers.34.block_sparse_moe.experts.238.w3", "model.layers.34.block_sparse_moe.experts.239.w3", "model.layers.34.block_sparse_moe.experts.240.w3", "model.layers.34.block_sparse_moe.experts.241.w3", "model.layers.34.block_sparse_moe.experts.242.w3", "model.layers.34.block_sparse_moe.experts.243.w3", "model.layers.34.block_sparse_moe.experts.244.w3", "model.layers.34.block_sparse_moe.experts.245.w3", "model.layers.34.block_sparse_moe.experts.246.w3", "model.layers.34.block_sparse_moe.experts.247.w3", "model.layers.34.block_sparse_moe.experts.248.w3", "model.layers.34.block_sparse_moe.experts.249.w3", "model.layers.34.block_sparse_moe.experts.250.w3", "model.layers.34.block_sparse_moe.experts.251.w3", "model.layers.34.block_sparse_moe.experts.252.w3", "model.layers.34.block_sparse_moe.experts.253.w3", "model.layers.34.block_sparse_moe.experts.254.w3", "model.layers.34.block_sparse_moe.experts.255.w3", "model.layers.34.block_sparse_moe.experts.0.w2", "model.layers.34.block_sparse_moe.experts.1.w2", "model.layers.34.block_sparse_moe.experts.2.w2", "model.layers.34.block_sparse_moe.experts.3.w2", "model.layers.34.block_sparse_moe.experts.4.w2", "model.layers.34.block_sparse_moe.experts.5.w2", "model.layers.34.block_sparse_moe.experts.6.w2", "model.layers.34.block_sparse_moe.experts.7.w2", "model.layers.34.block_sparse_moe.experts.8.w2", "model.layers.34.block_sparse_moe.experts.9.w2", "model.layers.34.block_sparse_moe.experts.10.w2", "model.layers.34.block_sparse_moe.experts.11.w2", "model.layers.34.block_sparse_moe.experts.12.w2", "model.layers.34.block_sparse_moe.experts.13.w2", "model.layers.34.block_sparse_moe.experts.14.w2", "model.layers.34.block_sparse_moe.experts.15.w2", "model.layers.34.block_sparse_moe.experts.16.w2", "model.layers.34.block_sparse_moe.experts.17.w2", "model.layers.34.block_sparse_moe.experts.18.w2", "model.layers.34.block_sparse_moe.experts.19.w2", "model.layers.34.block_sparse_moe.experts.20.w2", "model.layers.34.block_sparse_moe.experts.21.w2", "model.layers.34.block_sparse_moe.experts.22.w2", "model.layers.34.block_sparse_moe.experts.23.w2", "model.layers.34.block_sparse_moe.experts.24.w2", "model.layers.34.block_sparse_moe.experts.25.w2", "model.layers.34.block_sparse_moe.experts.26.w2", "model.layers.34.block_sparse_moe.experts.27.w2", "model.layers.34.block_sparse_moe.experts.28.w2", "model.layers.34.block_sparse_moe.experts.29.w2", "model.layers.34.block_sparse_moe.experts.30.w2", "model.layers.34.block_sparse_moe.experts.31.w2", "model.layers.34.block_sparse_moe.experts.32.w2", "model.layers.34.block_sparse_moe.experts.33.w2", "model.layers.34.block_sparse_moe.experts.34.w2", "model.layers.34.block_sparse_moe.experts.35.w2", "model.layers.34.block_sparse_moe.experts.36.w2", "model.layers.34.block_sparse_moe.experts.37.w2", "model.layers.34.block_sparse_moe.experts.38.w2", "model.layers.34.block_sparse_moe.experts.39.w2", "model.layers.34.block_sparse_moe.experts.40.w2", "model.layers.34.block_sparse_moe.experts.41.w2", "model.layers.34.block_sparse_moe.experts.42.w2", "model.layers.34.block_sparse_moe.experts.43.w2", "model.layers.34.block_sparse_moe.experts.44.w2", "model.layers.34.block_sparse_moe.experts.45.w2", "model.layers.34.block_sparse_moe.experts.46.w2", "model.layers.34.block_sparse_moe.experts.47.w2", "model.layers.34.block_sparse_moe.experts.48.w2", "model.layers.34.block_sparse_moe.experts.49.w2", "model.layers.34.block_sparse_moe.experts.50.w2", "model.layers.34.block_sparse_moe.experts.51.w2", "model.layers.34.block_sparse_moe.experts.52.w2", "model.layers.34.block_sparse_moe.experts.53.w2", "model.layers.34.block_sparse_moe.experts.54.w2", "model.layers.34.block_sparse_moe.experts.55.w2", "model.layers.34.block_sparse_moe.experts.56.w2", "model.layers.34.block_sparse_moe.experts.57.w2", "model.layers.34.block_sparse_moe.experts.58.w2", "model.layers.34.block_sparse_moe.experts.59.w2", "model.layers.34.block_sparse_moe.experts.60.w2", "model.layers.34.block_sparse_moe.experts.61.w2", "model.layers.34.block_sparse_moe.experts.62.w2", "model.layers.34.block_sparse_moe.experts.63.w2", "model.layers.34.block_sparse_moe.experts.64.w2", "model.layers.34.block_sparse_moe.experts.65.w2", "model.layers.34.block_sparse_moe.experts.66.w2", "model.layers.34.block_sparse_moe.experts.67.w2", "model.layers.34.block_sparse_moe.experts.68.w2", "model.layers.34.block_sparse_moe.experts.69.w2", "model.layers.34.block_sparse_moe.experts.70.w2", "model.layers.34.block_sparse_moe.experts.71.w2", "model.layers.34.block_sparse_moe.experts.72.w2", "model.layers.34.block_sparse_moe.experts.73.w2", "model.layers.34.block_sparse_moe.experts.74.w2", "model.layers.34.block_sparse_moe.experts.75.w2", "model.layers.34.block_sparse_moe.experts.76.w2", "model.layers.34.block_sparse_moe.experts.77.w2", "model.layers.34.block_sparse_moe.experts.78.w2", "model.layers.34.block_sparse_moe.experts.79.w2", "model.layers.34.block_sparse_moe.experts.80.w2", "model.layers.34.block_sparse_moe.experts.81.w2", "model.layers.34.block_sparse_moe.experts.82.w2", "model.layers.34.block_sparse_moe.experts.83.w2", "model.layers.34.block_sparse_moe.experts.84.w2", "model.layers.34.block_sparse_moe.experts.85.w2", "model.layers.34.block_sparse_moe.experts.86.w2", "model.layers.34.block_sparse_moe.experts.87.w2", "model.layers.34.block_sparse_moe.experts.88.w2", "model.layers.34.block_sparse_moe.experts.89.w2", "model.layers.34.block_sparse_moe.experts.90.w2", "model.layers.34.block_sparse_moe.experts.91.w2", "model.layers.34.block_sparse_moe.experts.92.w2", "model.layers.34.block_sparse_moe.experts.93.w2", "model.layers.34.block_sparse_moe.experts.94.w2", "model.layers.34.block_sparse_moe.experts.95.w2", "model.layers.34.block_sparse_moe.experts.96.w2", "model.layers.34.block_sparse_moe.experts.97.w2", "model.layers.34.block_sparse_moe.experts.98.w2", "model.layers.34.block_sparse_moe.experts.99.w2", "model.layers.34.block_sparse_moe.experts.100.w2", "model.layers.34.block_sparse_moe.experts.101.w2", "model.layers.34.block_sparse_moe.experts.102.w2", "model.layers.34.block_sparse_moe.experts.103.w2", "model.layers.34.block_sparse_moe.experts.104.w2", "model.layers.34.block_sparse_moe.experts.105.w2", "model.layers.34.block_sparse_moe.experts.106.w2", "model.layers.34.block_sparse_moe.experts.107.w2", "model.layers.34.block_sparse_moe.experts.108.w2", "model.layers.34.block_sparse_moe.experts.109.w2", "model.layers.34.block_sparse_moe.experts.110.w2", "model.layers.34.block_sparse_moe.experts.111.w2", "model.layers.34.block_sparse_moe.experts.112.w2", "model.layers.34.block_sparse_moe.experts.113.w2", "model.layers.34.block_sparse_moe.experts.114.w2", "model.layers.34.block_sparse_moe.experts.115.w2", "model.layers.34.block_sparse_moe.experts.116.w2", "model.layers.34.block_sparse_moe.experts.117.w2", "model.layers.34.block_sparse_moe.experts.118.w2", "model.layers.34.block_sparse_moe.experts.119.w2", "model.layers.34.block_sparse_moe.experts.120.w2", "model.layers.34.block_sparse_moe.experts.121.w2", "model.layers.34.block_sparse_moe.experts.122.w2", "model.layers.34.block_sparse_moe.experts.123.w2", "model.layers.34.block_sparse_moe.experts.124.w2", "model.layers.34.block_sparse_moe.experts.125.w2", "model.layers.34.block_sparse_moe.experts.126.w2", "model.layers.34.block_sparse_moe.experts.127.w2", "model.layers.34.block_sparse_moe.experts.128.w2", "model.layers.34.block_sparse_moe.experts.129.w2", "model.layers.34.block_sparse_moe.experts.130.w2", "model.layers.34.block_sparse_moe.experts.131.w2", "model.layers.34.block_sparse_moe.experts.132.w2", "model.layers.34.block_sparse_moe.experts.133.w2", "model.layers.34.block_sparse_moe.experts.134.w2", "model.layers.34.block_sparse_moe.experts.135.w2", "model.layers.34.block_sparse_moe.experts.136.w2", "model.layers.34.block_sparse_moe.experts.137.w2", "model.layers.34.block_sparse_moe.experts.138.w2", "model.layers.34.block_sparse_moe.experts.139.w2", "model.layers.34.block_sparse_moe.experts.140.w2", "model.layers.34.block_sparse_moe.experts.141.w2", "model.layers.34.block_sparse_moe.experts.142.w2", "model.layers.34.block_sparse_moe.experts.143.w2", "model.layers.34.block_sparse_moe.experts.144.w2", "model.layers.34.block_sparse_moe.experts.145.w2", "model.layers.34.block_sparse_moe.experts.146.w2", "model.layers.34.block_sparse_moe.experts.147.w2", "model.layers.34.block_sparse_moe.experts.148.w2", "model.layers.34.block_sparse_moe.experts.149.w2", "model.layers.34.block_sparse_moe.experts.150.w2", "model.layers.34.block_sparse_moe.experts.151.w2", "model.layers.34.block_sparse_moe.experts.152.w2", "model.layers.34.block_sparse_moe.experts.153.w2", "model.layers.34.block_sparse_moe.experts.154.w2", "model.layers.34.block_sparse_moe.experts.155.w2", "model.layers.34.block_sparse_moe.experts.156.w2", "model.layers.34.block_sparse_moe.experts.157.w2", "model.layers.34.block_sparse_moe.experts.158.w2", "model.layers.34.block_sparse_moe.experts.159.w2", "model.layers.34.block_sparse_moe.experts.160.w2", "model.layers.34.block_sparse_moe.experts.161.w2", "model.layers.34.block_sparse_moe.experts.162.w2", "model.layers.34.block_sparse_moe.experts.163.w2", "model.layers.34.block_sparse_moe.experts.164.w2", "model.layers.34.block_sparse_moe.experts.165.w2", "model.layers.34.block_sparse_moe.experts.166.w2", "model.layers.34.block_sparse_moe.experts.167.w2", "model.layers.34.block_sparse_moe.experts.168.w2", "model.layers.34.block_sparse_moe.experts.169.w2", "model.layers.34.block_sparse_moe.experts.170.w2", "model.layers.34.block_sparse_moe.experts.171.w2", "model.layers.34.block_sparse_moe.experts.172.w2", "model.layers.34.block_sparse_moe.experts.173.w2", "model.layers.34.block_sparse_moe.experts.174.w2", "model.layers.34.block_sparse_moe.experts.175.w2", "model.layers.34.block_sparse_moe.experts.176.w2", "model.layers.34.block_sparse_moe.experts.177.w2", "model.layers.34.block_sparse_moe.experts.178.w2", "model.layers.34.block_sparse_moe.experts.179.w2", "model.layers.34.block_sparse_moe.experts.180.w2", "model.layers.34.block_sparse_moe.experts.181.w2", "model.layers.34.block_sparse_moe.experts.182.w2", "model.layers.34.block_sparse_moe.experts.183.w2", "model.layers.34.block_sparse_moe.experts.184.w2", "model.layers.34.block_sparse_moe.experts.185.w2", "model.layers.34.block_sparse_moe.experts.186.w2", "model.layers.34.block_sparse_moe.experts.187.w2", "model.layers.34.block_sparse_moe.experts.188.w2", "model.layers.34.block_sparse_moe.experts.189.w2", "model.layers.34.block_sparse_moe.experts.190.w2", "model.layers.34.block_sparse_moe.experts.191.w2", "model.layers.34.block_sparse_moe.experts.192.w2", "model.layers.34.block_sparse_moe.experts.193.w2", "model.layers.34.block_sparse_moe.experts.194.w2", "model.layers.34.block_sparse_moe.experts.195.w2", "model.layers.34.block_sparse_moe.experts.196.w2", "model.layers.34.block_sparse_moe.experts.197.w2", "model.layers.34.block_sparse_moe.experts.198.w2", "model.layers.34.block_sparse_moe.experts.199.w2", "model.layers.34.block_sparse_moe.experts.200.w2", "model.layers.34.block_sparse_moe.experts.201.w2", "model.layers.34.block_sparse_moe.experts.202.w2", "model.layers.34.block_sparse_moe.experts.203.w2", "model.layers.34.block_sparse_moe.experts.204.w2", "model.layers.34.block_sparse_moe.experts.205.w2", "model.layers.34.block_sparse_moe.experts.206.w2", "model.layers.34.block_sparse_moe.experts.207.w2", "model.layers.34.block_sparse_moe.experts.208.w2", "model.layers.34.block_sparse_moe.experts.209.w2", "model.layers.34.block_sparse_moe.experts.210.w2", "model.layers.34.block_sparse_moe.experts.211.w2", "model.layers.34.block_sparse_moe.experts.212.w2", "model.layers.34.block_sparse_moe.experts.213.w2", "model.layers.34.block_sparse_moe.experts.214.w2", "model.layers.34.block_sparse_moe.experts.215.w2", "model.layers.34.block_sparse_moe.experts.216.w2", "model.layers.34.block_sparse_moe.experts.217.w2", "model.layers.34.block_sparse_moe.experts.218.w2", "model.layers.34.block_sparse_moe.experts.219.w2", "model.layers.34.block_sparse_moe.experts.220.w2", "model.layers.34.block_sparse_moe.experts.221.w2", "model.layers.34.block_sparse_moe.experts.222.w2", "model.layers.34.block_sparse_moe.experts.223.w2", "model.layers.34.block_sparse_moe.experts.224.w2", "model.layers.34.block_sparse_moe.experts.225.w2", "model.layers.34.block_sparse_moe.experts.226.w2", "model.layers.34.block_sparse_moe.experts.227.w2", "model.layers.34.block_sparse_moe.experts.228.w2", "model.layers.34.block_sparse_moe.experts.229.w2", "model.layers.34.block_sparse_moe.experts.230.w2", "model.layers.34.block_sparse_moe.experts.231.w2", "model.layers.34.block_sparse_moe.experts.232.w2", "model.layers.34.block_sparse_moe.experts.233.w2", "model.layers.34.block_sparse_moe.experts.234.w2", "model.layers.34.block_sparse_moe.experts.235.w2", "model.layers.34.block_sparse_moe.experts.236.w2", "model.layers.34.block_sparse_moe.experts.237.w2", "model.layers.34.block_sparse_moe.experts.238.w2", "model.layers.34.block_sparse_moe.experts.239.w2", "model.layers.34.block_sparse_moe.experts.240.w2", "model.layers.34.block_sparse_moe.experts.241.w2", "model.layers.34.block_sparse_moe.experts.242.w2", "model.layers.34.block_sparse_moe.experts.243.w2", "model.layers.34.block_sparse_moe.experts.244.w2", "model.layers.34.block_sparse_moe.experts.245.w2", "model.layers.34.block_sparse_moe.experts.246.w2", "model.layers.34.block_sparse_moe.experts.247.w2", "model.layers.34.block_sparse_moe.experts.248.w2", "model.layers.34.block_sparse_moe.experts.249.w2", "model.layers.34.block_sparse_moe.experts.250.w2", "model.layers.34.block_sparse_moe.experts.251.w2", "model.layers.34.block_sparse_moe.experts.252.w2", "model.layers.34.block_sparse_moe.experts.253.w2", "model.layers.34.block_sparse_moe.experts.254.w2", "model.layers.34.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.002775551751255989, "dbits": 3623878656 } ] }, { "idx": 70, "layers": [ "model.layers.35.self_attn.q_proj", "model.layers.35.self_attn.k_proj", "model.layers.35.self_attn.v_proj", "model.layers.35.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0011717256158590428, "dbits": 44040192 } ] }, { "idx": 71, "layers": [ "model.layers.35.block_sparse_moe.experts.0.w1", "model.layers.35.block_sparse_moe.experts.1.w1", "model.layers.35.block_sparse_moe.experts.2.w1", "model.layers.35.block_sparse_moe.experts.3.w1", "model.layers.35.block_sparse_moe.experts.4.w1", "model.layers.35.block_sparse_moe.experts.5.w1", "model.layers.35.block_sparse_moe.experts.6.w1", "model.layers.35.block_sparse_moe.experts.7.w1", "model.layers.35.block_sparse_moe.experts.8.w1", "model.layers.35.block_sparse_moe.experts.9.w1", "model.layers.35.block_sparse_moe.experts.10.w1", "model.layers.35.block_sparse_moe.experts.11.w1", "model.layers.35.block_sparse_moe.experts.12.w1", "model.layers.35.block_sparse_moe.experts.13.w1", "model.layers.35.block_sparse_moe.experts.14.w1", "model.layers.35.block_sparse_moe.experts.15.w1", "model.layers.35.block_sparse_moe.experts.16.w1", "model.layers.35.block_sparse_moe.experts.17.w1", "model.layers.35.block_sparse_moe.experts.18.w1", "model.layers.35.block_sparse_moe.experts.19.w1", "model.layers.35.block_sparse_moe.experts.20.w1", "model.layers.35.block_sparse_moe.experts.21.w1", "model.layers.35.block_sparse_moe.experts.22.w1", "model.layers.35.block_sparse_moe.experts.23.w1", "model.layers.35.block_sparse_moe.experts.24.w1", "model.layers.35.block_sparse_moe.experts.25.w1", "model.layers.35.block_sparse_moe.experts.26.w1", "model.layers.35.block_sparse_moe.experts.27.w1", "model.layers.35.block_sparse_moe.experts.28.w1", "model.layers.35.block_sparse_moe.experts.29.w1", "model.layers.35.block_sparse_moe.experts.30.w1", "model.layers.35.block_sparse_moe.experts.31.w1", "model.layers.35.block_sparse_moe.experts.32.w1", "model.layers.35.block_sparse_moe.experts.33.w1", "model.layers.35.block_sparse_moe.experts.34.w1", "model.layers.35.block_sparse_moe.experts.35.w1", "model.layers.35.block_sparse_moe.experts.36.w1", "model.layers.35.block_sparse_moe.experts.37.w1", "model.layers.35.block_sparse_moe.experts.38.w1", "model.layers.35.block_sparse_moe.experts.39.w1", "model.layers.35.block_sparse_moe.experts.40.w1", "model.layers.35.block_sparse_moe.experts.41.w1", "model.layers.35.block_sparse_moe.experts.42.w1", "model.layers.35.block_sparse_moe.experts.43.w1", "model.layers.35.block_sparse_moe.experts.44.w1", "model.layers.35.block_sparse_moe.experts.45.w1", "model.layers.35.block_sparse_moe.experts.46.w1", "model.layers.35.block_sparse_moe.experts.47.w1", "model.layers.35.block_sparse_moe.experts.48.w1", "model.layers.35.block_sparse_moe.experts.49.w1", "model.layers.35.block_sparse_moe.experts.50.w1", "model.layers.35.block_sparse_moe.experts.51.w1", "model.layers.35.block_sparse_moe.experts.52.w1", "model.layers.35.block_sparse_moe.experts.53.w1", "model.layers.35.block_sparse_moe.experts.54.w1", "model.layers.35.block_sparse_moe.experts.55.w1", "model.layers.35.block_sparse_moe.experts.56.w1", "model.layers.35.block_sparse_moe.experts.57.w1", "model.layers.35.block_sparse_moe.experts.58.w1", "model.layers.35.block_sparse_moe.experts.59.w1", "model.layers.35.block_sparse_moe.experts.60.w1", "model.layers.35.block_sparse_moe.experts.61.w1", "model.layers.35.block_sparse_moe.experts.62.w1", "model.layers.35.block_sparse_moe.experts.63.w1", "model.layers.35.block_sparse_moe.experts.64.w1", "model.layers.35.block_sparse_moe.experts.65.w1", "model.layers.35.block_sparse_moe.experts.66.w1", "model.layers.35.block_sparse_moe.experts.67.w1", "model.layers.35.block_sparse_moe.experts.68.w1", "model.layers.35.block_sparse_moe.experts.69.w1", "model.layers.35.block_sparse_moe.experts.70.w1", "model.layers.35.block_sparse_moe.experts.71.w1", "model.layers.35.block_sparse_moe.experts.72.w1", "model.layers.35.block_sparse_moe.experts.73.w1", "model.layers.35.block_sparse_moe.experts.74.w1", "model.layers.35.block_sparse_moe.experts.75.w1", "model.layers.35.block_sparse_moe.experts.76.w1", "model.layers.35.block_sparse_moe.experts.77.w1", "model.layers.35.block_sparse_moe.experts.78.w1", "model.layers.35.block_sparse_moe.experts.79.w1", "model.layers.35.block_sparse_moe.experts.80.w1", "model.layers.35.block_sparse_moe.experts.81.w1", "model.layers.35.block_sparse_moe.experts.82.w1", "model.layers.35.block_sparse_moe.experts.83.w1", "model.layers.35.block_sparse_moe.experts.84.w1", "model.layers.35.block_sparse_moe.experts.85.w1", "model.layers.35.block_sparse_moe.experts.86.w1", "model.layers.35.block_sparse_moe.experts.87.w1", "model.layers.35.block_sparse_moe.experts.88.w1", "model.layers.35.block_sparse_moe.experts.89.w1", "model.layers.35.block_sparse_moe.experts.90.w1", "model.layers.35.block_sparse_moe.experts.91.w1", "model.layers.35.block_sparse_moe.experts.92.w1", "model.layers.35.block_sparse_moe.experts.93.w1", "model.layers.35.block_sparse_moe.experts.94.w1", "model.layers.35.block_sparse_moe.experts.95.w1", "model.layers.35.block_sparse_moe.experts.96.w1", "model.layers.35.block_sparse_moe.experts.97.w1", "model.layers.35.block_sparse_moe.experts.98.w1", "model.layers.35.block_sparse_moe.experts.99.w1", "model.layers.35.block_sparse_moe.experts.100.w1", "model.layers.35.block_sparse_moe.experts.101.w1", "model.layers.35.block_sparse_moe.experts.102.w1", "model.layers.35.block_sparse_moe.experts.103.w1", "model.layers.35.block_sparse_moe.experts.104.w1", "model.layers.35.block_sparse_moe.experts.105.w1", "model.layers.35.block_sparse_moe.experts.106.w1", "model.layers.35.block_sparse_moe.experts.107.w1", "model.layers.35.block_sparse_moe.experts.108.w1", "model.layers.35.block_sparse_moe.experts.109.w1", "model.layers.35.block_sparse_moe.experts.110.w1", "model.layers.35.block_sparse_moe.experts.111.w1", "model.layers.35.block_sparse_moe.experts.112.w1", "model.layers.35.block_sparse_moe.experts.113.w1", "model.layers.35.block_sparse_moe.experts.114.w1", "model.layers.35.block_sparse_moe.experts.115.w1", "model.layers.35.block_sparse_moe.experts.116.w1", "model.layers.35.block_sparse_moe.experts.117.w1", "model.layers.35.block_sparse_moe.experts.118.w1", "model.layers.35.block_sparse_moe.experts.119.w1", "model.layers.35.block_sparse_moe.experts.120.w1", "model.layers.35.block_sparse_moe.experts.121.w1", "model.layers.35.block_sparse_moe.experts.122.w1", "model.layers.35.block_sparse_moe.experts.123.w1", "model.layers.35.block_sparse_moe.experts.124.w1", "model.layers.35.block_sparse_moe.experts.125.w1", "model.layers.35.block_sparse_moe.experts.126.w1", "model.layers.35.block_sparse_moe.experts.127.w1", "model.layers.35.block_sparse_moe.experts.128.w1", "model.layers.35.block_sparse_moe.experts.129.w1", "model.layers.35.block_sparse_moe.experts.130.w1", "model.layers.35.block_sparse_moe.experts.131.w1", "model.layers.35.block_sparse_moe.experts.132.w1", "model.layers.35.block_sparse_moe.experts.133.w1", "model.layers.35.block_sparse_moe.experts.134.w1", "model.layers.35.block_sparse_moe.experts.135.w1", "model.layers.35.block_sparse_moe.experts.136.w1", "model.layers.35.block_sparse_moe.experts.137.w1", "model.layers.35.block_sparse_moe.experts.138.w1", "model.layers.35.block_sparse_moe.experts.139.w1", "model.layers.35.block_sparse_moe.experts.140.w1", "model.layers.35.block_sparse_moe.experts.141.w1", "model.layers.35.block_sparse_moe.experts.142.w1", "model.layers.35.block_sparse_moe.experts.143.w1", "model.layers.35.block_sparse_moe.experts.144.w1", "model.layers.35.block_sparse_moe.experts.145.w1", "model.layers.35.block_sparse_moe.experts.146.w1", "model.layers.35.block_sparse_moe.experts.147.w1", "model.layers.35.block_sparse_moe.experts.148.w1", "model.layers.35.block_sparse_moe.experts.149.w1", "model.layers.35.block_sparse_moe.experts.150.w1", "model.layers.35.block_sparse_moe.experts.151.w1", "model.layers.35.block_sparse_moe.experts.152.w1", "model.layers.35.block_sparse_moe.experts.153.w1", "model.layers.35.block_sparse_moe.experts.154.w1", "model.layers.35.block_sparse_moe.experts.155.w1", "model.layers.35.block_sparse_moe.experts.156.w1", "model.layers.35.block_sparse_moe.experts.157.w1", "model.layers.35.block_sparse_moe.experts.158.w1", "model.layers.35.block_sparse_moe.experts.159.w1", "model.layers.35.block_sparse_moe.experts.160.w1", "model.layers.35.block_sparse_moe.experts.161.w1", "model.layers.35.block_sparse_moe.experts.162.w1", "model.layers.35.block_sparse_moe.experts.163.w1", "model.layers.35.block_sparse_moe.experts.164.w1", "model.layers.35.block_sparse_moe.experts.165.w1", "model.layers.35.block_sparse_moe.experts.166.w1", "model.layers.35.block_sparse_moe.experts.167.w1", "model.layers.35.block_sparse_moe.experts.168.w1", "model.layers.35.block_sparse_moe.experts.169.w1", "model.layers.35.block_sparse_moe.experts.170.w1", "model.layers.35.block_sparse_moe.experts.171.w1", "model.layers.35.block_sparse_moe.experts.172.w1", "model.layers.35.block_sparse_moe.experts.173.w1", "model.layers.35.block_sparse_moe.experts.174.w1", "model.layers.35.block_sparse_moe.experts.175.w1", "model.layers.35.block_sparse_moe.experts.176.w1", "model.layers.35.block_sparse_moe.experts.177.w1", "model.layers.35.block_sparse_moe.experts.178.w1", "model.layers.35.block_sparse_moe.experts.179.w1", "model.layers.35.block_sparse_moe.experts.180.w1", "model.layers.35.block_sparse_moe.experts.181.w1", "model.layers.35.block_sparse_moe.experts.182.w1", "model.layers.35.block_sparse_moe.experts.183.w1", "model.layers.35.block_sparse_moe.experts.184.w1", "model.layers.35.block_sparse_moe.experts.185.w1", "model.layers.35.block_sparse_moe.experts.186.w1", "model.layers.35.block_sparse_moe.experts.187.w1", "model.layers.35.block_sparse_moe.experts.188.w1", "model.layers.35.block_sparse_moe.experts.189.w1", "model.layers.35.block_sparse_moe.experts.190.w1", "model.layers.35.block_sparse_moe.experts.191.w1", "model.layers.35.block_sparse_moe.experts.192.w1", "model.layers.35.block_sparse_moe.experts.193.w1", "model.layers.35.block_sparse_moe.experts.194.w1", "model.layers.35.block_sparse_moe.experts.195.w1", "model.layers.35.block_sparse_moe.experts.196.w1", "model.layers.35.block_sparse_moe.experts.197.w1", "model.layers.35.block_sparse_moe.experts.198.w1", "model.layers.35.block_sparse_moe.experts.199.w1", "model.layers.35.block_sparse_moe.experts.200.w1", "model.layers.35.block_sparse_moe.experts.201.w1", "model.layers.35.block_sparse_moe.experts.202.w1", "model.layers.35.block_sparse_moe.experts.203.w1", "model.layers.35.block_sparse_moe.experts.204.w1", "model.layers.35.block_sparse_moe.experts.205.w1", "model.layers.35.block_sparse_moe.experts.206.w1", "model.layers.35.block_sparse_moe.experts.207.w1", "model.layers.35.block_sparse_moe.experts.208.w1", "model.layers.35.block_sparse_moe.experts.209.w1", "model.layers.35.block_sparse_moe.experts.210.w1", "model.layers.35.block_sparse_moe.experts.211.w1", "model.layers.35.block_sparse_moe.experts.212.w1", "model.layers.35.block_sparse_moe.experts.213.w1", "model.layers.35.block_sparse_moe.experts.214.w1", "model.layers.35.block_sparse_moe.experts.215.w1", "model.layers.35.block_sparse_moe.experts.216.w1", "model.layers.35.block_sparse_moe.experts.217.w1", "model.layers.35.block_sparse_moe.experts.218.w1", "model.layers.35.block_sparse_moe.experts.219.w1", "model.layers.35.block_sparse_moe.experts.220.w1", "model.layers.35.block_sparse_moe.experts.221.w1", "model.layers.35.block_sparse_moe.experts.222.w1", "model.layers.35.block_sparse_moe.experts.223.w1", "model.layers.35.block_sparse_moe.experts.224.w1", "model.layers.35.block_sparse_moe.experts.225.w1", "model.layers.35.block_sparse_moe.experts.226.w1", "model.layers.35.block_sparse_moe.experts.227.w1", "model.layers.35.block_sparse_moe.experts.228.w1", "model.layers.35.block_sparse_moe.experts.229.w1", "model.layers.35.block_sparse_moe.experts.230.w1", "model.layers.35.block_sparse_moe.experts.231.w1", "model.layers.35.block_sparse_moe.experts.232.w1", "model.layers.35.block_sparse_moe.experts.233.w1", "model.layers.35.block_sparse_moe.experts.234.w1", "model.layers.35.block_sparse_moe.experts.235.w1", "model.layers.35.block_sparse_moe.experts.236.w1", "model.layers.35.block_sparse_moe.experts.237.w1", "model.layers.35.block_sparse_moe.experts.238.w1", "model.layers.35.block_sparse_moe.experts.239.w1", "model.layers.35.block_sparse_moe.experts.240.w1", "model.layers.35.block_sparse_moe.experts.241.w1", "model.layers.35.block_sparse_moe.experts.242.w1", "model.layers.35.block_sparse_moe.experts.243.w1", "model.layers.35.block_sparse_moe.experts.244.w1", "model.layers.35.block_sparse_moe.experts.245.w1", "model.layers.35.block_sparse_moe.experts.246.w1", "model.layers.35.block_sparse_moe.experts.247.w1", "model.layers.35.block_sparse_moe.experts.248.w1", "model.layers.35.block_sparse_moe.experts.249.w1", "model.layers.35.block_sparse_moe.experts.250.w1", "model.layers.35.block_sparse_moe.experts.251.w1", "model.layers.35.block_sparse_moe.experts.252.w1", "model.layers.35.block_sparse_moe.experts.253.w1", "model.layers.35.block_sparse_moe.experts.254.w1", "model.layers.35.block_sparse_moe.experts.255.w1", "model.layers.35.block_sparse_moe.experts.0.w3", "model.layers.35.block_sparse_moe.experts.1.w3", "model.layers.35.block_sparse_moe.experts.2.w3", "model.layers.35.block_sparse_moe.experts.3.w3", "model.layers.35.block_sparse_moe.experts.4.w3", "model.layers.35.block_sparse_moe.experts.5.w3", "model.layers.35.block_sparse_moe.experts.6.w3", "model.layers.35.block_sparse_moe.experts.7.w3", "model.layers.35.block_sparse_moe.experts.8.w3", "model.layers.35.block_sparse_moe.experts.9.w3", "model.layers.35.block_sparse_moe.experts.10.w3", "model.layers.35.block_sparse_moe.experts.11.w3", "model.layers.35.block_sparse_moe.experts.12.w3", "model.layers.35.block_sparse_moe.experts.13.w3", "model.layers.35.block_sparse_moe.experts.14.w3", "model.layers.35.block_sparse_moe.experts.15.w3", "model.layers.35.block_sparse_moe.experts.16.w3", "model.layers.35.block_sparse_moe.experts.17.w3", "model.layers.35.block_sparse_moe.experts.18.w3", "model.layers.35.block_sparse_moe.experts.19.w3", "model.layers.35.block_sparse_moe.experts.20.w3", "model.layers.35.block_sparse_moe.experts.21.w3", "model.layers.35.block_sparse_moe.experts.22.w3", "model.layers.35.block_sparse_moe.experts.23.w3", "model.layers.35.block_sparse_moe.experts.24.w3", "model.layers.35.block_sparse_moe.experts.25.w3", "model.layers.35.block_sparse_moe.experts.26.w3", "model.layers.35.block_sparse_moe.experts.27.w3", "model.layers.35.block_sparse_moe.experts.28.w3", "model.layers.35.block_sparse_moe.experts.29.w3", "model.layers.35.block_sparse_moe.experts.30.w3", "model.layers.35.block_sparse_moe.experts.31.w3", "model.layers.35.block_sparse_moe.experts.32.w3", "model.layers.35.block_sparse_moe.experts.33.w3", "model.layers.35.block_sparse_moe.experts.34.w3", "model.layers.35.block_sparse_moe.experts.35.w3", "model.layers.35.block_sparse_moe.experts.36.w3", "model.layers.35.block_sparse_moe.experts.37.w3", "model.layers.35.block_sparse_moe.experts.38.w3", "model.layers.35.block_sparse_moe.experts.39.w3", "model.layers.35.block_sparse_moe.experts.40.w3", "model.layers.35.block_sparse_moe.experts.41.w3", "model.layers.35.block_sparse_moe.experts.42.w3", "model.layers.35.block_sparse_moe.experts.43.w3", "model.layers.35.block_sparse_moe.experts.44.w3", "model.layers.35.block_sparse_moe.experts.45.w3", "model.layers.35.block_sparse_moe.experts.46.w3", "model.layers.35.block_sparse_moe.experts.47.w3", "model.layers.35.block_sparse_moe.experts.48.w3", "model.layers.35.block_sparse_moe.experts.49.w3", "model.layers.35.block_sparse_moe.experts.50.w3", "model.layers.35.block_sparse_moe.experts.51.w3", "model.layers.35.block_sparse_moe.experts.52.w3", "model.layers.35.block_sparse_moe.experts.53.w3", "model.layers.35.block_sparse_moe.experts.54.w3", "model.layers.35.block_sparse_moe.experts.55.w3", "model.layers.35.block_sparse_moe.experts.56.w3", "model.layers.35.block_sparse_moe.experts.57.w3", "model.layers.35.block_sparse_moe.experts.58.w3", "model.layers.35.block_sparse_moe.experts.59.w3", "model.layers.35.block_sparse_moe.experts.60.w3", "model.layers.35.block_sparse_moe.experts.61.w3", "model.layers.35.block_sparse_moe.experts.62.w3", "model.layers.35.block_sparse_moe.experts.63.w3", "model.layers.35.block_sparse_moe.experts.64.w3", "model.layers.35.block_sparse_moe.experts.65.w3", "model.layers.35.block_sparse_moe.experts.66.w3", "model.layers.35.block_sparse_moe.experts.67.w3", "model.layers.35.block_sparse_moe.experts.68.w3", "model.layers.35.block_sparse_moe.experts.69.w3", "model.layers.35.block_sparse_moe.experts.70.w3", "model.layers.35.block_sparse_moe.experts.71.w3", "model.layers.35.block_sparse_moe.experts.72.w3", "model.layers.35.block_sparse_moe.experts.73.w3", "model.layers.35.block_sparse_moe.experts.74.w3", "model.layers.35.block_sparse_moe.experts.75.w3", "model.layers.35.block_sparse_moe.experts.76.w3", "model.layers.35.block_sparse_moe.experts.77.w3", "model.layers.35.block_sparse_moe.experts.78.w3", "model.layers.35.block_sparse_moe.experts.79.w3", "model.layers.35.block_sparse_moe.experts.80.w3", "model.layers.35.block_sparse_moe.experts.81.w3", "model.layers.35.block_sparse_moe.experts.82.w3", "model.layers.35.block_sparse_moe.experts.83.w3", "model.layers.35.block_sparse_moe.experts.84.w3", "model.layers.35.block_sparse_moe.experts.85.w3", "model.layers.35.block_sparse_moe.experts.86.w3", "model.layers.35.block_sparse_moe.experts.87.w3", "model.layers.35.block_sparse_moe.experts.88.w3", "model.layers.35.block_sparse_moe.experts.89.w3", "model.layers.35.block_sparse_moe.experts.90.w3", "model.layers.35.block_sparse_moe.experts.91.w3", "model.layers.35.block_sparse_moe.experts.92.w3", "model.layers.35.block_sparse_moe.experts.93.w3", "model.layers.35.block_sparse_moe.experts.94.w3", "model.layers.35.block_sparse_moe.experts.95.w3", "model.layers.35.block_sparse_moe.experts.96.w3", "model.layers.35.block_sparse_moe.experts.97.w3", "model.layers.35.block_sparse_moe.experts.98.w3", "model.layers.35.block_sparse_moe.experts.99.w3", "model.layers.35.block_sparse_moe.experts.100.w3", "model.layers.35.block_sparse_moe.experts.101.w3", "model.layers.35.block_sparse_moe.experts.102.w3", "model.layers.35.block_sparse_moe.experts.103.w3", "model.layers.35.block_sparse_moe.experts.104.w3", "model.layers.35.block_sparse_moe.experts.105.w3", "model.layers.35.block_sparse_moe.experts.106.w3", "model.layers.35.block_sparse_moe.experts.107.w3", "model.layers.35.block_sparse_moe.experts.108.w3", "model.layers.35.block_sparse_moe.experts.109.w3", "model.layers.35.block_sparse_moe.experts.110.w3", "model.layers.35.block_sparse_moe.experts.111.w3", "model.layers.35.block_sparse_moe.experts.112.w3", "model.layers.35.block_sparse_moe.experts.113.w3", "model.layers.35.block_sparse_moe.experts.114.w3", "model.layers.35.block_sparse_moe.experts.115.w3", "model.layers.35.block_sparse_moe.experts.116.w3", "model.layers.35.block_sparse_moe.experts.117.w3", "model.layers.35.block_sparse_moe.experts.118.w3", "model.layers.35.block_sparse_moe.experts.119.w3", "model.layers.35.block_sparse_moe.experts.120.w3", "model.layers.35.block_sparse_moe.experts.121.w3", "model.layers.35.block_sparse_moe.experts.122.w3", "model.layers.35.block_sparse_moe.experts.123.w3", "model.layers.35.block_sparse_moe.experts.124.w3", "model.layers.35.block_sparse_moe.experts.125.w3", "model.layers.35.block_sparse_moe.experts.126.w3", "model.layers.35.block_sparse_moe.experts.127.w3", "model.layers.35.block_sparse_moe.experts.128.w3", "model.layers.35.block_sparse_moe.experts.129.w3", "model.layers.35.block_sparse_moe.experts.130.w3", "model.layers.35.block_sparse_moe.experts.131.w3", "model.layers.35.block_sparse_moe.experts.132.w3", "model.layers.35.block_sparse_moe.experts.133.w3", "model.layers.35.block_sparse_moe.experts.134.w3", "model.layers.35.block_sparse_moe.experts.135.w3", "model.layers.35.block_sparse_moe.experts.136.w3", "model.layers.35.block_sparse_moe.experts.137.w3", "model.layers.35.block_sparse_moe.experts.138.w3", "model.layers.35.block_sparse_moe.experts.139.w3", "model.layers.35.block_sparse_moe.experts.140.w3", "model.layers.35.block_sparse_moe.experts.141.w3", "model.layers.35.block_sparse_moe.experts.142.w3", "model.layers.35.block_sparse_moe.experts.143.w3", "model.layers.35.block_sparse_moe.experts.144.w3", "model.layers.35.block_sparse_moe.experts.145.w3", "model.layers.35.block_sparse_moe.experts.146.w3", "model.layers.35.block_sparse_moe.experts.147.w3", "model.layers.35.block_sparse_moe.experts.148.w3", "model.layers.35.block_sparse_moe.experts.149.w3", "model.layers.35.block_sparse_moe.experts.150.w3", "model.layers.35.block_sparse_moe.experts.151.w3", "model.layers.35.block_sparse_moe.experts.152.w3", "model.layers.35.block_sparse_moe.experts.153.w3", "model.layers.35.block_sparse_moe.experts.154.w3", "model.layers.35.block_sparse_moe.experts.155.w3", "model.layers.35.block_sparse_moe.experts.156.w3", "model.layers.35.block_sparse_moe.experts.157.w3", "model.layers.35.block_sparse_moe.experts.158.w3", "model.layers.35.block_sparse_moe.experts.159.w3", "model.layers.35.block_sparse_moe.experts.160.w3", "model.layers.35.block_sparse_moe.experts.161.w3", "model.layers.35.block_sparse_moe.experts.162.w3", "model.layers.35.block_sparse_moe.experts.163.w3", "model.layers.35.block_sparse_moe.experts.164.w3", "model.layers.35.block_sparse_moe.experts.165.w3", "model.layers.35.block_sparse_moe.experts.166.w3", "model.layers.35.block_sparse_moe.experts.167.w3", "model.layers.35.block_sparse_moe.experts.168.w3", "model.layers.35.block_sparse_moe.experts.169.w3", "model.layers.35.block_sparse_moe.experts.170.w3", "model.layers.35.block_sparse_moe.experts.171.w3", "model.layers.35.block_sparse_moe.experts.172.w3", "model.layers.35.block_sparse_moe.experts.173.w3", "model.layers.35.block_sparse_moe.experts.174.w3", "model.layers.35.block_sparse_moe.experts.175.w3", "model.layers.35.block_sparse_moe.experts.176.w3", "model.layers.35.block_sparse_moe.experts.177.w3", "model.layers.35.block_sparse_moe.experts.178.w3", "model.layers.35.block_sparse_moe.experts.179.w3", "model.layers.35.block_sparse_moe.experts.180.w3", "model.layers.35.block_sparse_moe.experts.181.w3", "model.layers.35.block_sparse_moe.experts.182.w3", "model.layers.35.block_sparse_moe.experts.183.w3", "model.layers.35.block_sparse_moe.experts.184.w3", "model.layers.35.block_sparse_moe.experts.185.w3", "model.layers.35.block_sparse_moe.experts.186.w3", "model.layers.35.block_sparse_moe.experts.187.w3", "model.layers.35.block_sparse_moe.experts.188.w3", "model.layers.35.block_sparse_moe.experts.189.w3", "model.layers.35.block_sparse_moe.experts.190.w3", "model.layers.35.block_sparse_moe.experts.191.w3", "model.layers.35.block_sparse_moe.experts.192.w3", "model.layers.35.block_sparse_moe.experts.193.w3", "model.layers.35.block_sparse_moe.experts.194.w3", "model.layers.35.block_sparse_moe.experts.195.w3", "model.layers.35.block_sparse_moe.experts.196.w3", "model.layers.35.block_sparse_moe.experts.197.w3", "model.layers.35.block_sparse_moe.experts.198.w3", "model.layers.35.block_sparse_moe.experts.199.w3", "model.layers.35.block_sparse_moe.experts.200.w3", "model.layers.35.block_sparse_moe.experts.201.w3", "model.layers.35.block_sparse_moe.experts.202.w3", "model.layers.35.block_sparse_moe.experts.203.w3", "model.layers.35.block_sparse_moe.experts.204.w3", "model.layers.35.block_sparse_moe.experts.205.w3", "model.layers.35.block_sparse_moe.experts.206.w3", "model.layers.35.block_sparse_moe.experts.207.w3", "model.layers.35.block_sparse_moe.experts.208.w3", "model.layers.35.block_sparse_moe.experts.209.w3", "model.layers.35.block_sparse_moe.experts.210.w3", "model.layers.35.block_sparse_moe.experts.211.w3", "model.layers.35.block_sparse_moe.experts.212.w3", "model.layers.35.block_sparse_moe.experts.213.w3", "model.layers.35.block_sparse_moe.experts.214.w3", "model.layers.35.block_sparse_moe.experts.215.w3", "model.layers.35.block_sparse_moe.experts.216.w3", "model.layers.35.block_sparse_moe.experts.217.w3", "model.layers.35.block_sparse_moe.experts.218.w3", "model.layers.35.block_sparse_moe.experts.219.w3", "model.layers.35.block_sparse_moe.experts.220.w3", "model.layers.35.block_sparse_moe.experts.221.w3", "model.layers.35.block_sparse_moe.experts.222.w3", "model.layers.35.block_sparse_moe.experts.223.w3", "model.layers.35.block_sparse_moe.experts.224.w3", "model.layers.35.block_sparse_moe.experts.225.w3", "model.layers.35.block_sparse_moe.experts.226.w3", "model.layers.35.block_sparse_moe.experts.227.w3", "model.layers.35.block_sparse_moe.experts.228.w3", "model.layers.35.block_sparse_moe.experts.229.w3", "model.layers.35.block_sparse_moe.experts.230.w3", "model.layers.35.block_sparse_moe.experts.231.w3", "model.layers.35.block_sparse_moe.experts.232.w3", "model.layers.35.block_sparse_moe.experts.233.w3", "model.layers.35.block_sparse_moe.experts.234.w3", "model.layers.35.block_sparse_moe.experts.235.w3", "model.layers.35.block_sparse_moe.experts.236.w3", "model.layers.35.block_sparse_moe.experts.237.w3", "model.layers.35.block_sparse_moe.experts.238.w3", "model.layers.35.block_sparse_moe.experts.239.w3", "model.layers.35.block_sparse_moe.experts.240.w3", "model.layers.35.block_sparse_moe.experts.241.w3", "model.layers.35.block_sparse_moe.experts.242.w3", "model.layers.35.block_sparse_moe.experts.243.w3", "model.layers.35.block_sparse_moe.experts.244.w3", "model.layers.35.block_sparse_moe.experts.245.w3", "model.layers.35.block_sparse_moe.experts.246.w3", "model.layers.35.block_sparse_moe.experts.247.w3", "model.layers.35.block_sparse_moe.experts.248.w3", "model.layers.35.block_sparse_moe.experts.249.w3", "model.layers.35.block_sparse_moe.experts.250.w3", "model.layers.35.block_sparse_moe.experts.251.w3", "model.layers.35.block_sparse_moe.experts.252.w3", "model.layers.35.block_sparse_moe.experts.253.w3", "model.layers.35.block_sparse_moe.experts.254.w3", "model.layers.35.block_sparse_moe.experts.255.w3", "model.layers.35.block_sparse_moe.experts.0.w2", "model.layers.35.block_sparse_moe.experts.1.w2", "model.layers.35.block_sparse_moe.experts.2.w2", "model.layers.35.block_sparse_moe.experts.3.w2", "model.layers.35.block_sparse_moe.experts.4.w2", "model.layers.35.block_sparse_moe.experts.5.w2", "model.layers.35.block_sparse_moe.experts.6.w2", "model.layers.35.block_sparse_moe.experts.7.w2", "model.layers.35.block_sparse_moe.experts.8.w2", "model.layers.35.block_sparse_moe.experts.9.w2", "model.layers.35.block_sparse_moe.experts.10.w2", "model.layers.35.block_sparse_moe.experts.11.w2", "model.layers.35.block_sparse_moe.experts.12.w2", "model.layers.35.block_sparse_moe.experts.13.w2", "model.layers.35.block_sparse_moe.experts.14.w2", "model.layers.35.block_sparse_moe.experts.15.w2", "model.layers.35.block_sparse_moe.experts.16.w2", "model.layers.35.block_sparse_moe.experts.17.w2", "model.layers.35.block_sparse_moe.experts.18.w2", "model.layers.35.block_sparse_moe.experts.19.w2", "model.layers.35.block_sparse_moe.experts.20.w2", "model.layers.35.block_sparse_moe.experts.21.w2", "model.layers.35.block_sparse_moe.experts.22.w2", "model.layers.35.block_sparse_moe.experts.23.w2", "model.layers.35.block_sparse_moe.experts.24.w2", "model.layers.35.block_sparse_moe.experts.25.w2", "model.layers.35.block_sparse_moe.experts.26.w2", "model.layers.35.block_sparse_moe.experts.27.w2", "model.layers.35.block_sparse_moe.experts.28.w2", "model.layers.35.block_sparse_moe.experts.29.w2", "model.layers.35.block_sparse_moe.experts.30.w2", "model.layers.35.block_sparse_moe.experts.31.w2", "model.layers.35.block_sparse_moe.experts.32.w2", "model.layers.35.block_sparse_moe.experts.33.w2", "model.layers.35.block_sparse_moe.experts.34.w2", "model.layers.35.block_sparse_moe.experts.35.w2", "model.layers.35.block_sparse_moe.experts.36.w2", "model.layers.35.block_sparse_moe.experts.37.w2", "model.layers.35.block_sparse_moe.experts.38.w2", "model.layers.35.block_sparse_moe.experts.39.w2", "model.layers.35.block_sparse_moe.experts.40.w2", "model.layers.35.block_sparse_moe.experts.41.w2", "model.layers.35.block_sparse_moe.experts.42.w2", "model.layers.35.block_sparse_moe.experts.43.w2", "model.layers.35.block_sparse_moe.experts.44.w2", "model.layers.35.block_sparse_moe.experts.45.w2", "model.layers.35.block_sparse_moe.experts.46.w2", "model.layers.35.block_sparse_moe.experts.47.w2", "model.layers.35.block_sparse_moe.experts.48.w2", "model.layers.35.block_sparse_moe.experts.49.w2", "model.layers.35.block_sparse_moe.experts.50.w2", "model.layers.35.block_sparse_moe.experts.51.w2", "model.layers.35.block_sparse_moe.experts.52.w2", "model.layers.35.block_sparse_moe.experts.53.w2", "model.layers.35.block_sparse_moe.experts.54.w2", "model.layers.35.block_sparse_moe.experts.55.w2", "model.layers.35.block_sparse_moe.experts.56.w2", "model.layers.35.block_sparse_moe.experts.57.w2", "model.layers.35.block_sparse_moe.experts.58.w2", "model.layers.35.block_sparse_moe.experts.59.w2", "model.layers.35.block_sparse_moe.experts.60.w2", "model.layers.35.block_sparse_moe.experts.61.w2", "model.layers.35.block_sparse_moe.experts.62.w2", "model.layers.35.block_sparse_moe.experts.63.w2", "model.layers.35.block_sparse_moe.experts.64.w2", "model.layers.35.block_sparse_moe.experts.65.w2", "model.layers.35.block_sparse_moe.experts.66.w2", "model.layers.35.block_sparse_moe.experts.67.w2", "model.layers.35.block_sparse_moe.experts.68.w2", "model.layers.35.block_sparse_moe.experts.69.w2", "model.layers.35.block_sparse_moe.experts.70.w2", "model.layers.35.block_sparse_moe.experts.71.w2", "model.layers.35.block_sparse_moe.experts.72.w2", "model.layers.35.block_sparse_moe.experts.73.w2", "model.layers.35.block_sparse_moe.experts.74.w2", "model.layers.35.block_sparse_moe.experts.75.w2", "model.layers.35.block_sparse_moe.experts.76.w2", "model.layers.35.block_sparse_moe.experts.77.w2", "model.layers.35.block_sparse_moe.experts.78.w2", "model.layers.35.block_sparse_moe.experts.79.w2", "model.layers.35.block_sparse_moe.experts.80.w2", "model.layers.35.block_sparse_moe.experts.81.w2", "model.layers.35.block_sparse_moe.experts.82.w2", "model.layers.35.block_sparse_moe.experts.83.w2", "model.layers.35.block_sparse_moe.experts.84.w2", "model.layers.35.block_sparse_moe.experts.85.w2", "model.layers.35.block_sparse_moe.experts.86.w2", "model.layers.35.block_sparse_moe.experts.87.w2", "model.layers.35.block_sparse_moe.experts.88.w2", "model.layers.35.block_sparse_moe.experts.89.w2", "model.layers.35.block_sparse_moe.experts.90.w2", "model.layers.35.block_sparse_moe.experts.91.w2", "model.layers.35.block_sparse_moe.experts.92.w2", "model.layers.35.block_sparse_moe.experts.93.w2", "model.layers.35.block_sparse_moe.experts.94.w2", "model.layers.35.block_sparse_moe.experts.95.w2", "model.layers.35.block_sparse_moe.experts.96.w2", "model.layers.35.block_sparse_moe.experts.97.w2", "model.layers.35.block_sparse_moe.experts.98.w2", "model.layers.35.block_sparse_moe.experts.99.w2", "model.layers.35.block_sparse_moe.experts.100.w2", "model.layers.35.block_sparse_moe.experts.101.w2", "model.layers.35.block_sparse_moe.experts.102.w2", "model.layers.35.block_sparse_moe.experts.103.w2", "model.layers.35.block_sparse_moe.experts.104.w2", "model.layers.35.block_sparse_moe.experts.105.w2", "model.layers.35.block_sparse_moe.experts.106.w2", "model.layers.35.block_sparse_moe.experts.107.w2", "model.layers.35.block_sparse_moe.experts.108.w2", "model.layers.35.block_sparse_moe.experts.109.w2", "model.layers.35.block_sparse_moe.experts.110.w2", "model.layers.35.block_sparse_moe.experts.111.w2", "model.layers.35.block_sparse_moe.experts.112.w2", "model.layers.35.block_sparse_moe.experts.113.w2", "model.layers.35.block_sparse_moe.experts.114.w2", "model.layers.35.block_sparse_moe.experts.115.w2", "model.layers.35.block_sparse_moe.experts.116.w2", "model.layers.35.block_sparse_moe.experts.117.w2", "model.layers.35.block_sparse_moe.experts.118.w2", "model.layers.35.block_sparse_moe.experts.119.w2", "model.layers.35.block_sparse_moe.experts.120.w2", "model.layers.35.block_sparse_moe.experts.121.w2", "model.layers.35.block_sparse_moe.experts.122.w2", "model.layers.35.block_sparse_moe.experts.123.w2", "model.layers.35.block_sparse_moe.experts.124.w2", "model.layers.35.block_sparse_moe.experts.125.w2", "model.layers.35.block_sparse_moe.experts.126.w2", "model.layers.35.block_sparse_moe.experts.127.w2", "model.layers.35.block_sparse_moe.experts.128.w2", "model.layers.35.block_sparse_moe.experts.129.w2", "model.layers.35.block_sparse_moe.experts.130.w2", "model.layers.35.block_sparse_moe.experts.131.w2", "model.layers.35.block_sparse_moe.experts.132.w2", "model.layers.35.block_sparse_moe.experts.133.w2", "model.layers.35.block_sparse_moe.experts.134.w2", "model.layers.35.block_sparse_moe.experts.135.w2", "model.layers.35.block_sparse_moe.experts.136.w2", "model.layers.35.block_sparse_moe.experts.137.w2", "model.layers.35.block_sparse_moe.experts.138.w2", "model.layers.35.block_sparse_moe.experts.139.w2", "model.layers.35.block_sparse_moe.experts.140.w2", "model.layers.35.block_sparse_moe.experts.141.w2", "model.layers.35.block_sparse_moe.experts.142.w2", "model.layers.35.block_sparse_moe.experts.143.w2", "model.layers.35.block_sparse_moe.experts.144.w2", "model.layers.35.block_sparse_moe.experts.145.w2", "model.layers.35.block_sparse_moe.experts.146.w2", "model.layers.35.block_sparse_moe.experts.147.w2", "model.layers.35.block_sparse_moe.experts.148.w2", "model.layers.35.block_sparse_moe.experts.149.w2", "model.layers.35.block_sparse_moe.experts.150.w2", "model.layers.35.block_sparse_moe.experts.151.w2", "model.layers.35.block_sparse_moe.experts.152.w2", "model.layers.35.block_sparse_moe.experts.153.w2", "model.layers.35.block_sparse_moe.experts.154.w2", "model.layers.35.block_sparse_moe.experts.155.w2", "model.layers.35.block_sparse_moe.experts.156.w2", "model.layers.35.block_sparse_moe.experts.157.w2", "model.layers.35.block_sparse_moe.experts.158.w2", "model.layers.35.block_sparse_moe.experts.159.w2", "model.layers.35.block_sparse_moe.experts.160.w2", "model.layers.35.block_sparse_moe.experts.161.w2", "model.layers.35.block_sparse_moe.experts.162.w2", "model.layers.35.block_sparse_moe.experts.163.w2", "model.layers.35.block_sparse_moe.experts.164.w2", "model.layers.35.block_sparse_moe.experts.165.w2", "model.layers.35.block_sparse_moe.experts.166.w2", "model.layers.35.block_sparse_moe.experts.167.w2", "model.layers.35.block_sparse_moe.experts.168.w2", "model.layers.35.block_sparse_moe.experts.169.w2", "model.layers.35.block_sparse_moe.experts.170.w2", "model.layers.35.block_sparse_moe.experts.171.w2", "model.layers.35.block_sparse_moe.experts.172.w2", "model.layers.35.block_sparse_moe.experts.173.w2", "model.layers.35.block_sparse_moe.experts.174.w2", "model.layers.35.block_sparse_moe.experts.175.w2", "model.layers.35.block_sparse_moe.experts.176.w2", "model.layers.35.block_sparse_moe.experts.177.w2", "model.layers.35.block_sparse_moe.experts.178.w2", "model.layers.35.block_sparse_moe.experts.179.w2", "model.layers.35.block_sparse_moe.experts.180.w2", "model.layers.35.block_sparse_moe.experts.181.w2", "model.layers.35.block_sparse_moe.experts.182.w2", "model.layers.35.block_sparse_moe.experts.183.w2", "model.layers.35.block_sparse_moe.experts.184.w2", "model.layers.35.block_sparse_moe.experts.185.w2", "model.layers.35.block_sparse_moe.experts.186.w2", "model.layers.35.block_sparse_moe.experts.187.w2", "model.layers.35.block_sparse_moe.experts.188.w2", "model.layers.35.block_sparse_moe.experts.189.w2", "model.layers.35.block_sparse_moe.experts.190.w2", "model.layers.35.block_sparse_moe.experts.191.w2", "model.layers.35.block_sparse_moe.experts.192.w2", "model.layers.35.block_sparse_moe.experts.193.w2", "model.layers.35.block_sparse_moe.experts.194.w2", "model.layers.35.block_sparse_moe.experts.195.w2", "model.layers.35.block_sparse_moe.experts.196.w2", "model.layers.35.block_sparse_moe.experts.197.w2", "model.layers.35.block_sparse_moe.experts.198.w2", "model.layers.35.block_sparse_moe.experts.199.w2", "model.layers.35.block_sparse_moe.experts.200.w2", "model.layers.35.block_sparse_moe.experts.201.w2", "model.layers.35.block_sparse_moe.experts.202.w2", "model.layers.35.block_sparse_moe.experts.203.w2", "model.layers.35.block_sparse_moe.experts.204.w2", "model.layers.35.block_sparse_moe.experts.205.w2", "model.layers.35.block_sparse_moe.experts.206.w2", "model.layers.35.block_sparse_moe.experts.207.w2", "model.layers.35.block_sparse_moe.experts.208.w2", "model.layers.35.block_sparse_moe.experts.209.w2", "model.layers.35.block_sparse_moe.experts.210.w2", "model.layers.35.block_sparse_moe.experts.211.w2", "model.layers.35.block_sparse_moe.experts.212.w2", "model.layers.35.block_sparse_moe.experts.213.w2", "model.layers.35.block_sparse_moe.experts.214.w2", "model.layers.35.block_sparse_moe.experts.215.w2", "model.layers.35.block_sparse_moe.experts.216.w2", "model.layers.35.block_sparse_moe.experts.217.w2", "model.layers.35.block_sparse_moe.experts.218.w2", "model.layers.35.block_sparse_moe.experts.219.w2", "model.layers.35.block_sparse_moe.experts.220.w2", "model.layers.35.block_sparse_moe.experts.221.w2", "model.layers.35.block_sparse_moe.experts.222.w2", "model.layers.35.block_sparse_moe.experts.223.w2", "model.layers.35.block_sparse_moe.experts.224.w2", "model.layers.35.block_sparse_moe.experts.225.w2", "model.layers.35.block_sparse_moe.experts.226.w2", "model.layers.35.block_sparse_moe.experts.227.w2", "model.layers.35.block_sparse_moe.experts.228.w2", "model.layers.35.block_sparse_moe.experts.229.w2", "model.layers.35.block_sparse_moe.experts.230.w2", "model.layers.35.block_sparse_moe.experts.231.w2", "model.layers.35.block_sparse_moe.experts.232.w2", "model.layers.35.block_sparse_moe.experts.233.w2", "model.layers.35.block_sparse_moe.experts.234.w2", "model.layers.35.block_sparse_moe.experts.235.w2", "model.layers.35.block_sparse_moe.experts.236.w2", "model.layers.35.block_sparse_moe.experts.237.w2", "model.layers.35.block_sparse_moe.experts.238.w2", "model.layers.35.block_sparse_moe.experts.239.w2", "model.layers.35.block_sparse_moe.experts.240.w2", "model.layers.35.block_sparse_moe.experts.241.w2", "model.layers.35.block_sparse_moe.experts.242.w2", "model.layers.35.block_sparse_moe.experts.243.w2", "model.layers.35.block_sparse_moe.experts.244.w2", "model.layers.35.block_sparse_moe.experts.245.w2", "model.layers.35.block_sparse_moe.experts.246.w2", "model.layers.35.block_sparse_moe.experts.247.w2", "model.layers.35.block_sparse_moe.experts.248.w2", "model.layers.35.block_sparse_moe.experts.249.w2", "model.layers.35.block_sparse_moe.experts.250.w2", "model.layers.35.block_sparse_moe.experts.251.w2", "model.layers.35.block_sparse_moe.experts.252.w2", "model.layers.35.block_sparse_moe.experts.253.w2", "model.layers.35.block_sparse_moe.experts.254.w2", "model.layers.35.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00085516236722466, "dbits": 3623878656 } ] }, { "idx": 72, "layers": [ "model.layers.36.self_attn.q_proj", "model.layers.36.self_attn.k_proj", "model.layers.36.self_attn.v_proj", "model.layers.36.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0026279956102371327, "dbits": 44040192 } ] }, { "idx": 73, "layers": [ "model.layers.36.block_sparse_moe.experts.0.w1", "model.layers.36.block_sparse_moe.experts.1.w1", "model.layers.36.block_sparse_moe.experts.2.w1", "model.layers.36.block_sparse_moe.experts.3.w1", "model.layers.36.block_sparse_moe.experts.4.w1", "model.layers.36.block_sparse_moe.experts.5.w1", "model.layers.36.block_sparse_moe.experts.6.w1", "model.layers.36.block_sparse_moe.experts.7.w1", "model.layers.36.block_sparse_moe.experts.8.w1", "model.layers.36.block_sparse_moe.experts.9.w1", "model.layers.36.block_sparse_moe.experts.10.w1", "model.layers.36.block_sparse_moe.experts.11.w1", "model.layers.36.block_sparse_moe.experts.12.w1", "model.layers.36.block_sparse_moe.experts.13.w1", "model.layers.36.block_sparse_moe.experts.14.w1", "model.layers.36.block_sparse_moe.experts.15.w1", "model.layers.36.block_sparse_moe.experts.16.w1", "model.layers.36.block_sparse_moe.experts.17.w1", "model.layers.36.block_sparse_moe.experts.18.w1", "model.layers.36.block_sparse_moe.experts.19.w1", "model.layers.36.block_sparse_moe.experts.20.w1", "model.layers.36.block_sparse_moe.experts.21.w1", "model.layers.36.block_sparse_moe.experts.22.w1", "model.layers.36.block_sparse_moe.experts.23.w1", "model.layers.36.block_sparse_moe.experts.24.w1", "model.layers.36.block_sparse_moe.experts.25.w1", "model.layers.36.block_sparse_moe.experts.26.w1", "model.layers.36.block_sparse_moe.experts.27.w1", "model.layers.36.block_sparse_moe.experts.28.w1", "model.layers.36.block_sparse_moe.experts.29.w1", "model.layers.36.block_sparse_moe.experts.30.w1", "model.layers.36.block_sparse_moe.experts.31.w1", "model.layers.36.block_sparse_moe.experts.32.w1", "model.layers.36.block_sparse_moe.experts.33.w1", "model.layers.36.block_sparse_moe.experts.34.w1", "model.layers.36.block_sparse_moe.experts.35.w1", "model.layers.36.block_sparse_moe.experts.36.w1", "model.layers.36.block_sparse_moe.experts.37.w1", "model.layers.36.block_sparse_moe.experts.38.w1", "model.layers.36.block_sparse_moe.experts.39.w1", "model.layers.36.block_sparse_moe.experts.40.w1", "model.layers.36.block_sparse_moe.experts.41.w1", "model.layers.36.block_sparse_moe.experts.42.w1", "model.layers.36.block_sparse_moe.experts.43.w1", "model.layers.36.block_sparse_moe.experts.44.w1", "model.layers.36.block_sparse_moe.experts.45.w1", "model.layers.36.block_sparse_moe.experts.46.w1", "model.layers.36.block_sparse_moe.experts.47.w1", "model.layers.36.block_sparse_moe.experts.48.w1", "model.layers.36.block_sparse_moe.experts.49.w1", "model.layers.36.block_sparse_moe.experts.50.w1", "model.layers.36.block_sparse_moe.experts.51.w1", "model.layers.36.block_sparse_moe.experts.52.w1", "model.layers.36.block_sparse_moe.experts.53.w1", "model.layers.36.block_sparse_moe.experts.54.w1", "model.layers.36.block_sparse_moe.experts.55.w1", "model.layers.36.block_sparse_moe.experts.56.w1", "model.layers.36.block_sparse_moe.experts.57.w1", "model.layers.36.block_sparse_moe.experts.58.w1", "model.layers.36.block_sparse_moe.experts.59.w1", "model.layers.36.block_sparse_moe.experts.60.w1", "model.layers.36.block_sparse_moe.experts.61.w1", "model.layers.36.block_sparse_moe.experts.62.w1", "model.layers.36.block_sparse_moe.experts.63.w1", "model.layers.36.block_sparse_moe.experts.64.w1", "model.layers.36.block_sparse_moe.experts.65.w1", "model.layers.36.block_sparse_moe.experts.66.w1", "model.layers.36.block_sparse_moe.experts.67.w1", "model.layers.36.block_sparse_moe.experts.68.w1", "model.layers.36.block_sparse_moe.experts.69.w1", "model.layers.36.block_sparse_moe.experts.70.w1", "model.layers.36.block_sparse_moe.experts.71.w1", "model.layers.36.block_sparse_moe.experts.72.w1", "model.layers.36.block_sparse_moe.experts.73.w1", "model.layers.36.block_sparse_moe.experts.74.w1", "model.layers.36.block_sparse_moe.experts.75.w1", "model.layers.36.block_sparse_moe.experts.76.w1", "model.layers.36.block_sparse_moe.experts.77.w1", "model.layers.36.block_sparse_moe.experts.78.w1", "model.layers.36.block_sparse_moe.experts.79.w1", "model.layers.36.block_sparse_moe.experts.80.w1", "model.layers.36.block_sparse_moe.experts.81.w1", "model.layers.36.block_sparse_moe.experts.82.w1", "model.layers.36.block_sparse_moe.experts.83.w1", "model.layers.36.block_sparse_moe.experts.84.w1", "model.layers.36.block_sparse_moe.experts.85.w1", "model.layers.36.block_sparse_moe.experts.86.w1", "model.layers.36.block_sparse_moe.experts.87.w1", "model.layers.36.block_sparse_moe.experts.88.w1", "model.layers.36.block_sparse_moe.experts.89.w1", "model.layers.36.block_sparse_moe.experts.90.w1", "model.layers.36.block_sparse_moe.experts.91.w1", "model.layers.36.block_sparse_moe.experts.92.w1", "model.layers.36.block_sparse_moe.experts.93.w1", "model.layers.36.block_sparse_moe.experts.94.w1", "model.layers.36.block_sparse_moe.experts.95.w1", "model.layers.36.block_sparse_moe.experts.96.w1", "model.layers.36.block_sparse_moe.experts.97.w1", "model.layers.36.block_sparse_moe.experts.98.w1", "model.layers.36.block_sparse_moe.experts.99.w1", "model.layers.36.block_sparse_moe.experts.100.w1", "model.layers.36.block_sparse_moe.experts.101.w1", "model.layers.36.block_sparse_moe.experts.102.w1", "model.layers.36.block_sparse_moe.experts.103.w1", "model.layers.36.block_sparse_moe.experts.104.w1", "model.layers.36.block_sparse_moe.experts.105.w1", "model.layers.36.block_sparse_moe.experts.106.w1", "model.layers.36.block_sparse_moe.experts.107.w1", "model.layers.36.block_sparse_moe.experts.108.w1", "model.layers.36.block_sparse_moe.experts.109.w1", "model.layers.36.block_sparse_moe.experts.110.w1", "model.layers.36.block_sparse_moe.experts.111.w1", "model.layers.36.block_sparse_moe.experts.112.w1", "model.layers.36.block_sparse_moe.experts.113.w1", "model.layers.36.block_sparse_moe.experts.114.w1", "model.layers.36.block_sparse_moe.experts.115.w1", "model.layers.36.block_sparse_moe.experts.116.w1", "model.layers.36.block_sparse_moe.experts.117.w1", "model.layers.36.block_sparse_moe.experts.118.w1", "model.layers.36.block_sparse_moe.experts.119.w1", "model.layers.36.block_sparse_moe.experts.120.w1", "model.layers.36.block_sparse_moe.experts.121.w1", "model.layers.36.block_sparse_moe.experts.122.w1", "model.layers.36.block_sparse_moe.experts.123.w1", "model.layers.36.block_sparse_moe.experts.124.w1", "model.layers.36.block_sparse_moe.experts.125.w1", "model.layers.36.block_sparse_moe.experts.126.w1", "model.layers.36.block_sparse_moe.experts.127.w1", "model.layers.36.block_sparse_moe.experts.128.w1", "model.layers.36.block_sparse_moe.experts.129.w1", "model.layers.36.block_sparse_moe.experts.130.w1", "model.layers.36.block_sparse_moe.experts.131.w1", "model.layers.36.block_sparse_moe.experts.132.w1", "model.layers.36.block_sparse_moe.experts.133.w1", "model.layers.36.block_sparse_moe.experts.134.w1", "model.layers.36.block_sparse_moe.experts.135.w1", "model.layers.36.block_sparse_moe.experts.136.w1", "model.layers.36.block_sparse_moe.experts.137.w1", "model.layers.36.block_sparse_moe.experts.138.w1", "model.layers.36.block_sparse_moe.experts.139.w1", "model.layers.36.block_sparse_moe.experts.140.w1", "model.layers.36.block_sparse_moe.experts.141.w1", "model.layers.36.block_sparse_moe.experts.142.w1", "model.layers.36.block_sparse_moe.experts.143.w1", "model.layers.36.block_sparse_moe.experts.144.w1", "model.layers.36.block_sparse_moe.experts.145.w1", "model.layers.36.block_sparse_moe.experts.146.w1", "model.layers.36.block_sparse_moe.experts.147.w1", "model.layers.36.block_sparse_moe.experts.148.w1", "model.layers.36.block_sparse_moe.experts.149.w1", "model.layers.36.block_sparse_moe.experts.150.w1", "model.layers.36.block_sparse_moe.experts.151.w1", "model.layers.36.block_sparse_moe.experts.152.w1", "model.layers.36.block_sparse_moe.experts.153.w1", "model.layers.36.block_sparse_moe.experts.154.w1", "model.layers.36.block_sparse_moe.experts.155.w1", "model.layers.36.block_sparse_moe.experts.156.w1", "model.layers.36.block_sparse_moe.experts.157.w1", "model.layers.36.block_sparse_moe.experts.158.w1", "model.layers.36.block_sparse_moe.experts.159.w1", "model.layers.36.block_sparse_moe.experts.160.w1", "model.layers.36.block_sparse_moe.experts.161.w1", "model.layers.36.block_sparse_moe.experts.162.w1", "model.layers.36.block_sparse_moe.experts.163.w1", "model.layers.36.block_sparse_moe.experts.164.w1", "model.layers.36.block_sparse_moe.experts.165.w1", "model.layers.36.block_sparse_moe.experts.166.w1", "model.layers.36.block_sparse_moe.experts.167.w1", "model.layers.36.block_sparse_moe.experts.168.w1", "model.layers.36.block_sparse_moe.experts.169.w1", "model.layers.36.block_sparse_moe.experts.170.w1", "model.layers.36.block_sparse_moe.experts.171.w1", "model.layers.36.block_sparse_moe.experts.172.w1", "model.layers.36.block_sparse_moe.experts.173.w1", "model.layers.36.block_sparse_moe.experts.174.w1", "model.layers.36.block_sparse_moe.experts.175.w1", "model.layers.36.block_sparse_moe.experts.176.w1", "model.layers.36.block_sparse_moe.experts.177.w1", "model.layers.36.block_sparse_moe.experts.178.w1", "model.layers.36.block_sparse_moe.experts.179.w1", "model.layers.36.block_sparse_moe.experts.180.w1", "model.layers.36.block_sparse_moe.experts.181.w1", "model.layers.36.block_sparse_moe.experts.182.w1", "model.layers.36.block_sparse_moe.experts.183.w1", "model.layers.36.block_sparse_moe.experts.184.w1", "model.layers.36.block_sparse_moe.experts.185.w1", "model.layers.36.block_sparse_moe.experts.186.w1", "model.layers.36.block_sparse_moe.experts.187.w1", "model.layers.36.block_sparse_moe.experts.188.w1", "model.layers.36.block_sparse_moe.experts.189.w1", "model.layers.36.block_sparse_moe.experts.190.w1", "model.layers.36.block_sparse_moe.experts.191.w1", "model.layers.36.block_sparse_moe.experts.192.w1", "model.layers.36.block_sparse_moe.experts.193.w1", "model.layers.36.block_sparse_moe.experts.194.w1", "model.layers.36.block_sparse_moe.experts.195.w1", "model.layers.36.block_sparse_moe.experts.196.w1", "model.layers.36.block_sparse_moe.experts.197.w1", "model.layers.36.block_sparse_moe.experts.198.w1", "model.layers.36.block_sparse_moe.experts.199.w1", "model.layers.36.block_sparse_moe.experts.200.w1", "model.layers.36.block_sparse_moe.experts.201.w1", "model.layers.36.block_sparse_moe.experts.202.w1", "model.layers.36.block_sparse_moe.experts.203.w1", "model.layers.36.block_sparse_moe.experts.204.w1", "model.layers.36.block_sparse_moe.experts.205.w1", "model.layers.36.block_sparse_moe.experts.206.w1", "model.layers.36.block_sparse_moe.experts.207.w1", "model.layers.36.block_sparse_moe.experts.208.w1", "model.layers.36.block_sparse_moe.experts.209.w1", "model.layers.36.block_sparse_moe.experts.210.w1", "model.layers.36.block_sparse_moe.experts.211.w1", "model.layers.36.block_sparse_moe.experts.212.w1", "model.layers.36.block_sparse_moe.experts.213.w1", "model.layers.36.block_sparse_moe.experts.214.w1", "model.layers.36.block_sparse_moe.experts.215.w1", "model.layers.36.block_sparse_moe.experts.216.w1", "model.layers.36.block_sparse_moe.experts.217.w1", "model.layers.36.block_sparse_moe.experts.218.w1", "model.layers.36.block_sparse_moe.experts.219.w1", "model.layers.36.block_sparse_moe.experts.220.w1", "model.layers.36.block_sparse_moe.experts.221.w1", "model.layers.36.block_sparse_moe.experts.222.w1", "model.layers.36.block_sparse_moe.experts.223.w1", "model.layers.36.block_sparse_moe.experts.224.w1", "model.layers.36.block_sparse_moe.experts.225.w1", "model.layers.36.block_sparse_moe.experts.226.w1", "model.layers.36.block_sparse_moe.experts.227.w1", "model.layers.36.block_sparse_moe.experts.228.w1", "model.layers.36.block_sparse_moe.experts.229.w1", "model.layers.36.block_sparse_moe.experts.230.w1", "model.layers.36.block_sparse_moe.experts.231.w1", "model.layers.36.block_sparse_moe.experts.232.w1", "model.layers.36.block_sparse_moe.experts.233.w1", "model.layers.36.block_sparse_moe.experts.234.w1", "model.layers.36.block_sparse_moe.experts.235.w1", "model.layers.36.block_sparse_moe.experts.236.w1", "model.layers.36.block_sparse_moe.experts.237.w1", "model.layers.36.block_sparse_moe.experts.238.w1", "model.layers.36.block_sparse_moe.experts.239.w1", "model.layers.36.block_sparse_moe.experts.240.w1", "model.layers.36.block_sparse_moe.experts.241.w1", "model.layers.36.block_sparse_moe.experts.242.w1", "model.layers.36.block_sparse_moe.experts.243.w1", "model.layers.36.block_sparse_moe.experts.244.w1", "model.layers.36.block_sparse_moe.experts.245.w1", "model.layers.36.block_sparse_moe.experts.246.w1", "model.layers.36.block_sparse_moe.experts.247.w1", "model.layers.36.block_sparse_moe.experts.248.w1", "model.layers.36.block_sparse_moe.experts.249.w1", "model.layers.36.block_sparse_moe.experts.250.w1", "model.layers.36.block_sparse_moe.experts.251.w1", "model.layers.36.block_sparse_moe.experts.252.w1", "model.layers.36.block_sparse_moe.experts.253.w1", "model.layers.36.block_sparse_moe.experts.254.w1", "model.layers.36.block_sparse_moe.experts.255.w1", "model.layers.36.block_sparse_moe.experts.0.w3", "model.layers.36.block_sparse_moe.experts.1.w3", "model.layers.36.block_sparse_moe.experts.2.w3", "model.layers.36.block_sparse_moe.experts.3.w3", "model.layers.36.block_sparse_moe.experts.4.w3", "model.layers.36.block_sparse_moe.experts.5.w3", "model.layers.36.block_sparse_moe.experts.6.w3", "model.layers.36.block_sparse_moe.experts.7.w3", "model.layers.36.block_sparse_moe.experts.8.w3", "model.layers.36.block_sparse_moe.experts.9.w3", "model.layers.36.block_sparse_moe.experts.10.w3", "model.layers.36.block_sparse_moe.experts.11.w3", "model.layers.36.block_sparse_moe.experts.12.w3", "model.layers.36.block_sparse_moe.experts.13.w3", "model.layers.36.block_sparse_moe.experts.14.w3", "model.layers.36.block_sparse_moe.experts.15.w3", "model.layers.36.block_sparse_moe.experts.16.w3", "model.layers.36.block_sparse_moe.experts.17.w3", "model.layers.36.block_sparse_moe.experts.18.w3", "model.layers.36.block_sparse_moe.experts.19.w3", "model.layers.36.block_sparse_moe.experts.20.w3", "model.layers.36.block_sparse_moe.experts.21.w3", "model.layers.36.block_sparse_moe.experts.22.w3", "model.layers.36.block_sparse_moe.experts.23.w3", "model.layers.36.block_sparse_moe.experts.24.w3", "model.layers.36.block_sparse_moe.experts.25.w3", "model.layers.36.block_sparse_moe.experts.26.w3", "model.layers.36.block_sparse_moe.experts.27.w3", "model.layers.36.block_sparse_moe.experts.28.w3", "model.layers.36.block_sparse_moe.experts.29.w3", "model.layers.36.block_sparse_moe.experts.30.w3", "model.layers.36.block_sparse_moe.experts.31.w3", "model.layers.36.block_sparse_moe.experts.32.w3", "model.layers.36.block_sparse_moe.experts.33.w3", "model.layers.36.block_sparse_moe.experts.34.w3", "model.layers.36.block_sparse_moe.experts.35.w3", "model.layers.36.block_sparse_moe.experts.36.w3", "model.layers.36.block_sparse_moe.experts.37.w3", "model.layers.36.block_sparse_moe.experts.38.w3", "model.layers.36.block_sparse_moe.experts.39.w3", "model.layers.36.block_sparse_moe.experts.40.w3", "model.layers.36.block_sparse_moe.experts.41.w3", "model.layers.36.block_sparse_moe.experts.42.w3", "model.layers.36.block_sparse_moe.experts.43.w3", "model.layers.36.block_sparse_moe.experts.44.w3", "model.layers.36.block_sparse_moe.experts.45.w3", "model.layers.36.block_sparse_moe.experts.46.w3", "model.layers.36.block_sparse_moe.experts.47.w3", "model.layers.36.block_sparse_moe.experts.48.w3", "model.layers.36.block_sparse_moe.experts.49.w3", "model.layers.36.block_sparse_moe.experts.50.w3", "model.layers.36.block_sparse_moe.experts.51.w3", "model.layers.36.block_sparse_moe.experts.52.w3", "model.layers.36.block_sparse_moe.experts.53.w3", "model.layers.36.block_sparse_moe.experts.54.w3", "model.layers.36.block_sparse_moe.experts.55.w3", "model.layers.36.block_sparse_moe.experts.56.w3", "model.layers.36.block_sparse_moe.experts.57.w3", "model.layers.36.block_sparse_moe.experts.58.w3", "model.layers.36.block_sparse_moe.experts.59.w3", "model.layers.36.block_sparse_moe.experts.60.w3", "model.layers.36.block_sparse_moe.experts.61.w3", "model.layers.36.block_sparse_moe.experts.62.w3", "model.layers.36.block_sparse_moe.experts.63.w3", "model.layers.36.block_sparse_moe.experts.64.w3", "model.layers.36.block_sparse_moe.experts.65.w3", "model.layers.36.block_sparse_moe.experts.66.w3", "model.layers.36.block_sparse_moe.experts.67.w3", "model.layers.36.block_sparse_moe.experts.68.w3", "model.layers.36.block_sparse_moe.experts.69.w3", "model.layers.36.block_sparse_moe.experts.70.w3", "model.layers.36.block_sparse_moe.experts.71.w3", "model.layers.36.block_sparse_moe.experts.72.w3", "model.layers.36.block_sparse_moe.experts.73.w3", "model.layers.36.block_sparse_moe.experts.74.w3", "model.layers.36.block_sparse_moe.experts.75.w3", "model.layers.36.block_sparse_moe.experts.76.w3", "model.layers.36.block_sparse_moe.experts.77.w3", "model.layers.36.block_sparse_moe.experts.78.w3", "model.layers.36.block_sparse_moe.experts.79.w3", "model.layers.36.block_sparse_moe.experts.80.w3", "model.layers.36.block_sparse_moe.experts.81.w3", "model.layers.36.block_sparse_moe.experts.82.w3", "model.layers.36.block_sparse_moe.experts.83.w3", "model.layers.36.block_sparse_moe.experts.84.w3", "model.layers.36.block_sparse_moe.experts.85.w3", "model.layers.36.block_sparse_moe.experts.86.w3", "model.layers.36.block_sparse_moe.experts.87.w3", "model.layers.36.block_sparse_moe.experts.88.w3", "model.layers.36.block_sparse_moe.experts.89.w3", "model.layers.36.block_sparse_moe.experts.90.w3", "model.layers.36.block_sparse_moe.experts.91.w3", "model.layers.36.block_sparse_moe.experts.92.w3", "model.layers.36.block_sparse_moe.experts.93.w3", "model.layers.36.block_sparse_moe.experts.94.w3", "model.layers.36.block_sparse_moe.experts.95.w3", "model.layers.36.block_sparse_moe.experts.96.w3", "model.layers.36.block_sparse_moe.experts.97.w3", "model.layers.36.block_sparse_moe.experts.98.w3", "model.layers.36.block_sparse_moe.experts.99.w3", "model.layers.36.block_sparse_moe.experts.100.w3", "model.layers.36.block_sparse_moe.experts.101.w3", "model.layers.36.block_sparse_moe.experts.102.w3", "model.layers.36.block_sparse_moe.experts.103.w3", "model.layers.36.block_sparse_moe.experts.104.w3", "model.layers.36.block_sparse_moe.experts.105.w3", "model.layers.36.block_sparse_moe.experts.106.w3", "model.layers.36.block_sparse_moe.experts.107.w3", "model.layers.36.block_sparse_moe.experts.108.w3", "model.layers.36.block_sparse_moe.experts.109.w3", "model.layers.36.block_sparse_moe.experts.110.w3", "model.layers.36.block_sparse_moe.experts.111.w3", "model.layers.36.block_sparse_moe.experts.112.w3", "model.layers.36.block_sparse_moe.experts.113.w3", "model.layers.36.block_sparse_moe.experts.114.w3", "model.layers.36.block_sparse_moe.experts.115.w3", "model.layers.36.block_sparse_moe.experts.116.w3", "model.layers.36.block_sparse_moe.experts.117.w3", "model.layers.36.block_sparse_moe.experts.118.w3", "model.layers.36.block_sparse_moe.experts.119.w3", "model.layers.36.block_sparse_moe.experts.120.w3", "model.layers.36.block_sparse_moe.experts.121.w3", "model.layers.36.block_sparse_moe.experts.122.w3", "model.layers.36.block_sparse_moe.experts.123.w3", "model.layers.36.block_sparse_moe.experts.124.w3", "model.layers.36.block_sparse_moe.experts.125.w3", "model.layers.36.block_sparse_moe.experts.126.w3", "model.layers.36.block_sparse_moe.experts.127.w3", "model.layers.36.block_sparse_moe.experts.128.w3", "model.layers.36.block_sparse_moe.experts.129.w3", "model.layers.36.block_sparse_moe.experts.130.w3", "model.layers.36.block_sparse_moe.experts.131.w3", "model.layers.36.block_sparse_moe.experts.132.w3", "model.layers.36.block_sparse_moe.experts.133.w3", "model.layers.36.block_sparse_moe.experts.134.w3", "model.layers.36.block_sparse_moe.experts.135.w3", "model.layers.36.block_sparse_moe.experts.136.w3", "model.layers.36.block_sparse_moe.experts.137.w3", "model.layers.36.block_sparse_moe.experts.138.w3", "model.layers.36.block_sparse_moe.experts.139.w3", "model.layers.36.block_sparse_moe.experts.140.w3", "model.layers.36.block_sparse_moe.experts.141.w3", "model.layers.36.block_sparse_moe.experts.142.w3", "model.layers.36.block_sparse_moe.experts.143.w3", "model.layers.36.block_sparse_moe.experts.144.w3", "model.layers.36.block_sparse_moe.experts.145.w3", "model.layers.36.block_sparse_moe.experts.146.w3", "model.layers.36.block_sparse_moe.experts.147.w3", "model.layers.36.block_sparse_moe.experts.148.w3", "model.layers.36.block_sparse_moe.experts.149.w3", "model.layers.36.block_sparse_moe.experts.150.w3", "model.layers.36.block_sparse_moe.experts.151.w3", "model.layers.36.block_sparse_moe.experts.152.w3", "model.layers.36.block_sparse_moe.experts.153.w3", "model.layers.36.block_sparse_moe.experts.154.w3", "model.layers.36.block_sparse_moe.experts.155.w3", "model.layers.36.block_sparse_moe.experts.156.w3", "model.layers.36.block_sparse_moe.experts.157.w3", "model.layers.36.block_sparse_moe.experts.158.w3", "model.layers.36.block_sparse_moe.experts.159.w3", "model.layers.36.block_sparse_moe.experts.160.w3", "model.layers.36.block_sparse_moe.experts.161.w3", "model.layers.36.block_sparse_moe.experts.162.w3", "model.layers.36.block_sparse_moe.experts.163.w3", "model.layers.36.block_sparse_moe.experts.164.w3", "model.layers.36.block_sparse_moe.experts.165.w3", "model.layers.36.block_sparse_moe.experts.166.w3", "model.layers.36.block_sparse_moe.experts.167.w3", "model.layers.36.block_sparse_moe.experts.168.w3", "model.layers.36.block_sparse_moe.experts.169.w3", "model.layers.36.block_sparse_moe.experts.170.w3", "model.layers.36.block_sparse_moe.experts.171.w3", "model.layers.36.block_sparse_moe.experts.172.w3", "model.layers.36.block_sparse_moe.experts.173.w3", "model.layers.36.block_sparse_moe.experts.174.w3", "model.layers.36.block_sparse_moe.experts.175.w3", "model.layers.36.block_sparse_moe.experts.176.w3", "model.layers.36.block_sparse_moe.experts.177.w3", "model.layers.36.block_sparse_moe.experts.178.w3", "model.layers.36.block_sparse_moe.experts.179.w3", "model.layers.36.block_sparse_moe.experts.180.w3", "model.layers.36.block_sparse_moe.experts.181.w3", "model.layers.36.block_sparse_moe.experts.182.w3", "model.layers.36.block_sparse_moe.experts.183.w3", "model.layers.36.block_sparse_moe.experts.184.w3", "model.layers.36.block_sparse_moe.experts.185.w3", "model.layers.36.block_sparse_moe.experts.186.w3", "model.layers.36.block_sparse_moe.experts.187.w3", "model.layers.36.block_sparse_moe.experts.188.w3", "model.layers.36.block_sparse_moe.experts.189.w3", "model.layers.36.block_sparse_moe.experts.190.w3", "model.layers.36.block_sparse_moe.experts.191.w3", "model.layers.36.block_sparse_moe.experts.192.w3", "model.layers.36.block_sparse_moe.experts.193.w3", "model.layers.36.block_sparse_moe.experts.194.w3", "model.layers.36.block_sparse_moe.experts.195.w3", "model.layers.36.block_sparse_moe.experts.196.w3", "model.layers.36.block_sparse_moe.experts.197.w3", "model.layers.36.block_sparse_moe.experts.198.w3", "model.layers.36.block_sparse_moe.experts.199.w3", "model.layers.36.block_sparse_moe.experts.200.w3", "model.layers.36.block_sparse_moe.experts.201.w3", "model.layers.36.block_sparse_moe.experts.202.w3", "model.layers.36.block_sparse_moe.experts.203.w3", "model.layers.36.block_sparse_moe.experts.204.w3", "model.layers.36.block_sparse_moe.experts.205.w3", "model.layers.36.block_sparse_moe.experts.206.w3", "model.layers.36.block_sparse_moe.experts.207.w3", "model.layers.36.block_sparse_moe.experts.208.w3", "model.layers.36.block_sparse_moe.experts.209.w3", "model.layers.36.block_sparse_moe.experts.210.w3", "model.layers.36.block_sparse_moe.experts.211.w3", "model.layers.36.block_sparse_moe.experts.212.w3", "model.layers.36.block_sparse_moe.experts.213.w3", "model.layers.36.block_sparse_moe.experts.214.w3", "model.layers.36.block_sparse_moe.experts.215.w3", "model.layers.36.block_sparse_moe.experts.216.w3", "model.layers.36.block_sparse_moe.experts.217.w3", "model.layers.36.block_sparse_moe.experts.218.w3", "model.layers.36.block_sparse_moe.experts.219.w3", "model.layers.36.block_sparse_moe.experts.220.w3", "model.layers.36.block_sparse_moe.experts.221.w3", "model.layers.36.block_sparse_moe.experts.222.w3", "model.layers.36.block_sparse_moe.experts.223.w3", "model.layers.36.block_sparse_moe.experts.224.w3", "model.layers.36.block_sparse_moe.experts.225.w3", "model.layers.36.block_sparse_moe.experts.226.w3", "model.layers.36.block_sparse_moe.experts.227.w3", "model.layers.36.block_sparse_moe.experts.228.w3", "model.layers.36.block_sparse_moe.experts.229.w3", "model.layers.36.block_sparse_moe.experts.230.w3", "model.layers.36.block_sparse_moe.experts.231.w3", "model.layers.36.block_sparse_moe.experts.232.w3", "model.layers.36.block_sparse_moe.experts.233.w3", "model.layers.36.block_sparse_moe.experts.234.w3", "model.layers.36.block_sparse_moe.experts.235.w3", "model.layers.36.block_sparse_moe.experts.236.w3", "model.layers.36.block_sparse_moe.experts.237.w3", "model.layers.36.block_sparse_moe.experts.238.w3", "model.layers.36.block_sparse_moe.experts.239.w3", "model.layers.36.block_sparse_moe.experts.240.w3", "model.layers.36.block_sparse_moe.experts.241.w3", "model.layers.36.block_sparse_moe.experts.242.w3", "model.layers.36.block_sparse_moe.experts.243.w3", "model.layers.36.block_sparse_moe.experts.244.w3", "model.layers.36.block_sparse_moe.experts.245.w3", "model.layers.36.block_sparse_moe.experts.246.w3", "model.layers.36.block_sparse_moe.experts.247.w3", "model.layers.36.block_sparse_moe.experts.248.w3", "model.layers.36.block_sparse_moe.experts.249.w3", "model.layers.36.block_sparse_moe.experts.250.w3", "model.layers.36.block_sparse_moe.experts.251.w3", "model.layers.36.block_sparse_moe.experts.252.w3", "model.layers.36.block_sparse_moe.experts.253.w3", "model.layers.36.block_sparse_moe.experts.254.w3", "model.layers.36.block_sparse_moe.experts.255.w3", "model.layers.36.block_sparse_moe.experts.0.w2", "model.layers.36.block_sparse_moe.experts.1.w2", "model.layers.36.block_sparse_moe.experts.2.w2", "model.layers.36.block_sparse_moe.experts.3.w2", "model.layers.36.block_sparse_moe.experts.4.w2", "model.layers.36.block_sparse_moe.experts.5.w2", "model.layers.36.block_sparse_moe.experts.6.w2", "model.layers.36.block_sparse_moe.experts.7.w2", "model.layers.36.block_sparse_moe.experts.8.w2", "model.layers.36.block_sparse_moe.experts.9.w2", "model.layers.36.block_sparse_moe.experts.10.w2", "model.layers.36.block_sparse_moe.experts.11.w2", "model.layers.36.block_sparse_moe.experts.12.w2", "model.layers.36.block_sparse_moe.experts.13.w2", "model.layers.36.block_sparse_moe.experts.14.w2", "model.layers.36.block_sparse_moe.experts.15.w2", "model.layers.36.block_sparse_moe.experts.16.w2", "model.layers.36.block_sparse_moe.experts.17.w2", "model.layers.36.block_sparse_moe.experts.18.w2", "model.layers.36.block_sparse_moe.experts.19.w2", "model.layers.36.block_sparse_moe.experts.20.w2", "model.layers.36.block_sparse_moe.experts.21.w2", "model.layers.36.block_sparse_moe.experts.22.w2", "model.layers.36.block_sparse_moe.experts.23.w2", "model.layers.36.block_sparse_moe.experts.24.w2", "model.layers.36.block_sparse_moe.experts.25.w2", "model.layers.36.block_sparse_moe.experts.26.w2", "model.layers.36.block_sparse_moe.experts.27.w2", "model.layers.36.block_sparse_moe.experts.28.w2", "model.layers.36.block_sparse_moe.experts.29.w2", "model.layers.36.block_sparse_moe.experts.30.w2", "model.layers.36.block_sparse_moe.experts.31.w2", "model.layers.36.block_sparse_moe.experts.32.w2", "model.layers.36.block_sparse_moe.experts.33.w2", "model.layers.36.block_sparse_moe.experts.34.w2", "model.layers.36.block_sparse_moe.experts.35.w2", "model.layers.36.block_sparse_moe.experts.36.w2", "model.layers.36.block_sparse_moe.experts.37.w2", "model.layers.36.block_sparse_moe.experts.38.w2", "model.layers.36.block_sparse_moe.experts.39.w2", "model.layers.36.block_sparse_moe.experts.40.w2", "model.layers.36.block_sparse_moe.experts.41.w2", "model.layers.36.block_sparse_moe.experts.42.w2", "model.layers.36.block_sparse_moe.experts.43.w2", "model.layers.36.block_sparse_moe.experts.44.w2", "model.layers.36.block_sparse_moe.experts.45.w2", "model.layers.36.block_sparse_moe.experts.46.w2", "model.layers.36.block_sparse_moe.experts.47.w2", "model.layers.36.block_sparse_moe.experts.48.w2", "model.layers.36.block_sparse_moe.experts.49.w2", "model.layers.36.block_sparse_moe.experts.50.w2", "model.layers.36.block_sparse_moe.experts.51.w2", "model.layers.36.block_sparse_moe.experts.52.w2", "model.layers.36.block_sparse_moe.experts.53.w2", "model.layers.36.block_sparse_moe.experts.54.w2", "model.layers.36.block_sparse_moe.experts.55.w2", "model.layers.36.block_sparse_moe.experts.56.w2", "model.layers.36.block_sparse_moe.experts.57.w2", "model.layers.36.block_sparse_moe.experts.58.w2", "model.layers.36.block_sparse_moe.experts.59.w2", "model.layers.36.block_sparse_moe.experts.60.w2", "model.layers.36.block_sparse_moe.experts.61.w2", "model.layers.36.block_sparse_moe.experts.62.w2", "model.layers.36.block_sparse_moe.experts.63.w2", "model.layers.36.block_sparse_moe.experts.64.w2", "model.layers.36.block_sparse_moe.experts.65.w2", "model.layers.36.block_sparse_moe.experts.66.w2", "model.layers.36.block_sparse_moe.experts.67.w2", "model.layers.36.block_sparse_moe.experts.68.w2", "model.layers.36.block_sparse_moe.experts.69.w2", "model.layers.36.block_sparse_moe.experts.70.w2", "model.layers.36.block_sparse_moe.experts.71.w2", "model.layers.36.block_sparse_moe.experts.72.w2", "model.layers.36.block_sparse_moe.experts.73.w2", "model.layers.36.block_sparse_moe.experts.74.w2", "model.layers.36.block_sparse_moe.experts.75.w2", "model.layers.36.block_sparse_moe.experts.76.w2", "model.layers.36.block_sparse_moe.experts.77.w2", "model.layers.36.block_sparse_moe.experts.78.w2", "model.layers.36.block_sparse_moe.experts.79.w2", "model.layers.36.block_sparse_moe.experts.80.w2", "model.layers.36.block_sparse_moe.experts.81.w2", "model.layers.36.block_sparse_moe.experts.82.w2", "model.layers.36.block_sparse_moe.experts.83.w2", "model.layers.36.block_sparse_moe.experts.84.w2", "model.layers.36.block_sparse_moe.experts.85.w2", "model.layers.36.block_sparse_moe.experts.86.w2", "model.layers.36.block_sparse_moe.experts.87.w2", "model.layers.36.block_sparse_moe.experts.88.w2", "model.layers.36.block_sparse_moe.experts.89.w2", "model.layers.36.block_sparse_moe.experts.90.w2", "model.layers.36.block_sparse_moe.experts.91.w2", "model.layers.36.block_sparse_moe.experts.92.w2", "model.layers.36.block_sparse_moe.experts.93.w2", "model.layers.36.block_sparse_moe.experts.94.w2", "model.layers.36.block_sparse_moe.experts.95.w2", "model.layers.36.block_sparse_moe.experts.96.w2", "model.layers.36.block_sparse_moe.experts.97.w2", "model.layers.36.block_sparse_moe.experts.98.w2", "model.layers.36.block_sparse_moe.experts.99.w2", "model.layers.36.block_sparse_moe.experts.100.w2", "model.layers.36.block_sparse_moe.experts.101.w2", "model.layers.36.block_sparse_moe.experts.102.w2", "model.layers.36.block_sparse_moe.experts.103.w2", "model.layers.36.block_sparse_moe.experts.104.w2", "model.layers.36.block_sparse_moe.experts.105.w2", "model.layers.36.block_sparse_moe.experts.106.w2", "model.layers.36.block_sparse_moe.experts.107.w2", "model.layers.36.block_sparse_moe.experts.108.w2", "model.layers.36.block_sparse_moe.experts.109.w2", "model.layers.36.block_sparse_moe.experts.110.w2", "model.layers.36.block_sparse_moe.experts.111.w2", "model.layers.36.block_sparse_moe.experts.112.w2", "model.layers.36.block_sparse_moe.experts.113.w2", "model.layers.36.block_sparse_moe.experts.114.w2", "model.layers.36.block_sparse_moe.experts.115.w2", "model.layers.36.block_sparse_moe.experts.116.w2", "model.layers.36.block_sparse_moe.experts.117.w2", "model.layers.36.block_sparse_moe.experts.118.w2", "model.layers.36.block_sparse_moe.experts.119.w2", "model.layers.36.block_sparse_moe.experts.120.w2", "model.layers.36.block_sparse_moe.experts.121.w2", "model.layers.36.block_sparse_moe.experts.122.w2", "model.layers.36.block_sparse_moe.experts.123.w2", "model.layers.36.block_sparse_moe.experts.124.w2", "model.layers.36.block_sparse_moe.experts.125.w2", "model.layers.36.block_sparse_moe.experts.126.w2", "model.layers.36.block_sparse_moe.experts.127.w2", "model.layers.36.block_sparse_moe.experts.128.w2", "model.layers.36.block_sparse_moe.experts.129.w2", "model.layers.36.block_sparse_moe.experts.130.w2", "model.layers.36.block_sparse_moe.experts.131.w2", "model.layers.36.block_sparse_moe.experts.132.w2", "model.layers.36.block_sparse_moe.experts.133.w2", "model.layers.36.block_sparse_moe.experts.134.w2", "model.layers.36.block_sparse_moe.experts.135.w2", "model.layers.36.block_sparse_moe.experts.136.w2", "model.layers.36.block_sparse_moe.experts.137.w2", "model.layers.36.block_sparse_moe.experts.138.w2", "model.layers.36.block_sparse_moe.experts.139.w2", "model.layers.36.block_sparse_moe.experts.140.w2", "model.layers.36.block_sparse_moe.experts.141.w2", "model.layers.36.block_sparse_moe.experts.142.w2", "model.layers.36.block_sparse_moe.experts.143.w2", "model.layers.36.block_sparse_moe.experts.144.w2", "model.layers.36.block_sparse_moe.experts.145.w2", "model.layers.36.block_sparse_moe.experts.146.w2", "model.layers.36.block_sparse_moe.experts.147.w2", "model.layers.36.block_sparse_moe.experts.148.w2", "model.layers.36.block_sparse_moe.experts.149.w2", "model.layers.36.block_sparse_moe.experts.150.w2", "model.layers.36.block_sparse_moe.experts.151.w2", "model.layers.36.block_sparse_moe.experts.152.w2", "model.layers.36.block_sparse_moe.experts.153.w2", "model.layers.36.block_sparse_moe.experts.154.w2", "model.layers.36.block_sparse_moe.experts.155.w2", "model.layers.36.block_sparse_moe.experts.156.w2", "model.layers.36.block_sparse_moe.experts.157.w2", "model.layers.36.block_sparse_moe.experts.158.w2", "model.layers.36.block_sparse_moe.experts.159.w2", "model.layers.36.block_sparse_moe.experts.160.w2", "model.layers.36.block_sparse_moe.experts.161.w2", "model.layers.36.block_sparse_moe.experts.162.w2", "model.layers.36.block_sparse_moe.experts.163.w2", "model.layers.36.block_sparse_moe.experts.164.w2", "model.layers.36.block_sparse_moe.experts.165.w2", "model.layers.36.block_sparse_moe.experts.166.w2", "model.layers.36.block_sparse_moe.experts.167.w2", "model.layers.36.block_sparse_moe.experts.168.w2", "model.layers.36.block_sparse_moe.experts.169.w2", "model.layers.36.block_sparse_moe.experts.170.w2", "model.layers.36.block_sparse_moe.experts.171.w2", "model.layers.36.block_sparse_moe.experts.172.w2", "model.layers.36.block_sparse_moe.experts.173.w2", "model.layers.36.block_sparse_moe.experts.174.w2", "model.layers.36.block_sparse_moe.experts.175.w2", "model.layers.36.block_sparse_moe.experts.176.w2", "model.layers.36.block_sparse_moe.experts.177.w2", "model.layers.36.block_sparse_moe.experts.178.w2", "model.layers.36.block_sparse_moe.experts.179.w2", "model.layers.36.block_sparse_moe.experts.180.w2", "model.layers.36.block_sparse_moe.experts.181.w2", "model.layers.36.block_sparse_moe.experts.182.w2", "model.layers.36.block_sparse_moe.experts.183.w2", "model.layers.36.block_sparse_moe.experts.184.w2", "model.layers.36.block_sparse_moe.experts.185.w2", "model.layers.36.block_sparse_moe.experts.186.w2", "model.layers.36.block_sparse_moe.experts.187.w2", "model.layers.36.block_sparse_moe.experts.188.w2", "model.layers.36.block_sparse_moe.experts.189.w2", "model.layers.36.block_sparse_moe.experts.190.w2", "model.layers.36.block_sparse_moe.experts.191.w2", "model.layers.36.block_sparse_moe.experts.192.w2", "model.layers.36.block_sparse_moe.experts.193.w2", "model.layers.36.block_sparse_moe.experts.194.w2", "model.layers.36.block_sparse_moe.experts.195.w2", "model.layers.36.block_sparse_moe.experts.196.w2", "model.layers.36.block_sparse_moe.experts.197.w2", "model.layers.36.block_sparse_moe.experts.198.w2", "model.layers.36.block_sparse_moe.experts.199.w2", "model.layers.36.block_sparse_moe.experts.200.w2", "model.layers.36.block_sparse_moe.experts.201.w2", "model.layers.36.block_sparse_moe.experts.202.w2", "model.layers.36.block_sparse_moe.experts.203.w2", "model.layers.36.block_sparse_moe.experts.204.w2", "model.layers.36.block_sparse_moe.experts.205.w2", "model.layers.36.block_sparse_moe.experts.206.w2", "model.layers.36.block_sparse_moe.experts.207.w2", "model.layers.36.block_sparse_moe.experts.208.w2", "model.layers.36.block_sparse_moe.experts.209.w2", "model.layers.36.block_sparse_moe.experts.210.w2", "model.layers.36.block_sparse_moe.experts.211.w2", "model.layers.36.block_sparse_moe.experts.212.w2", "model.layers.36.block_sparse_moe.experts.213.w2", "model.layers.36.block_sparse_moe.experts.214.w2", "model.layers.36.block_sparse_moe.experts.215.w2", "model.layers.36.block_sparse_moe.experts.216.w2", "model.layers.36.block_sparse_moe.experts.217.w2", "model.layers.36.block_sparse_moe.experts.218.w2", "model.layers.36.block_sparse_moe.experts.219.w2", "model.layers.36.block_sparse_moe.experts.220.w2", "model.layers.36.block_sparse_moe.experts.221.w2", "model.layers.36.block_sparse_moe.experts.222.w2", "model.layers.36.block_sparse_moe.experts.223.w2", "model.layers.36.block_sparse_moe.experts.224.w2", "model.layers.36.block_sparse_moe.experts.225.w2", "model.layers.36.block_sparse_moe.experts.226.w2", "model.layers.36.block_sparse_moe.experts.227.w2", "model.layers.36.block_sparse_moe.experts.228.w2", "model.layers.36.block_sparse_moe.experts.229.w2", "model.layers.36.block_sparse_moe.experts.230.w2", "model.layers.36.block_sparse_moe.experts.231.w2", "model.layers.36.block_sparse_moe.experts.232.w2", "model.layers.36.block_sparse_moe.experts.233.w2", "model.layers.36.block_sparse_moe.experts.234.w2", "model.layers.36.block_sparse_moe.experts.235.w2", "model.layers.36.block_sparse_moe.experts.236.w2", "model.layers.36.block_sparse_moe.experts.237.w2", "model.layers.36.block_sparse_moe.experts.238.w2", "model.layers.36.block_sparse_moe.experts.239.w2", "model.layers.36.block_sparse_moe.experts.240.w2", "model.layers.36.block_sparse_moe.experts.241.w2", "model.layers.36.block_sparse_moe.experts.242.w2", "model.layers.36.block_sparse_moe.experts.243.w2", "model.layers.36.block_sparse_moe.experts.244.w2", "model.layers.36.block_sparse_moe.experts.245.w2", "model.layers.36.block_sparse_moe.experts.246.w2", "model.layers.36.block_sparse_moe.experts.247.w2", "model.layers.36.block_sparse_moe.experts.248.w2", "model.layers.36.block_sparse_moe.experts.249.w2", "model.layers.36.block_sparse_moe.experts.250.w2", "model.layers.36.block_sparse_moe.experts.251.w2", "model.layers.36.block_sparse_moe.experts.252.w2", "model.layers.36.block_sparse_moe.experts.253.w2", "model.layers.36.block_sparse_moe.experts.254.w2", "model.layers.36.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0005462218075990566, "dbits": 3623878656 } ] }, { "idx": 74, "layers": [ "model.layers.37.self_attn.q_proj", "model.layers.37.self_attn.k_proj", "model.layers.37.self_attn.v_proj", "model.layers.37.self_attn.o_proj" ], "candidates": [ { "dkld": -0.004659432545304265, "dbits": 44040192 } ] }, { "idx": 75, "layers": [ "model.layers.37.block_sparse_moe.experts.0.w1", "model.layers.37.block_sparse_moe.experts.1.w1", "model.layers.37.block_sparse_moe.experts.2.w1", "model.layers.37.block_sparse_moe.experts.3.w1", "model.layers.37.block_sparse_moe.experts.4.w1", "model.layers.37.block_sparse_moe.experts.5.w1", "model.layers.37.block_sparse_moe.experts.6.w1", "model.layers.37.block_sparse_moe.experts.7.w1", "model.layers.37.block_sparse_moe.experts.8.w1", "model.layers.37.block_sparse_moe.experts.9.w1", "model.layers.37.block_sparse_moe.experts.10.w1", "model.layers.37.block_sparse_moe.experts.11.w1", "model.layers.37.block_sparse_moe.experts.12.w1", "model.layers.37.block_sparse_moe.experts.13.w1", "model.layers.37.block_sparse_moe.experts.14.w1", "model.layers.37.block_sparse_moe.experts.15.w1", "model.layers.37.block_sparse_moe.experts.16.w1", "model.layers.37.block_sparse_moe.experts.17.w1", "model.layers.37.block_sparse_moe.experts.18.w1", "model.layers.37.block_sparse_moe.experts.19.w1", "model.layers.37.block_sparse_moe.experts.20.w1", "model.layers.37.block_sparse_moe.experts.21.w1", "model.layers.37.block_sparse_moe.experts.22.w1", "model.layers.37.block_sparse_moe.experts.23.w1", "model.layers.37.block_sparse_moe.experts.24.w1", "model.layers.37.block_sparse_moe.experts.25.w1", "model.layers.37.block_sparse_moe.experts.26.w1", "model.layers.37.block_sparse_moe.experts.27.w1", "model.layers.37.block_sparse_moe.experts.28.w1", "model.layers.37.block_sparse_moe.experts.29.w1", "model.layers.37.block_sparse_moe.experts.30.w1", "model.layers.37.block_sparse_moe.experts.31.w1", "model.layers.37.block_sparse_moe.experts.32.w1", "model.layers.37.block_sparse_moe.experts.33.w1", "model.layers.37.block_sparse_moe.experts.34.w1", "model.layers.37.block_sparse_moe.experts.35.w1", "model.layers.37.block_sparse_moe.experts.36.w1", "model.layers.37.block_sparse_moe.experts.37.w1", "model.layers.37.block_sparse_moe.experts.38.w1", "model.layers.37.block_sparse_moe.experts.39.w1", "model.layers.37.block_sparse_moe.experts.40.w1", "model.layers.37.block_sparse_moe.experts.41.w1", "model.layers.37.block_sparse_moe.experts.42.w1", "model.layers.37.block_sparse_moe.experts.43.w1", "model.layers.37.block_sparse_moe.experts.44.w1", "model.layers.37.block_sparse_moe.experts.45.w1", "model.layers.37.block_sparse_moe.experts.46.w1", "model.layers.37.block_sparse_moe.experts.47.w1", "model.layers.37.block_sparse_moe.experts.48.w1", "model.layers.37.block_sparse_moe.experts.49.w1", "model.layers.37.block_sparse_moe.experts.50.w1", "model.layers.37.block_sparse_moe.experts.51.w1", "model.layers.37.block_sparse_moe.experts.52.w1", "model.layers.37.block_sparse_moe.experts.53.w1", "model.layers.37.block_sparse_moe.experts.54.w1", "model.layers.37.block_sparse_moe.experts.55.w1", "model.layers.37.block_sparse_moe.experts.56.w1", "model.layers.37.block_sparse_moe.experts.57.w1", "model.layers.37.block_sparse_moe.experts.58.w1", "model.layers.37.block_sparse_moe.experts.59.w1", "model.layers.37.block_sparse_moe.experts.60.w1", "model.layers.37.block_sparse_moe.experts.61.w1", "model.layers.37.block_sparse_moe.experts.62.w1", "model.layers.37.block_sparse_moe.experts.63.w1", "model.layers.37.block_sparse_moe.experts.64.w1", "model.layers.37.block_sparse_moe.experts.65.w1", "model.layers.37.block_sparse_moe.experts.66.w1", "model.layers.37.block_sparse_moe.experts.67.w1", "model.layers.37.block_sparse_moe.experts.68.w1", "model.layers.37.block_sparse_moe.experts.69.w1", "model.layers.37.block_sparse_moe.experts.70.w1", "model.layers.37.block_sparse_moe.experts.71.w1", "model.layers.37.block_sparse_moe.experts.72.w1", "model.layers.37.block_sparse_moe.experts.73.w1", "model.layers.37.block_sparse_moe.experts.74.w1", "model.layers.37.block_sparse_moe.experts.75.w1", "model.layers.37.block_sparse_moe.experts.76.w1", "model.layers.37.block_sparse_moe.experts.77.w1", "model.layers.37.block_sparse_moe.experts.78.w1", "model.layers.37.block_sparse_moe.experts.79.w1", "model.layers.37.block_sparse_moe.experts.80.w1", "model.layers.37.block_sparse_moe.experts.81.w1", "model.layers.37.block_sparse_moe.experts.82.w1", "model.layers.37.block_sparse_moe.experts.83.w1", "model.layers.37.block_sparse_moe.experts.84.w1", "model.layers.37.block_sparse_moe.experts.85.w1", "model.layers.37.block_sparse_moe.experts.86.w1", "model.layers.37.block_sparse_moe.experts.87.w1", "model.layers.37.block_sparse_moe.experts.88.w1", "model.layers.37.block_sparse_moe.experts.89.w1", "model.layers.37.block_sparse_moe.experts.90.w1", "model.layers.37.block_sparse_moe.experts.91.w1", "model.layers.37.block_sparse_moe.experts.92.w1", "model.layers.37.block_sparse_moe.experts.93.w1", "model.layers.37.block_sparse_moe.experts.94.w1", "model.layers.37.block_sparse_moe.experts.95.w1", "model.layers.37.block_sparse_moe.experts.96.w1", "model.layers.37.block_sparse_moe.experts.97.w1", "model.layers.37.block_sparse_moe.experts.98.w1", "model.layers.37.block_sparse_moe.experts.99.w1", "model.layers.37.block_sparse_moe.experts.100.w1", "model.layers.37.block_sparse_moe.experts.101.w1", "model.layers.37.block_sparse_moe.experts.102.w1", "model.layers.37.block_sparse_moe.experts.103.w1", "model.layers.37.block_sparse_moe.experts.104.w1", "model.layers.37.block_sparse_moe.experts.105.w1", "model.layers.37.block_sparse_moe.experts.106.w1", "model.layers.37.block_sparse_moe.experts.107.w1", "model.layers.37.block_sparse_moe.experts.108.w1", "model.layers.37.block_sparse_moe.experts.109.w1", "model.layers.37.block_sparse_moe.experts.110.w1", "model.layers.37.block_sparse_moe.experts.111.w1", "model.layers.37.block_sparse_moe.experts.112.w1", "model.layers.37.block_sparse_moe.experts.113.w1", "model.layers.37.block_sparse_moe.experts.114.w1", "model.layers.37.block_sparse_moe.experts.115.w1", "model.layers.37.block_sparse_moe.experts.116.w1", "model.layers.37.block_sparse_moe.experts.117.w1", "model.layers.37.block_sparse_moe.experts.118.w1", "model.layers.37.block_sparse_moe.experts.119.w1", "model.layers.37.block_sparse_moe.experts.120.w1", "model.layers.37.block_sparse_moe.experts.121.w1", "model.layers.37.block_sparse_moe.experts.122.w1", "model.layers.37.block_sparse_moe.experts.123.w1", "model.layers.37.block_sparse_moe.experts.124.w1", "model.layers.37.block_sparse_moe.experts.125.w1", "model.layers.37.block_sparse_moe.experts.126.w1", "model.layers.37.block_sparse_moe.experts.127.w1", "model.layers.37.block_sparse_moe.experts.128.w1", "model.layers.37.block_sparse_moe.experts.129.w1", "model.layers.37.block_sparse_moe.experts.130.w1", "model.layers.37.block_sparse_moe.experts.131.w1", "model.layers.37.block_sparse_moe.experts.132.w1", "model.layers.37.block_sparse_moe.experts.133.w1", "model.layers.37.block_sparse_moe.experts.134.w1", "model.layers.37.block_sparse_moe.experts.135.w1", "model.layers.37.block_sparse_moe.experts.136.w1", "model.layers.37.block_sparse_moe.experts.137.w1", "model.layers.37.block_sparse_moe.experts.138.w1", "model.layers.37.block_sparse_moe.experts.139.w1", "model.layers.37.block_sparse_moe.experts.140.w1", "model.layers.37.block_sparse_moe.experts.141.w1", "model.layers.37.block_sparse_moe.experts.142.w1", "model.layers.37.block_sparse_moe.experts.143.w1", "model.layers.37.block_sparse_moe.experts.144.w1", "model.layers.37.block_sparse_moe.experts.145.w1", "model.layers.37.block_sparse_moe.experts.146.w1", "model.layers.37.block_sparse_moe.experts.147.w1", "model.layers.37.block_sparse_moe.experts.148.w1", "model.layers.37.block_sparse_moe.experts.149.w1", "model.layers.37.block_sparse_moe.experts.150.w1", "model.layers.37.block_sparse_moe.experts.151.w1", "model.layers.37.block_sparse_moe.experts.152.w1", "model.layers.37.block_sparse_moe.experts.153.w1", "model.layers.37.block_sparse_moe.experts.154.w1", "model.layers.37.block_sparse_moe.experts.155.w1", "model.layers.37.block_sparse_moe.experts.156.w1", "model.layers.37.block_sparse_moe.experts.157.w1", "model.layers.37.block_sparse_moe.experts.158.w1", "model.layers.37.block_sparse_moe.experts.159.w1", "model.layers.37.block_sparse_moe.experts.160.w1", "model.layers.37.block_sparse_moe.experts.161.w1", "model.layers.37.block_sparse_moe.experts.162.w1", "model.layers.37.block_sparse_moe.experts.163.w1", "model.layers.37.block_sparse_moe.experts.164.w1", "model.layers.37.block_sparse_moe.experts.165.w1", "model.layers.37.block_sparse_moe.experts.166.w1", "model.layers.37.block_sparse_moe.experts.167.w1", "model.layers.37.block_sparse_moe.experts.168.w1", "model.layers.37.block_sparse_moe.experts.169.w1", "model.layers.37.block_sparse_moe.experts.170.w1", "model.layers.37.block_sparse_moe.experts.171.w1", "model.layers.37.block_sparse_moe.experts.172.w1", "model.layers.37.block_sparse_moe.experts.173.w1", "model.layers.37.block_sparse_moe.experts.174.w1", "model.layers.37.block_sparse_moe.experts.175.w1", "model.layers.37.block_sparse_moe.experts.176.w1", "model.layers.37.block_sparse_moe.experts.177.w1", "model.layers.37.block_sparse_moe.experts.178.w1", "model.layers.37.block_sparse_moe.experts.179.w1", "model.layers.37.block_sparse_moe.experts.180.w1", "model.layers.37.block_sparse_moe.experts.181.w1", "model.layers.37.block_sparse_moe.experts.182.w1", "model.layers.37.block_sparse_moe.experts.183.w1", "model.layers.37.block_sparse_moe.experts.184.w1", "model.layers.37.block_sparse_moe.experts.185.w1", "model.layers.37.block_sparse_moe.experts.186.w1", "model.layers.37.block_sparse_moe.experts.187.w1", "model.layers.37.block_sparse_moe.experts.188.w1", "model.layers.37.block_sparse_moe.experts.189.w1", "model.layers.37.block_sparse_moe.experts.190.w1", "model.layers.37.block_sparse_moe.experts.191.w1", "model.layers.37.block_sparse_moe.experts.192.w1", "model.layers.37.block_sparse_moe.experts.193.w1", "model.layers.37.block_sparse_moe.experts.194.w1", "model.layers.37.block_sparse_moe.experts.195.w1", "model.layers.37.block_sparse_moe.experts.196.w1", "model.layers.37.block_sparse_moe.experts.197.w1", "model.layers.37.block_sparse_moe.experts.198.w1", "model.layers.37.block_sparse_moe.experts.199.w1", "model.layers.37.block_sparse_moe.experts.200.w1", "model.layers.37.block_sparse_moe.experts.201.w1", "model.layers.37.block_sparse_moe.experts.202.w1", "model.layers.37.block_sparse_moe.experts.203.w1", "model.layers.37.block_sparse_moe.experts.204.w1", "model.layers.37.block_sparse_moe.experts.205.w1", "model.layers.37.block_sparse_moe.experts.206.w1", "model.layers.37.block_sparse_moe.experts.207.w1", "model.layers.37.block_sparse_moe.experts.208.w1", "model.layers.37.block_sparse_moe.experts.209.w1", "model.layers.37.block_sparse_moe.experts.210.w1", "model.layers.37.block_sparse_moe.experts.211.w1", "model.layers.37.block_sparse_moe.experts.212.w1", "model.layers.37.block_sparse_moe.experts.213.w1", "model.layers.37.block_sparse_moe.experts.214.w1", "model.layers.37.block_sparse_moe.experts.215.w1", "model.layers.37.block_sparse_moe.experts.216.w1", "model.layers.37.block_sparse_moe.experts.217.w1", "model.layers.37.block_sparse_moe.experts.218.w1", "model.layers.37.block_sparse_moe.experts.219.w1", "model.layers.37.block_sparse_moe.experts.220.w1", "model.layers.37.block_sparse_moe.experts.221.w1", "model.layers.37.block_sparse_moe.experts.222.w1", "model.layers.37.block_sparse_moe.experts.223.w1", "model.layers.37.block_sparse_moe.experts.224.w1", "model.layers.37.block_sparse_moe.experts.225.w1", "model.layers.37.block_sparse_moe.experts.226.w1", "model.layers.37.block_sparse_moe.experts.227.w1", "model.layers.37.block_sparse_moe.experts.228.w1", "model.layers.37.block_sparse_moe.experts.229.w1", "model.layers.37.block_sparse_moe.experts.230.w1", "model.layers.37.block_sparse_moe.experts.231.w1", "model.layers.37.block_sparse_moe.experts.232.w1", "model.layers.37.block_sparse_moe.experts.233.w1", "model.layers.37.block_sparse_moe.experts.234.w1", "model.layers.37.block_sparse_moe.experts.235.w1", "model.layers.37.block_sparse_moe.experts.236.w1", "model.layers.37.block_sparse_moe.experts.237.w1", "model.layers.37.block_sparse_moe.experts.238.w1", "model.layers.37.block_sparse_moe.experts.239.w1", "model.layers.37.block_sparse_moe.experts.240.w1", "model.layers.37.block_sparse_moe.experts.241.w1", "model.layers.37.block_sparse_moe.experts.242.w1", "model.layers.37.block_sparse_moe.experts.243.w1", "model.layers.37.block_sparse_moe.experts.244.w1", "model.layers.37.block_sparse_moe.experts.245.w1", "model.layers.37.block_sparse_moe.experts.246.w1", "model.layers.37.block_sparse_moe.experts.247.w1", "model.layers.37.block_sparse_moe.experts.248.w1", "model.layers.37.block_sparse_moe.experts.249.w1", "model.layers.37.block_sparse_moe.experts.250.w1", "model.layers.37.block_sparse_moe.experts.251.w1", "model.layers.37.block_sparse_moe.experts.252.w1", "model.layers.37.block_sparse_moe.experts.253.w1", "model.layers.37.block_sparse_moe.experts.254.w1", "model.layers.37.block_sparse_moe.experts.255.w1", "model.layers.37.block_sparse_moe.experts.0.w3", "model.layers.37.block_sparse_moe.experts.1.w3", "model.layers.37.block_sparse_moe.experts.2.w3", "model.layers.37.block_sparse_moe.experts.3.w3", "model.layers.37.block_sparse_moe.experts.4.w3", "model.layers.37.block_sparse_moe.experts.5.w3", "model.layers.37.block_sparse_moe.experts.6.w3", "model.layers.37.block_sparse_moe.experts.7.w3", "model.layers.37.block_sparse_moe.experts.8.w3", "model.layers.37.block_sparse_moe.experts.9.w3", "model.layers.37.block_sparse_moe.experts.10.w3", "model.layers.37.block_sparse_moe.experts.11.w3", "model.layers.37.block_sparse_moe.experts.12.w3", "model.layers.37.block_sparse_moe.experts.13.w3", "model.layers.37.block_sparse_moe.experts.14.w3", "model.layers.37.block_sparse_moe.experts.15.w3", "model.layers.37.block_sparse_moe.experts.16.w3", "model.layers.37.block_sparse_moe.experts.17.w3", "model.layers.37.block_sparse_moe.experts.18.w3", "model.layers.37.block_sparse_moe.experts.19.w3", "model.layers.37.block_sparse_moe.experts.20.w3", "model.layers.37.block_sparse_moe.experts.21.w3", "model.layers.37.block_sparse_moe.experts.22.w3", "model.layers.37.block_sparse_moe.experts.23.w3", "model.layers.37.block_sparse_moe.experts.24.w3", "model.layers.37.block_sparse_moe.experts.25.w3", "model.layers.37.block_sparse_moe.experts.26.w3", "model.layers.37.block_sparse_moe.experts.27.w3", "model.layers.37.block_sparse_moe.experts.28.w3", "model.layers.37.block_sparse_moe.experts.29.w3", "model.layers.37.block_sparse_moe.experts.30.w3", "model.layers.37.block_sparse_moe.experts.31.w3", "model.layers.37.block_sparse_moe.experts.32.w3", "model.layers.37.block_sparse_moe.experts.33.w3", "model.layers.37.block_sparse_moe.experts.34.w3", "model.layers.37.block_sparse_moe.experts.35.w3", "model.layers.37.block_sparse_moe.experts.36.w3", "model.layers.37.block_sparse_moe.experts.37.w3", "model.layers.37.block_sparse_moe.experts.38.w3", "model.layers.37.block_sparse_moe.experts.39.w3", "model.layers.37.block_sparse_moe.experts.40.w3", "model.layers.37.block_sparse_moe.experts.41.w3", "model.layers.37.block_sparse_moe.experts.42.w3", "model.layers.37.block_sparse_moe.experts.43.w3", "model.layers.37.block_sparse_moe.experts.44.w3", "model.layers.37.block_sparse_moe.experts.45.w3", "model.layers.37.block_sparse_moe.experts.46.w3", "model.layers.37.block_sparse_moe.experts.47.w3", "model.layers.37.block_sparse_moe.experts.48.w3", "model.layers.37.block_sparse_moe.experts.49.w3", "model.layers.37.block_sparse_moe.experts.50.w3", "model.layers.37.block_sparse_moe.experts.51.w3", "model.layers.37.block_sparse_moe.experts.52.w3", "model.layers.37.block_sparse_moe.experts.53.w3", "model.layers.37.block_sparse_moe.experts.54.w3", "model.layers.37.block_sparse_moe.experts.55.w3", "model.layers.37.block_sparse_moe.experts.56.w3", "model.layers.37.block_sparse_moe.experts.57.w3", "model.layers.37.block_sparse_moe.experts.58.w3", "model.layers.37.block_sparse_moe.experts.59.w3", "model.layers.37.block_sparse_moe.experts.60.w3", "model.layers.37.block_sparse_moe.experts.61.w3", "model.layers.37.block_sparse_moe.experts.62.w3", "model.layers.37.block_sparse_moe.experts.63.w3", "model.layers.37.block_sparse_moe.experts.64.w3", "model.layers.37.block_sparse_moe.experts.65.w3", "model.layers.37.block_sparse_moe.experts.66.w3", "model.layers.37.block_sparse_moe.experts.67.w3", "model.layers.37.block_sparse_moe.experts.68.w3", "model.layers.37.block_sparse_moe.experts.69.w3", "model.layers.37.block_sparse_moe.experts.70.w3", "model.layers.37.block_sparse_moe.experts.71.w3", "model.layers.37.block_sparse_moe.experts.72.w3", "model.layers.37.block_sparse_moe.experts.73.w3", "model.layers.37.block_sparse_moe.experts.74.w3", "model.layers.37.block_sparse_moe.experts.75.w3", "model.layers.37.block_sparse_moe.experts.76.w3", "model.layers.37.block_sparse_moe.experts.77.w3", "model.layers.37.block_sparse_moe.experts.78.w3", "model.layers.37.block_sparse_moe.experts.79.w3", "model.layers.37.block_sparse_moe.experts.80.w3", "model.layers.37.block_sparse_moe.experts.81.w3", "model.layers.37.block_sparse_moe.experts.82.w3", "model.layers.37.block_sparse_moe.experts.83.w3", "model.layers.37.block_sparse_moe.experts.84.w3", "model.layers.37.block_sparse_moe.experts.85.w3", "model.layers.37.block_sparse_moe.experts.86.w3", "model.layers.37.block_sparse_moe.experts.87.w3", "model.layers.37.block_sparse_moe.experts.88.w3", "model.layers.37.block_sparse_moe.experts.89.w3", "model.layers.37.block_sparse_moe.experts.90.w3", "model.layers.37.block_sparse_moe.experts.91.w3", "model.layers.37.block_sparse_moe.experts.92.w3", "model.layers.37.block_sparse_moe.experts.93.w3", "model.layers.37.block_sparse_moe.experts.94.w3", "model.layers.37.block_sparse_moe.experts.95.w3", "model.layers.37.block_sparse_moe.experts.96.w3", "model.layers.37.block_sparse_moe.experts.97.w3", "model.layers.37.block_sparse_moe.experts.98.w3", "model.layers.37.block_sparse_moe.experts.99.w3", "model.layers.37.block_sparse_moe.experts.100.w3", "model.layers.37.block_sparse_moe.experts.101.w3", "model.layers.37.block_sparse_moe.experts.102.w3", "model.layers.37.block_sparse_moe.experts.103.w3", "model.layers.37.block_sparse_moe.experts.104.w3", "model.layers.37.block_sparse_moe.experts.105.w3", "model.layers.37.block_sparse_moe.experts.106.w3", "model.layers.37.block_sparse_moe.experts.107.w3", "model.layers.37.block_sparse_moe.experts.108.w3", "model.layers.37.block_sparse_moe.experts.109.w3", "model.layers.37.block_sparse_moe.experts.110.w3", "model.layers.37.block_sparse_moe.experts.111.w3", "model.layers.37.block_sparse_moe.experts.112.w3", "model.layers.37.block_sparse_moe.experts.113.w3", "model.layers.37.block_sparse_moe.experts.114.w3", "model.layers.37.block_sparse_moe.experts.115.w3", "model.layers.37.block_sparse_moe.experts.116.w3", "model.layers.37.block_sparse_moe.experts.117.w3", "model.layers.37.block_sparse_moe.experts.118.w3", "model.layers.37.block_sparse_moe.experts.119.w3", "model.layers.37.block_sparse_moe.experts.120.w3", "model.layers.37.block_sparse_moe.experts.121.w3", "model.layers.37.block_sparse_moe.experts.122.w3", "model.layers.37.block_sparse_moe.experts.123.w3", "model.layers.37.block_sparse_moe.experts.124.w3", "model.layers.37.block_sparse_moe.experts.125.w3", "model.layers.37.block_sparse_moe.experts.126.w3", "model.layers.37.block_sparse_moe.experts.127.w3", "model.layers.37.block_sparse_moe.experts.128.w3", "model.layers.37.block_sparse_moe.experts.129.w3", "model.layers.37.block_sparse_moe.experts.130.w3", "model.layers.37.block_sparse_moe.experts.131.w3", "model.layers.37.block_sparse_moe.experts.132.w3", "model.layers.37.block_sparse_moe.experts.133.w3", "model.layers.37.block_sparse_moe.experts.134.w3", "model.layers.37.block_sparse_moe.experts.135.w3", "model.layers.37.block_sparse_moe.experts.136.w3", "model.layers.37.block_sparse_moe.experts.137.w3", "model.layers.37.block_sparse_moe.experts.138.w3", "model.layers.37.block_sparse_moe.experts.139.w3", "model.layers.37.block_sparse_moe.experts.140.w3", "model.layers.37.block_sparse_moe.experts.141.w3", "model.layers.37.block_sparse_moe.experts.142.w3", "model.layers.37.block_sparse_moe.experts.143.w3", "model.layers.37.block_sparse_moe.experts.144.w3", "model.layers.37.block_sparse_moe.experts.145.w3", "model.layers.37.block_sparse_moe.experts.146.w3", "model.layers.37.block_sparse_moe.experts.147.w3", "model.layers.37.block_sparse_moe.experts.148.w3", "model.layers.37.block_sparse_moe.experts.149.w3", "model.layers.37.block_sparse_moe.experts.150.w3", "model.layers.37.block_sparse_moe.experts.151.w3", "model.layers.37.block_sparse_moe.experts.152.w3", "model.layers.37.block_sparse_moe.experts.153.w3", "model.layers.37.block_sparse_moe.experts.154.w3", "model.layers.37.block_sparse_moe.experts.155.w3", "model.layers.37.block_sparse_moe.experts.156.w3", "model.layers.37.block_sparse_moe.experts.157.w3", "model.layers.37.block_sparse_moe.experts.158.w3", "model.layers.37.block_sparse_moe.experts.159.w3", "model.layers.37.block_sparse_moe.experts.160.w3", "model.layers.37.block_sparse_moe.experts.161.w3", "model.layers.37.block_sparse_moe.experts.162.w3", "model.layers.37.block_sparse_moe.experts.163.w3", "model.layers.37.block_sparse_moe.experts.164.w3", "model.layers.37.block_sparse_moe.experts.165.w3", "model.layers.37.block_sparse_moe.experts.166.w3", "model.layers.37.block_sparse_moe.experts.167.w3", "model.layers.37.block_sparse_moe.experts.168.w3", "model.layers.37.block_sparse_moe.experts.169.w3", "model.layers.37.block_sparse_moe.experts.170.w3", "model.layers.37.block_sparse_moe.experts.171.w3", "model.layers.37.block_sparse_moe.experts.172.w3", "model.layers.37.block_sparse_moe.experts.173.w3", "model.layers.37.block_sparse_moe.experts.174.w3", "model.layers.37.block_sparse_moe.experts.175.w3", "model.layers.37.block_sparse_moe.experts.176.w3", "model.layers.37.block_sparse_moe.experts.177.w3", "model.layers.37.block_sparse_moe.experts.178.w3", "model.layers.37.block_sparse_moe.experts.179.w3", "model.layers.37.block_sparse_moe.experts.180.w3", "model.layers.37.block_sparse_moe.experts.181.w3", "model.layers.37.block_sparse_moe.experts.182.w3", "model.layers.37.block_sparse_moe.experts.183.w3", "model.layers.37.block_sparse_moe.experts.184.w3", "model.layers.37.block_sparse_moe.experts.185.w3", "model.layers.37.block_sparse_moe.experts.186.w3", "model.layers.37.block_sparse_moe.experts.187.w3", "model.layers.37.block_sparse_moe.experts.188.w3", "model.layers.37.block_sparse_moe.experts.189.w3", "model.layers.37.block_sparse_moe.experts.190.w3", "model.layers.37.block_sparse_moe.experts.191.w3", "model.layers.37.block_sparse_moe.experts.192.w3", "model.layers.37.block_sparse_moe.experts.193.w3", "model.layers.37.block_sparse_moe.experts.194.w3", "model.layers.37.block_sparse_moe.experts.195.w3", "model.layers.37.block_sparse_moe.experts.196.w3", "model.layers.37.block_sparse_moe.experts.197.w3", "model.layers.37.block_sparse_moe.experts.198.w3", "model.layers.37.block_sparse_moe.experts.199.w3", "model.layers.37.block_sparse_moe.experts.200.w3", "model.layers.37.block_sparse_moe.experts.201.w3", "model.layers.37.block_sparse_moe.experts.202.w3", "model.layers.37.block_sparse_moe.experts.203.w3", "model.layers.37.block_sparse_moe.experts.204.w3", "model.layers.37.block_sparse_moe.experts.205.w3", "model.layers.37.block_sparse_moe.experts.206.w3", "model.layers.37.block_sparse_moe.experts.207.w3", "model.layers.37.block_sparse_moe.experts.208.w3", "model.layers.37.block_sparse_moe.experts.209.w3", "model.layers.37.block_sparse_moe.experts.210.w3", "model.layers.37.block_sparse_moe.experts.211.w3", "model.layers.37.block_sparse_moe.experts.212.w3", "model.layers.37.block_sparse_moe.experts.213.w3", "model.layers.37.block_sparse_moe.experts.214.w3", "model.layers.37.block_sparse_moe.experts.215.w3", "model.layers.37.block_sparse_moe.experts.216.w3", "model.layers.37.block_sparse_moe.experts.217.w3", "model.layers.37.block_sparse_moe.experts.218.w3", "model.layers.37.block_sparse_moe.experts.219.w3", "model.layers.37.block_sparse_moe.experts.220.w3", "model.layers.37.block_sparse_moe.experts.221.w3", "model.layers.37.block_sparse_moe.experts.222.w3", "model.layers.37.block_sparse_moe.experts.223.w3", "model.layers.37.block_sparse_moe.experts.224.w3", "model.layers.37.block_sparse_moe.experts.225.w3", "model.layers.37.block_sparse_moe.experts.226.w3", "model.layers.37.block_sparse_moe.experts.227.w3", "model.layers.37.block_sparse_moe.experts.228.w3", "model.layers.37.block_sparse_moe.experts.229.w3", "model.layers.37.block_sparse_moe.experts.230.w3", "model.layers.37.block_sparse_moe.experts.231.w3", "model.layers.37.block_sparse_moe.experts.232.w3", "model.layers.37.block_sparse_moe.experts.233.w3", "model.layers.37.block_sparse_moe.experts.234.w3", "model.layers.37.block_sparse_moe.experts.235.w3", "model.layers.37.block_sparse_moe.experts.236.w3", "model.layers.37.block_sparse_moe.experts.237.w3", "model.layers.37.block_sparse_moe.experts.238.w3", "model.layers.37.block_sparse_moe.experts.239.w3", "model.layers.37.block_sparse_moe.experts.240.w3", "model.layers.37.block_sparse_moe.experts.241.w3", "model.layers.37.block_sparse_moe.experts.242.w3", "model.layers.37.block_sparse_moe.experts.243.w3", "model.layers.37.block_sparse_moe.experts.244.w3", "model.layers.37.block_sparse_moe.experts.245.w3", "model.layers.37.block_sparse_moe.experts.246.w3", "model.layers.37.block_sparse_moe.experts.247.w3", "model.layers.37.block_sparse_moe.experts.248.w3", "model.layers.37.block_sparse_moe.experts.249.w3", "model.layers.37.block_sparse_moe.experts.250.w3", "model.layers.37.block_sparse_moe.experts.251.w3", "model.layers.37.block_sparse_moe.experts.252.w3", "model.layers.37.block_sparse_moe.experts.253.w3", "model.layers.37.block_sparse_moe.experts.254.w3", "model.layers.37.block_sparse_moe.experts.255.w3", "model.layers.37.block_sparse_moe.experts.0.w2", "model.layers.37.block_sparse_moe.experts.1.w2", "model.layers.37.block_sparse_moe.experts.2.w2", "model.layers.37.block_sparse_moe.experts.3.w2", "model.layers.37.block_sparse_moe.experts.4.w2", "model.layers.37.block_sparse_moe.experts.5.w2", "model.layers.37.block_sparse_moe.experts.6.w2", "model.layers.37.block_sparse_moe.experts.7.w2", "model.layers.37.block_sparse_moe.experts.8.w2", "model.layers.37.block_sparse_moe.experts.9.w2", "model.layers.37.block_sparse_moe.experts.10.w2", "model.layers.37.block_sparse_moe.experts.11.w2", "model.layers.37.block_sparse_moe.experts.12.w2", "model.layers.37.block_sparse_moe.experts.13.w2", "model.layers.37.block_sparse_moe.experts.14.w2", "model.layers.37.block_sparse_moe.experts.15.w2", "model.layers.37.block_sparse_moe.experts.16.w2", "model.layers.37.block_sparse_moe.experts.17.w2", "model.layers.37.block_sparse_moe.experts.18.w2", "model.layers.37.block_sparse_moe.experts.19.w2", "model.layers.37.block_sparse_moe.experts.20.w2", "model.layers.37.block_sparse_moe.experts.21.w2", "model.layers.37.block_sparse_moe.experts.22.w2", "model.layers.37.block_sparse_moe.experts.23.w2", "model.layers.37.block_sparse_moe.experts.24.w2", "model.layers.37.block_sparse_moe.experts.25.w2", "model.layers.37.block_sparse_moe.experts.26.w2", "model.layers.37.block_sparse_moe.experts.27.w2", "model.layers.37.block_sparse_moe.experts.28.w2", "model.layers.37.block_sparse_moe.experts.29.w2", "model.layers.37.block_sparse_moe.experts.30.w2", "model.layers.37.block_sparse_moe.experts.31.w2", "model.layers.37.block_sparse_moe.experts.32.w2", "model.layers.37.block_sparse_moe.experts.33.w2", "model.layers.37.block_sparse_moe.experts.34.w2", "model.layers.37.block_sparse_moe.experts.35.w2", "model.layers.37.block_sparse_moe.experts.36.w2", "model.layers.37.block_sparse_moe.experts.37.w2", "model.layers.37.block_sparse_moe.experts.38.w2", "model.layers.37.block_sparse_moe.experts.39.w2", "model.layers.37.block_sparse_moe.experts.40.w2", "model.layers.37.block_sparse_moe.experts.41.w2", "model.layers.37.block_sparse_moe.experts.42.w2", "model.layers.37.block_sparse_moe.experts.43.w2", "model.layers.37.block_sparse_moe.experts.44.w2", "model.layers.37.block_sparse_moe.experts.45.w2", "model.layers.37.block_sparse_moe.experts.46.w2", "model.layers.37.block_sparse_moe.experts.47.w2", "model.layers.37.block_sparse_moe.experts.48.w2", "model.layers.37.block_sparse_moe.experts.49.w2", "model.layers.37.block_sparse_moe.experts.50.w2", "model.layers.37.block_sparse_moe.experts.51.w2", "model.layers.37.block_sparse_moe.experts.52.w2", "model.layers.37.block_sparse_moe.experts.53.w2", "model.layers.37.block_sparse_moe.experts.54.w2", "model.layers.37.block_sparse_moe.experts.55.w2", "model.layers.37.block_sparse_moe.experts.56.w2", "model.layers.37.block_sparse_moe.experts.57.w2", "model.layers.37.block_sparse_moe.experts.58.w2", "model.layers.37.block_sparse_moe.experts.59.w2", "model.layers.37.block_sparse_moe.experts.60.w2", "model.layers.37.block_sparse_moe.experts.61.w2", "model.layers.37.block_sparse_moe.experts.62.w2", "model.layers.37.block_sparse_moe.experts.63.w2", "model.layers.37.block_sparse_moe.experts.64.w2", "model.layers.37.block_sparse_moe.experts.65.w2", "model.layers.37.block_sparse_moe.experts.66.w2", "model.layers.37.block_sparse_moe.experts.67.w2", "model.layers.37.block_sparse_moe.experts.68.w2", "model.layers.37.block_sparse_moe.experts.69.w2", "model.layers.37.block_sparse_moe.experts.70.w2", "model.layers.37.block_sparse_moe.experts.71.w2", "model.layers.37.block_sparse_moe.experts.72.w2", "model.layers.37.block_sparse_moe.experts.73.w2", "model.layers.37.block_sparse_moe.experts.74.w2", "model.layers.37.block_sparse_moe.experts.75.w2", "model.layers.37.block_sparse_moe.experts.76.w2", "model.layers.37.block_sparse_moe.experts.77.w2", "model.layers.37.block_sparse_moe.experts.78.w2", "model.layers.37.block_sparse_moe.experts.79.w2", "model.layers.37.block_sparse_moe.experts.80.w2", "model.layers.37.block_sparse_moe.experts.81.w2", "model.layers.37.block_sparse_moe.experts.82.w2", "model.layers.37.block_sparse_moe.experts.83.w2", "model.layers.37.block_sparse_moe.experts.84.w2", "model.layers.37.block_sparse_moe.experts.85.w2", "model.layers.37.block_sparse_moe.experts.86.w2", "model.layers.37.block_sparse_moe.experts.87.w2", "model.layers.37.block_sparse_moe.experts.88.w2", "model.layers.37.block_sparse_moe.experts.89.w2", "model.layers.37.block_sparse_moe.experts.90.w2", "model.layers.37.block_sparse_moe.experts.91.w2", "model.layers.37.block_sparse_moe.experts.92.w2", "model.layers.37.block_sparse_moe.experts.93.w2", "model.layers.37.block_sparse_moe.experts.94.w2", "model.layers.37.block_sparse_moe.experts.95.w2", "model.layers.37.block_sparse_moe.experts.96.w2", "model.layers.37.block_sparse_moe.experts.97.w2", "model.layers.37.block_sparse_moe.experts.98.w2", "model.layers.37.block_sparse_moe.experts.99.w2", "model.layers.37.block_sparse_moe.experts.100.w2", "model.layers.37.block_sparse_moe.experts.101.w2", "model.layers.37.block_sparse_moe.experts.102.w2", "model.layers.37.block_sparse_moe.experts.103.w2", "model.layers.37.block_sparse_moe.experts.104.w2", "model.layers.37.block_sparse_moe.experts.105.w2", "model.layers.37.block_sparse_moe.experts.106.w2", "model.layers.37.block_sparse_moe.experts.107.w2", "model.layers.37.block_sparse_moe.experts.108.w2", "model.layers.37.block_sparse_moe.experts.109.w2", "model.layers.37.block_sparse_moe.experts.110.w2", "model.layers.37.block_sparse_moe.experts.111.w2", "model.layers.37.block_sparse_moe.experts.112.w2", "model.layers.37.block_sparse_moe.experts.113.w2", "model.layers.37.block_sparse_moe.experts.114.w2", "model.layers.37.block_sparse_moe.experts.115.w2", "model.layers.37.block_sparse_moe.experts.116.w2", "model.layers.37.block_sparse_moe.experts.117.w2", "model.layers.37.block_sparse_moe.experts.118.w2", "model.layers.37.block_sparse_moe.experts.119.w2", "model.layers.37.block_sparse_moe.experts.120.w2", "model.layers.37.block_sparse_moe.experts.121.w2", "model.layers.37.block_sparse_moe.experts.122.w2", "model.layers.37.block_sparse_moe.experts.123.w2", "model.layers.37.block_sparse_moe.experts.124.w2", "model.layers.37.block_sparse_moe.experts.125.w2", "model.layers.37.block_sparse_moe.experts.126.w2", "model.layers.37.block_sparse_moe.experts.127.w2", "model.layers.37.block_sparse_moe.experts.128.w2", "model.layers.37.block_sparse_moe.experts.129.w2", "model.layers.37.block_sparse_moe.experts.130.w2", "model.layers.37.block_sparse_moe.experts.131.w2", "model.layers.37.block_sparse_moe.experts.132.w2", "model.layers.37.block_sparse_moe.experts.133.w2", "model.layers.37.block_sparse_moe.experts.134.w2", "model.layers.37.block_sparse_moe.experts.135.w2", "model.layers.37.block_sparse_moe.experts.136.w2", "model.layers.37.block_sparse_moe.experts.137.w2", "model.layers.37.block_sparse_moe.experts.138.w2", "model.layers.37.block_sparse_moe.experts.139.w2", "model.layers.37.block_sparse_moe.experts.140.w2", "model.layers.37.block_sparse_moe.experts.141.w2", "model.layers.37.block_sparse_moe.experts.142.w2", "model.layers.37.block_sparse_moe.experts.143.w2", "model.layers.37.block_sparse_moe.experts.144.w2", "model.layers.37.block_sparse_moe.experts.145.w2", "model.layers.37.block_sparse_moe.experts.146.w2", "model.layers.37.block_sparse_moe.experts.147.w2", "model.layers.37.block_sparse_moe.experts.148.w2", "model.layers.37.block_sparse_moe.experts.149.w2", "model.layers.37.block_sparse_moe.experts.150.w2", "model.layers.37.block_sparse_moe.experts.151.w2", "model.layers.37.block_sparse_moe.experts.152.w2", "model.layers.37.block_sparse_moe.experts.153.w2", "model.layers.37.block_sparse_moe.experts.154.w2", "model.layers.37.block_sparse_moe.experts.155.w2", "model.layers.37.block_sparse_moe.experts.156.w2", "model.layers.37.block_sparse_moe.experts.157.w2", "model.layers.37.block_sparse_moe.experts.158.w2", "model.layers.37.block_sparse_moe.experts.159.w2", "model.layers.37.block_sparse_moe.experts.160.w2", "model.layers.37.block_sparse_moe.experts.161.w2", "model.layers.37.block_sparse_moe.experts.162.w2", "model.layers.37.block_sparse_moe.experts.163.w2", "model.layers.37.block_sparse_moe.experts.164.w2", "model.layers.37.block_sparse_moe.experts.165.w2", "model.layers.37.block_sparse_moe.experts.166.w2", "model.layers.37.block_sparse_moe.experts.167.w2", "model.layers.37.block_sparse_moe.experts.168.w2", "model.layers.37.block_sparse_moe.experts.169.w2", "model.layers.37.block_sparse_moe.experts.170.w2", "model.layers.37.block_sparse_moe.experts.171.w2", "model.layers.37.block_sparse_moe.experts.172.w2", "model.layers.37.block_sparse_moe.experts.173.w2", "model.layers.37.block_sparse_moe.experts.174.w2", "model.layers.37.block_sparse_moe.experts.175.w2", "model.layers.37.block_sparse_moe.experts.176.w2", "model.layers.37.block_sparse_moe.experts.177.w2", "model.layers.37.block_sparse_moe.experts.178.w2", "model.layers.37.block_sparse_moe.experts.179.w2", "model.layers.37.block_sparse_moe.experts.180.w2", "model.layers.37.block_sparse_moe.experts.181.w2", "model.layers.37.block_sparse_moe.experts.182.w2", "model.layers.37.block_sparse_moe.experts.183.w2", "model.layers.37.block_sparse_moe.experts.184.w2", "model.layers.37.block_sparse_moe.experts.185.w2", "model.layers.37.block_sparse_moe.experts.186.w2", "model.layers.37.block_sparse_moe.experts.187.w2", "model.layers.37.block_sparse_moe.experts.188.w2", "model.layers.37.block_sparse_moe.experts.189.w2", "model.layers.37.block_sparse_moe.experts.190.w2", "model.layers.37.block_sparse_moe.experts.191.w2", "model.layers.37.block_sparse_moe.experts.192.w2", "model.layers.37.block_sparse_moe.experts.193.w2", "model.layers.37.block_sparse_moe.experts.194.w2", "model.layers.37.block_sparse_moe.experts.195.w2", "model.layers.37.block_sparse_moe.experts.196.w2", "model.layers.37.block_sparse_moe.experts.197.w2", "model.layers.37.block_sparse_moe.experts.198.w2", "model.layers.37.block_sparse_moe.experts.199.w2", "model.layers.37.block_sparse_moe.experts.200.w2", "model.layers.37.block_sparse_moe.experts.201.w2", "model.layers.37.block_sparse_moe.experts.202.w2", "model.layers.37.block_sparse_moe.experts.203.w2", "model.layers.37.block_sparse_moe.experts.204.w2", "model.layers.37.block_sparse_moe.experts.205.w2", "model.layers.37.block_sparse_moe.experts.206.w2", "model.layers.37.block_sparse_moe.experts.207.w2", "model.layers.37.block_sparse_moe.experts.208.w2", "model.layers.37.block_sparse_moe.experts.209.w2", "model.layers.37.block_sparse_moe.experts.210.w2", "model.layers.37.block_sparse_moe.experts.211.w2", "model.layers.37.block_sparse_moe.experts.212.w2", "model.layers.37.block_sparse_moe.experts.213.w2", "model.layers.37.block_sparse_moe.experts.214.w2", "model.layers.37.block_sparse_moe.experts.215.w2", "model.layers.37.block_sparse_moe.experts.216.w2", "model.layers.37.block_sparse_moe.experts.217.w2", "model.layers.37.block_sparse_moe.experts.218.w2", "model.layers.37.block_sparse_moe.experts.219.w2", "model.layers.37.block_sparse_moe.experts.220.w2", "model.layers.37.block_sparse_moe.experts.221.w2", "model.layers.37.block_sparse_moe.experts.222.w2", "model.layers.37.block_sparse_moe.experts.223.w2", "model.layers.37.block_sparse_moe.experts.224.w2", "model.layers.37.block_sparse_moe.experts.225.w2", "model.layers.37.block_sparse_moe.experts.226.w2", "model.layers.37.block_sparse_moe.experts.227.w2", "model.layers.37.block_sparse_moe.experts.228.w2", "model.layers.37.block_sparse_moe.experts.229.w2", "model.layers.37.block_sparse_moe.experts.230.w2", "model.layers.37.block_sparse_moe.experts.231.w2", "model.layers.37.block_sparse_moe.experts.232.w2", "model.layers.37.block_sparse_moe.experts.233.w2", "model.layers.37.block_sparse_moe.experts.234.w2", "model.layers.37.block_sparse_moe.experts.235.w2", "model.layers.37.block_sparse_moe.experts.236.w2", "model.layers.37.block_sparse_moe.experts.237.w2", "model.layers.37.block_sparse_moe.experts.238.w2", "model.layers.37.block_sparse_moe.experts.239.w2", "model.layers.37.block_sparse_moe.experts.240.w2", "model.layers.37.block_sparse_moe.experts.241.w2", "model.layers.37.block_sparse_moe.experts.242.w2", "model.layers.37.block_sparse_moe.experts.243.w2", "model.layers.37.block_sparse_moe.experts.244.w2", "model.layers.37.block_sparse_moe.experts.245.w2", "model.layers.37.block_sparse_moe.experts.246.w2", "model.layers.37.block_sparse_moe.experts.247.w2", "model.layers.37.block_sparse_moe.experts.248.w2", "model.layers.37.block_sparse_moe.experts.249.w2", "model.layers.37.block_sparse_moe.experts.250.w2", "model.layers.37.block_sparse_moe.experts.251.w2", "model.layers.37.block_sparse_moe.experts.252.w2", "model.layers.37.block_sparse_moe.experts.253.w2", "model.layers.37.block_sparse_moe.experts.254.w2", "model.layers.37.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0005541162565350644, "dbits": 3623878656 } ] }, { "idx": 76, "layers": [ "model.layers.38.self_attn.q_proj", "model.layers.38.self_attn.k_proj", "model.layers.38.self_attn.v_proj", "model.layers.38.self_attn.o_proj" ], "candidates": [ { "dkld": -0.005548894777893998, "dbits": 44040192 } ] }, { "idx": 77, "layers": [ "model.layers.38.block_sparse_moe.experts.0.w1", "model.layers.38.block_sparse_moe.experts.1.w1", "model.layers.38.block_sparse_moe.experts.2.w1", "model.layers.38.block_sparse_moe.experts.3.w1", "model.layers.38.block_sparse_moe.experts.4.w1", "model.layers.38.block_sparse_moe.experts.5.w1", "model.layers.38.block_sparse_moe.experts.6.w1", "model.layers.38.block_sparse_moe.experts.7.w1", "model.layers.38.block_sparse_moe.experts.8.w1", "model.layers.38.block_sparse_moe.experts.9.w1", "model.layers.38.block_sparse_moe.experts.10.w1", "model.layers.38.block_sparse_moe.experts.11.w1", "model.layers.38.block_sparse_moe.experts.12.w1", "model.layers.38.block_sparse_moe.experts.13.w1", "model.layers.38.block_sparse_moe.experts.14.w1", "model.layers.38.block_sparse_moe.experts.15.w1", "model.layers.38.block_sparse_moe.experts.16.w1", "model.layers.38.block_sparse_moe.experts.17.w1", "model.layers.38.block_sparse_moe.experts.18.w1", "model.layers.38.block_sparse_moe.experts.19.w1", "model.layers.38.block_sparse_moe.experts.20.w1", "model.layers.38.block_sparse_moe.experts.21.w1", "model.layers.38.block_sparse_moe.experts.22.w1", "model.layers.38.block_sparse_moe.experts.23.w1", "model.layers.38.block_sparse_moe.experts.24.w1", "model.layers.38.block_sparse_moe.experts.25.w1", "model.layers.38.block_sparse_moe.experts.26.w1", "model.layers.38.block_sparse_moe.experts.27.w1", "model.layers.38.block_sparse_moe.experts.28.w1", "model.layers.38.block_sparse_moe.experts.29.w1", "model.layers.38.block_sparse_moe.experts.30.w1", "model.layers.38.block_sparse_moe.experts.31.w1", "model.layers.38.block_sparse_moe.experts.32.w1", "model.layers.38.block_sparse_moe.experts.33.w1", "model.layers.38.block_sparse_moe.experts.34.w1", "model.layers.38.block_sparse_moe.experts.35.w1", "model.layers.38.block_sparse_moe.experts.36.w1", "model.layers.38.block_sparse_moe.experts.37.w1", "model.layers.38.block_sparse_moe.experts.38.w1", "model.layers.38.block_sparse_moe.experts.39.w1", "model.layers.38.block_sparse_moe.experts.40.w1", "model.layers.38.block_sparse_moe.experts.41.w1", "model.layers.38.block_sparse_moe.experts.42.w1", "model.layers.38.block_sparse_moe.experts.43.w1", "model.layers.38.block_sparse_moe.experts.44.w1", "model.layers.38.block_sparse_moe.experts.45.w1", "model.layers.38.block_sparse_moe.experts.46.w1", "model.layers.38.block_sparse_moe.experts.47.w1", "model.layers.38.block_sparse_moe.experts.48.w1", "model.layers.38.block_sparse_moe.experts.49.w1", "model.layers.38.block_sparse_moe.experts.50.w1", "model.layers.38.block_sparse_moe.experts.51.w1", "model.layers.38.block_sparse_moe.experts.52.w1", "model.layers.38.block_sparse_moe.experts.53.w1", "model.layers.38.block_sparse_moe.experts.54.w1", "model.layers.38.block_sparse_moe.experts.55.w1", "model.layers.38.block_sparse_moe.experts.56.w1", "model.layers.38.block_sparse_moe.experts.57.w1", "model.layers.38.block_sparse_moe.experts.58.w1", "model.layers.38.block_sparse_moe.experts.59.w1", "model.layers.38.block_sparse_moe.experts.60.w1", "model.layers.38.block_sparse_moe.experts.61.w1", "model.layers.38.block_sparse_moe.experts.62.w1", "model.layers.38.block_sparse_moe.experts.63.w1", "model.layers.38.block_sparse_moe.experts.64.w1", "model.layers.38.block_sparse_moe.experts.65.w1", "model.layers.38.block_sparse_moe.experts.66.w1", "model.layers.38.block_sparse_moe.experts.67.w1", "model.layers.38.block_sparse_moe.experts.68.w1", "model.layers.38.block_sparse_moe.experts.69.w1", "model.layers.38.block_sparse_moe.experts.70.w1", "model.layers.38.block_sparse_moe.experts.71.w1", "model.layers.38.block_sparse_moe.experts.72.w1", "model.layers.38.block_sparse_moe.experts.73.w1", "model.layers.38.block_sparse_moe.experts.74.w1", "model.layers.38.block_sparse_moe.experts.75.w1", "model.layers.38.block_sparse_moe.experts.76.w1", "model.layers.38.block_sparse_moe.experts.77.w1", "model.layers.38.block_sparse_moe.experts.78.w1", "model.layers.38.block_sparse_moe.experts.79.w1", "model.layers.38.block_sparse_moe.experts.80.w1", "model.layers.38.block_sparse_moe.experts.81.w1", "model.layers.38.block_sparse_moe.experts.82.w1", "model.layers.38.block_sparse_moe.experts.83.w1", "model.layers.38.block_sparse_moe.experts.84.w1", "model.layers.38.block_sparse_moe.experts.85.w1", "model.layers.38.block_sparse_moe.experts.86.w1", "model.layers.38.block_sparse_moe.experts.87.w1", "model.layers.38.block_sparse_moe.experts.88.w1", "model.layers.38.block_sparse_moe.experts.89.w1", "model.layers.38.block_sparse_moe.experts.90.w1", "model.layers.38.block_sparse_moe.experts.91.w1", "model.layers.38.block_sparse_moe.experts.92.w1", "model.layers.38.block_sparse_moe.experts.93.w1", "model.layers.38.block_sparse_moe.experts.94.w1", "model.layers.38.block_sparse_moe.experts.95.w1", "model.layers.38.block_sparse_moe.experts.96.w1", "model.layers.38.block_sparse_moe.experts.97.w1", "model.layers.38.block_sparse_moe.experts.98.w1", "model.layers.38.block_sparse_moe.experts.99.w1", "model.layers.38.block_sparse_moe.experts.100.w1", "model.layers.38.block_sparse_moe.experts.101.w1", "model.layers.38.block_sparse_moe.experts.102.w1", "model.layers.38.block_sparse_moe.experts.103.w1", "model.layers.38.block_sparse_moe.experts.104.w1", "model.layers.38.block_sparse_moe.experts.105.w1", "model.layers.38.block_sparse_moe.experts.106.w1", "model.layers.38.block_sparse_moe.experts.107.w1", "model.layers.38.block_sparse_moe.experts.108.w1", "model.layers.38.block_sparse_moe.experts.109.w1", "model.layers.38.block_sparse_moe.experts.110.w1", "model.layers.38.block_sparse_moe.experts.111.w1", "model.layers.38.block_sparse_moe.experts.112.w1", "model.layers.38.block_sparse_moe.experts.113.w1", "model.layers.38.block_sparse_moe.experts.114.w1", "model.layers.38.block_sparse_moe.experts.115.w1", "model.layers.38.block_sparse_moe.experts.116.w1", "model.layers.38.block_sparse_moe.experts.117.w1", "model.layers.38.block_sparse_moe.experts.118.w1", "model.layers.38.block_sparse_moe.experts.119.w1", "model.layers.38.block_sparse_moe.experts.120.w1", "model.layers.38.block_sparse_moe.experts.121.w1", "model.layers.38.block_sparse_moe.experts.122.w1", "model.layers.38.block_sparse_moe.experts.123.w1", "model.layers.38.block_sparse_moe.experts.124.w1", "model.layers.38.block_sparse_moe.experts.125.w1", "model.layers.38.block_sparse_moe.experts.126.w1", "model.layers.38.block_sparse_moe.experts.127.w1", "model.layers.38.block_sparse_moe.experts.128.w1", "model.layers.38.block_sparse_moe.experts.129.w1", "model.layers.38.block_sparse_moe.experts.130.w1", "model.layers.38.block_sparse_moe.experts.131.w1", "model.layers.38.block_sparse_moe.experts.132.w1", "model.layers.38.block_sparse_moe.experts.133.w1", "model.layers.38.block_sparse_moe.experts.134.w1", "model.layers.38.block_sparse_moe.experts.135.w1", "model.layers.38.block_sparse_moe.experts.136.w1", "model.layers.38.block_sparse_moe.experts.137.w1", "model.layers.38.block_sparse_moe.experts.138.w1", "model.layers.38.block_sparse_moe.experts.139.w1", "model.layers.38.block_sparse_moe.experts.140.w1", "model.layers.38.block_sparse_moe.experts.141.w1", "model.layers.38.block_sparse_moe.experts.142.w1", "model.layers.38.block_sparse_moe.experts.143.w1", "model.layers.38.block_sparse_moe.experts.144.w1", "model.layers.38.block_sparse_moe.experts.145.w1", "model.layers.38.block_sparse_moe.experts.146.w1", "model.layers.38.block_sparse_moe.experts.147.w1", "model.layers.38.block_sparse_moe.experts.148.w1", "model.layers.38.block_sparse_moe.experts.149.w1", "model.layers.38.block_sparse_moe.experts.150.w1", "model.layers.38.block_sparse_moe.experts.151.w1", "model.layers.38.block_sparse_moe.experts.152.w1", "model.layers.38.block_sparse_moe.experts.153.w1", "model.layers.38.block_sparse_moe.experts.154.w1", "model.layers.38.block_sparse_moe.experts.155.w1", "model.layers.38.block_sparse_moe.experts.156.w1", "model.layers.38.block_sparse_moe.experts.157.w1", "model.layers.38.block_sparse_moe.experts.158.w1", "model.layers.38.block_sparse_moe.experts.159.w1", "model.layers.38.block_sparse_moe.experts.160.w1", "model.layers.38.block_sparse_moe.experts.161.w1", "model.layers.38.block_sparse_moe.experts.162.w1", "model.layers.38.block_sparse_moe.experts.163.w1", "model.layers.38.block_sparse_moe.experts.164.w1", "model.layers.38.block_sparse_moe.experts.165.w1", "model.layers.38.block_sparse_moe.experts.166.w1", "model.layers.38.block_sparse_moe.experts.167.w1", "model.layers.38.block_sparse_moe.experts.168.w1", "model.layers.38.block_sparse_moe.experts.169.w1", "model.layers.38.block_sparse_moe.experts.170.w1", "model.layers.38.block_sparse_moe.experts.171.w1", "model.layers.38.block_sparse_moe.experts.172.w1", "model.layers.38.block_sparse_moe.experts.173.w1", "model.layers.38.block_sparse_moe.experts.174.w1", "model.layers.38.block_sparse_moe.experts.175.w1", "model.layers.38.block_sparse_moe.experts.176.w1", "model.layers.38.block_sparse_moe.experts.177.w1", "model.layers.38.block_sparse_moe.experts.178.w1", "model.layers.38.block_sparse_moe.experts.179.w1", "model.layers.38.block_sparse_moe.experts.180.w1", "model.layers.38.block_sparse_moe.experts.181.w1", "model.layers.38.block_sparse_moe.experts.182.w1", "model.layers.38.block_sparse_moe.experts.183.w1", "model.layers.38.block_sparse_moe.experts.184.w1", "model.layers.38.block_sparse_moe.experts.185.w1", "model.layers.38.block_sparse_moe.experts.186.w1", "model.layers.38.block_sparse_moe.experts.187.w1", "model.layers.38.block_sparse_moe.experts.188.w1", "model.layers.38.block_sparse_moe.experts.189.w1", "model.layers.38.block_sparse_moe.experts.190.w1", "model.layers.38.block_sparse_moe.experts.191.w1", "model.layers.38.block_sparse_moe.experts.192.w1", "model.layers.38.block_sparse_moe.experts.193.w1", "model.layers.38.block_sparse_moe.experts.194.w1", "model.layers.38.block_sparse_moe.experts.195.w1", "model.layers.38.block_sparse_moe.experts.196.w1", "model.layers.38.block_sparse_moe.experts.197.w1", "model.layers.38.block_sparse_moe.experts.198.w1", "model.layers.38.block_sparse_moe.experts.199.w1", "model.layers.38.block_sparse_moe.experts.200.w1", "model.layers.38.block_sparse_moe.experts.201.w1", "model.layers.38.block_sparse_moe.experts.202.w1", "model.layers.38.block_sparse_moe.experts.203.w1", "model.layers.38.block_sparse_moe.experts.204.w1", "model.layers.38.block_sparse_moe.experts.205.w1", "model.layers.38.block_sparse_moe.experts.206.w1", "model.layers.38.block_sparse_moe.experts.207.w1", "model.layers.38.block_sparse_moe.experts.208.w1", "model.layers.38.block_sparse_moe.experts.209.w1", "model.layers.38.block_sparse_moe.experts.210.w1", "model.layers.38.block_sparse_moe.experts.211.w1", "model.layers.38.block_sparse_moe.experts.212.w1", "model.layers.38.block_sparse_moe.experts.213.w1", "model.layers.38.block_sparse_moe.experts.214.w1", "model.layers.38.block_sparse_moe.experts.215.w1", "model.layers.38.block_sparse_moe.experts.216.w1", "model.layers.38.block_sparse_moe.experts.217.w1", "model.layers.38.block_sparse_moe.experts.218.w1", "model.layers.38.block_sparse_moe.experts.219.w1", "model.layers.38.block_sparse_moe.experts.220.w1", "model.layers.38.block_sparse_moe.experts.221.w1", "model.layers.38.block_sparse_moe.experts.222.w1", "model.layers.38.block_sparse_moe.experts.223.w1", "model.layers.38.block_sparse_moe.experts.224.w1", "model.layers.38.block_sparse_moe.experts.225.w1", "model.layers.38.block_sparse_moe.experts.226.w1", "model.layers.38.block_sparse_moe.experts.227.w1", "model.layers.38.block_sparse_moe.experts.228.w1", "model.layers.38.block_sparse_moe.experts.229.w1", "model.layers.38.block_sparse_moe.experts.230.w1", "model.layers.38.block_sparse_moe.experts.231.w1", "model.layers.38.block_sparse_moe.experts.232.w1", "model.layers.38.block_sparse_moe.experts.233.w1", "model.layers.38.block_sparse_moe.experts.234.w1", "model.layers.38.block_sparse_moe.experts.235.w1", "model.layers.38.block_sparse_moe.experts.236.w1", "model.layers.38.block_sparse_moe.experts.237.w1", "model.layers.38.block_sparse_moe.experts.238.w1", "model.layers.38.block_sparse_moe.experts.239.w1", "model.layers.38.block_sparse_moe.experts.240.w1", "model.layers.38.block_sparse_moe.experts.241.w1", "model.layers.38.block_sparse_moe.experts.242.w1", "model.layers.38.block_sparse_moe.experts.243.w1", "model.layers.38.block_sparse_moe.experts.244.w1", "model.layers.38.block_sparse_moe.experts.245.w1", "model.layers.38.block_sparse_moe.experts.246.w1", "model.layers.38.block_sparse_moe.experts.247.w1", "model.layers.38.block_sparse_moe.experts.248.w1", "model.layers.38.block_sparse_moe.experts.249.w1", "model.layers.38.block_sparse_moe.experts.250.w1", "model.layers.38.block_sparse_moe.experts.251.w1", "model.layers.38.block_sparse_moe.experts.252.w1", "model.layers.38.block_sparse_moe.experts.253.w1", "model.layers.38.block_sparse_moe.experts.254.w1", "model.layers.38.block_sparse_moe.experts.255.w1", "model.layers.38.block_sparse_moe.experts.0.w3", "model.layers.38.block_sparse_moe.experts.1.w3", "model.layers.38.block_sparse_moe.experts.2.w3", "model.layers.38.block_sparse_moe.experts.3.w3", "model.layers.38.block_sparse_moe.experts.4.w3", "model.layers.38.block_sparse_moe.experts.5.w3", "model.layers.38.block_sparse_moe.experts.6.w3", "model.layers.38.block_sparse_moe.experts.7.w3", "model.layers.38.block_sparse_moe.experts.8.w3", "model.layers.38.block_sparse_moe.experts.9.w3", "model.layers.38.block_sparse_moe.experts.10.w3", "model.layers.38.block_sparse_moe.experts.11.w3", "model.layers.38.block_sparse_moe.experts.12.w3", "model.layers.38.block_sparse_moe.experts.13.w3", "model.layers.38.block_sparse_moe.experts.14.w3", "model.layers.38.block_sparse_moe.experts.15.w3", "model.layers.38.block_sparse_moe.experts.16.w3", "model.layers.38.block_sparse_moe.experts.17.w3", "model.layers.38.block_sparse_moe.experts.18.w3", "model.layers.38.block_sparse_moe.experts.19.w3", "model.layers.38.block_sparse_moe.experts.20.w3", "model.layers.38.block_sparse_moe.experts.21.w3", "model.layers.38.block_sparse_moe.experts.22.w3", "model.layers.38.block_sparse_moe.experts.23.w3", "model.layers.38.block_sparse_moe.experts.24.w3", "model.layers.38.block_sparse_moe.experts.25.w3", "model.layers.38.block_sparse_moe.experts.26.w3", "model.layers.38.block_sparse_moe.experts.27.w3", "model.layers.38.block_sparse_moe.experts.28.w3", "model.layers.38.block_sparse_moe.experts.29.w3", "model.layers.38.block_sparse_moe.experts.30.w3", "model.layers.38.block_sparse_moe.experts.31.w3", "model.layers.38.block_sparse_moe.experts.32.w3", "model.layers.38.block_sparse_moe.experts.33.w3", "model.layers.38.block_sparse_moe.experts.34.w3", "model.layers.38.block_sparse_moe.experts.35.w3", "model.layers.38.block_sparse_moe.experts.36.w3", "model.layers.38.block_sparse_moe.experts.37.w3", "model.layers.38.block_sparse_moe.experts.38.w3", "model.layers.38.block_sparse_moe.experts.39.w3", "model.layers.38.block_sparse_moe.experts.40.w3", "model.layers.38.block_sparse_moe.experts.41.w3", "model.layers.38.block_sparse_moe.experts.42.w3", "model.layers.38.block_sparse_moe.experts.43.w3", "model.layers.38.block_sparse_moe.experts.44.w3", "model.layers.38.block_sparse_moe.experts.45.w3", "model.layers.38.block_sparse_moe.experts.46.w3", "model.layers.38.block_sparse_moe.experts.47.w3", "model.layers.38.block_sparse_moe.experts.48.w3", "model.layers.38.block_sparse_moe.experts.49.w3", "model.layers.38.block_sparse_moe.experts.50.w3", "model.layers.38.block_sparse_moe.experts.51.w3", "model.layers.38.block_sparse_moe.experts.52.w3", "model.layers.38.block_sparse_moe.experts.53.w3", "model.layers.38.block_sparse_moe.experts.54.w3", "model.layers.38.block_sparse_moe.experts.55.w3", "model.layers.38.block_sparse_moe.experts.56.w3", "model.layers.38.block_sparse_moe.experts.57.w3", "model.layers.38.block_sparse_moe.experts.58.w3", "model.layers.38.block_sparse_moe.experts.59.w3", "model.layers.38.block_sparse_moe.experts.60.w3", "model.layers.38.block_sparse_moe.experts.61.w3", "model.layers.38.block_sparse_moe.experts.62.w3", "model.layers.38.block_sparse_moe.experts.63.w3", "model.layers.38.block_sparse_moe.experts.64.w3", "model.layers.38.block_sparse_moe.experts.65.w3", "model.layers.38.block_sparse_moe.experts.66.w3", "model.layers.38.block_sparse_moe.experts.67.w3", "model.layers.38.block_sparse_moe.experts.68.w3", "model.layers.38.block_sparse_moe.experts.69.w3", "model.layers.38.block_sparse_moe.experts.70.w3", "model.layers.38.block_sparse_moe.experts.71.w3", "model.layers.38.block_sparse_moe.experts.72.w3", "model.layers.38.block_sparse_moe.experts.73.w3", "model.layers.38.block_sparse_moe.experts.74.w3", "model.layers.38.block_sparse_moe.experts.75.w3", "model.layers.38.block_sparse_moe.experts.76.w3", "model.layers.38.block_sparse_moe.experts.77.w3", "model.layers.38.block_sparse_moe.experts.78.w3", "model.layers.38.block_sparse_moe.experts.79.w3", "model.layers.38.block_sparse_moe.experts.80.w3", "model.layers.38.block_sparse_moe.experts.81.w3", "model.layers.38.block_sparse_moe.experts.82.w3", "model.layers.38.block_sparse_moe.experts.83.w3", "model.layers.38.block_sparse_moe.experts.84.w3", "model.layers.38.block_sparse_moe.experts.85.w3", "model.layers.38.block_sparse_moe.experts.86.w3", "model.layers.38.block_sparse_moe.experts.87.w3", "model.layers.38.block_sparse_moe.experts.88.w3", "model.layers.38.block_sparse_moe.experts.89.w3", "model.layers.38.block_sparse_moe.experts.90.w3", "model.layers.38.block_sparse_moe.experts.91.w3", "model.layers.38.block_sparse_moe.experts.92.w3", "model.layers.38.block_sparse_moe.experts.93.w3", "model.layers.38.block_sparse_moe.experts.94.w3", "model.layers.38.block_sparse_moe.experts.95.w3", "model.layers.38.block_sparse_moe.experts.96.w3", "model.layers.38.block_sparse_moe.experts.97.w3", "model.layers.38.block_sparse_moe.experts.98.w3", "model.layers.38.block_sparse_moe.experts.99.w3", "model.layers.38.block_sparse_moe.experts.100.w3", "model.layers.38.block_sparse_moe.experts.101.w3", "model.layers.38.block_sparse_moe.experts.102.w3", "model.layers.38.block_sparse_moe.experts.103.w3", "model.layers.38.block_sparse_moe.experts.104.w3", "model.layers.38.block_sparse_moe.experts.105.w3", "model.layers.38.block_sparse_moe.experts.106.w3", "model.layers.38.block_sparse_moe.experts.107.w3", "model.layers.38.block_sparse_moe.experts.108.w3", "model.layers.38.block_sparse_moe.experts.109.w3", "model.layers.38.block_sparse_moe.experts.110.w3", "model.layers.38.block_sparse_moe.experts.111.w3", "model.layers.38.block_sparse_moe.experts.112.w3", "model.layers.38.block_sparse_moe.experts.113.w3", "model.layers.38.block_sparse_moe.experts.114.w3", "model.layers.38.block_sparse_moe.experts.115.w3", "model.layers.38.block_sparse_moe.experts.116.w3", "model.layers.38.block_sparse_moe.experts.117.w3", "model.layers.38.block_sparse_moe.experts.118.w3", "model.layers.38.block_sparse_moe.experts.119.w3", "model.layers.38.block_sparse_moe.experts.120.w3", "model.layers.38.block_sparse_moe.experts.121.w3", "model.layers.38.block_sparse_moe.experts.122.w3", "model.layers.38.block_sparse_moe.experts.123.w3", "model.layers.38.block_sparse_moe.experts.124.w3", "model.layers.38.block_sparse_moe.experts.125.w3", "model.layers.38.block_sparse_moe.experts.126.w3", "model.layers.38.block_sparse_moe.experts.127.w3", "model.layers.38.block_sparse_moe.experts.128.w3", "model.layers.38.block_sparse_moe.experts.129.w3", "model.layers.38.block_sparse_moe.experts.130.w3", "model.layers.38.block_sparse_moe.experts.131.w3", "model.layers.38.block_sparse_moe.experts.132.w3", "model.layers.38.block_sparse_moe.experts.133.w3", "model.layers.38.block_sparse_moe.experts.134.w3", "model.layers.38.block_sparse_moe.experts.135.w3", "model.layers.38.block_sparse_moe.experts.136.w3", "model.layers.38.block_sparse_moe.experts.137.w3", "model.layers.38.block_sparse_moe.experts.138.w3", "model.layers.38.block_sparse_moe.experts.139.w3", "model.layers.38.block_sparse_moe.experts.140.w3", "model.layers.38.block_sparse_moe.experts.141.w3", "model.layers.38.block_sparse_moe.experts.142.w3", "model.layers.38.block_sparse_moe.experts.143.w3", "model.layers.38.block_sparse_moe.experts.144.w3", "model.layers.38.block_sparse_moe.experts.145.w3", "model.layers.38.block_sparse_moe.experts.146.w3", "model.layers.38.block_sparse_moe.experts.147.w3", "model.layers.38.block_sparse_moe.experts.148.w3", "model.layers.38.block_sparse_moe.experts.149.w3", "model.layers.38.block_sparse_moe.experts.150.w3", "model.layers.38.block_sparse_moe.experts.151.w3", "model.layers.38.block_sparse_moe.experts.152.w3", "model.layers.38.block_sparse_moe.experts.153.w3", "model.layers.38.block_sparse_moe.experts.154.w3", "model.layers.38.block_sparse_moe.experts.155.w3", "model.layers.38.block_sparse_moe.experts.156.w3", "model.layers.38.block_sparse_moe.experts.157.w3", "model.layers.38.block_sparse_moe.experts.158.w3", "model.layers.38.block_sparse_moe.experts.159.w3", "model.layers.38.block_sparse_moe.experts.160.w3", "model.layers.38.block_sparse_moe.experts.161.w3", "model.layers.38.block_sparse_moe.experts.162.w3", "model.layers.38.block_sparse_moe.experts.163.w3", "model.layers.38.block_sparse_moe.experts.164.w3", "model.layers.38.block_sparse_moe.experts.165.w3", "model.layers.38.block_sparse_moe.experts.166.w3", "model.layers.38.block_sparse_moe.experts.167.w3", "model.layers.38.block_sparse_moe.experts.168.w3", "model.layers.38.block_sparse_moe.experts.169.w3", "model.layers.38.block_sparse_moe.experts.170.w3", "model.layers.38.block_sparse_moe.experts.171.w3", "model.layers.38.block_sparse_moe.experts.172.w3", "model.layers.38.block_sparse_moe.experts.173.w3", "model.layers.38.block_sparse_moe.experts.174.w3", "model.layers.38.block_sparse_moe.experts.175.w3", "model.layers.38.block_sparse_moe.experts.176.w3", "model.layers.38.block_sparse_moe.experts.177.w3", "model.layers.38.block_sparse_moe.experts.178.w3", "model.layers.38.block_sparse_moe.experts.179.w3", "model.layers.38.block_sparse_moe.experts.180.w3", "model.layers.38.block_sparse_moe.experts.181.w3", "model.layers.38.block_sparse_moe.experts.182.w3", "model.layers.38.block_sparse_moe.experts.183.w3", "model.layers.38.block_sparse_moe.experts.184.w3", "model.layers.38.block_sparse_moe.experts.185.w3", "model.layers.38.block_sparse_moe.experts.186.w3", "model.layers.38.block_sparse_moe.experts.187.w3", "model.layers.38.block_sparse_moe.experts.188.w3", "model.layers.38.block_sparse_moe.experts.189.w3", "model.layers.38.block_sparse_moe.experts.190.w3", "model.layers.38.block_sparse_moe.experts.191.w3", "model.layers.38.block_sparse_moe.experts.192.w3", "model.layers.38.block_sparse_moe.experts.193.w3", "model.layers.38.block_sparse_moe.experts.194.w3", "model.layers.38.block_sparse_moe.experts.195.w3", "model.layers.38.block_sparse_moe.experts.196.w3", "model.layers.38.block_sparse_moe.experts.197.w3", "model.layers.38.block_sparse_moe.experts.198.w3", "model.layers.38.block_sparse_moe.experts.199.w3", "model.layers.38.block_sparse_moe.experts.200.w3", "model.layers.38.block_sparse_moe.experts.201.w3", "model.layers.38.block_sparse_moe.experts.202.w3", "model.layers.38.block_sparse_moe.experts.203.w3", "model.layers.38.block_sparse_moe.experts.204.w3", "model.layers.38.block_sparse_moe.experts.205.w3", "model.layers.38.block_sparse_moe.experts.206.w3", "model.layers.38.block_sparse_moe.experts.207.w3", "model.layers.38.block_sparse_moe.experts.208.w3", "model.layers.38.block_sparse_moe.experts.209.w3", "model.layers.38.block_sparse_moe.experts.210.w3", "model.layers.38.block_sparse_moe.experts.211.w3", "model.layers.38.block_sparse_moe.experts.212.w3", "model.layers.38.block_sparse_moe.experts.213.w3", "model.layers.38.block_sparse_moe.experts.214.w3", "model.layers.38.block_sparse_moe.experts.215.w3", "model.layers.38.block_sparse_moe.experts.216.w3", "model.layers.38.block_sparse_moe.experts.217.w3", "model.layers.38.block_sparse_moe.experts.218.w3", "model.layers.38.block_sparse_moe.experts.219.w3", "model.layers.38.block_sparse_moe.experts.220.w3", "model.layers.38.block_sparse_moe.experts.221.w3", "model.layers.38.block_sparse_moe.experts.222.w3", "model.layers.38.block_sparse_moe.experts.223.w3", "model.layers.38.block_sparse_moe.experts.224.w3", "model.layers.38.block_sparse_moe.experts.225.w3", "model.layers.38.block_sparse_moe.experts.226.w3", "model.layers.38.block_sparse_moe.experts.227.w3", "model.layers.38.block_sparse_moe.experts.228.w3", "model.layers.38.block_sparse_moe.experts.229.w3", "model.layers.38.block_sparse_moe.experts.230.w3", "model.layers.38.block_sparse_moe.experts.231.w3", "model.layers.38.block_sparse_moe.experts.232.w3", "model.layers.38.block_sparse_moe.experts.233.w3", "model.layers.38.block_sparse_moe.experts.234.w3", "model.layers.38.block_sparse_moe.experts.235.w3", "model.layers.38.block_sparse_moe.experts.236.w3", "model.layers.38.block_sparse_moe.experts.237.w3", "model.layers.38.block_sparse_moe.experts.238.w3", "model.layers.38.block_sparse_moe.experts.239.w3", "model.layers.38.block_sparse_moe.experts.240.w3", "model.layers.38.block_sparse_moe.experts.241.w3", "model.layers.38.block_sparse_moe.experts.242.w3", "model.layers.38.block_sparse_moe.experts.243.w3", "model.layers.38.block_sparse_moe.experts.244.w3", "model.layers.38.block_sparse_moe.experts.245.w3", "model.layers.38.block_sparse_moe.experts.246.w3", "model.layers.38.block_sparse_moe.experts.247.w3", "model.layers.38.block_sparse_moe.experts.248.w3", "model.layers.38.block_sparse_moe.experts.249.w3", "model.layers.38.block_sparse_moe.experts.250.w3", "model.layers.38.block_sparse_moe.experts.251.w3", "model.layers.38.block_sparse_moe.experts.252.w3", "model.layers.38.block_sparse_moe.experts.253.w3", "model.layers.38.block_sparse_moe.experts.254.w3", "model.layers.38.block_sparse_moe.experts.255.w3", "model.layers.38.block_sparse_moe.experts.0.w2", "model.layers.38.block_sparse_moe.experts.1.w2", "model.layers.38.block_sparse_moe.experts.2.w2", "model.layers.38.block_sparse_moe.experts.3.w2", "model.layers.38.block_sparse_moe.experts.4.w2", "model.layers.38.block_sparse_moe.experts.5.w2", "model.layers.38.block_sparse_moe.experts.6.w2", "model.layers.38.block_sparse_moe.experts.7.w2", "model.layers.38.block_sparse_moe.experts.8.w2", "model.layers.38.block_sparse_moe.experts.9.w2", "model.layers.38.block_sparse_moe.experts.10.w2", "model.layers.38.block_sparse_moe.experts.11.w2", "model.layers.38.block_sparse_moe.experts.12.w2", "model.layers.38.block_sparse_moe.experts.13.w2", "model.layers.38.block_sparse_moe.experts.14.w2", "model.layers.38.block_sparse_moe.experts.15.w2", "model.layers.38.block_sparse_moe.experts.16.w2", "model.layers.38.block_sparse_moe.experts.17.w2", "model.layers.38.block_sparse_moe.experts.18.w2", "model.layers.38.block_sparse_moe.experts.19.w2", "model.layers.38.block_sparse_moe.experts.20.w2", "model.layers.38.block_sparse_moe.experts.21.w2", "model.layers.38.block_sparse_moe.experts.22.w2", "model.layers.38.block_sparse_moe.experts.23.w2", "model.layers.38.block_sparse_moe.experts.24.w2", "model.layers.38.block_sparse_moe.experts.25.w2", "model.layers.38.block_sparse_moe.experts.26.w2", "model.layers.38.block_sparse_moe.experts.27.w2", "model.layers.38.block_sparse_moe.experts.28.w2", "model.layers.38.block_sparse_moe.experts.29.w2", "model.layers.38.block_sparse_moe.experts.30.w2", "model.layers.38.block_sparse_moe.experts.31.w2", "model.layers.38.block_sparse_moe.experts.32.w2", "model.layers.38.block_sparse_moe.experts.33.w2", "model.layers.38.block_sparse_moe.experts.34.w2", "model.layers.38.block_sparse_moe.experts.35.w2", "model.layers.38.block_sparse_moe.experts.36.w2", "model.layers.38.block_sparse_moe.experts.37.w2", "model.layers.38.block_sparse_moe.experts.38.w2", "model.layers.38.block_sparse_moe.experts.39.w2", "model.layers.38.block_sparse_moe.experts.40.w2", "model.layers.38.block_sparse_moe.experts.41.w2", "model.layers.38.block_sparse_moe.experts.42.w2", "model.layers.38.block_sparse_moe.experts.43.w2", "model.layers.38.block_sparse_moe.experts.44.w2", "model.layers.38.block_sparse_moe.experts.45.w2", "model.layers.38.block_sparse_moe.experts.46.w2", "model.layers.38.block_sparse_moe.experts.47.w2", "model.layers.38.block_sparse_moe.experts.48.w2", "model.layers.38.block_sparse_moe.experts.49.w2", "model.layers.38.block_sparse_moe.experts.50.w2", "model.layers.38.block_sparse_moe.experts.51.w2", "model.layers.38.block_sparse_moe.experts.52.w2", "model.layers.38.block_sparse_moe.experts.53.w2", "model.layers.38.block_sparse_moe.experts.54.w2", "model.layers.38.block_sparse_moe.experts.55.w2", "model.layers.38.block_sparse_moe.experts.56.w2", "model.layers.38.block_sparse_moe.experts.57.w2", "model.layers.38.block_sparse_moe.experts.58.w2", "model.layers.38.block_sparse_moe.experts.59.w2", "model.layers.38.block_sparse_moe.experts.60.w2", "model.layers.38.block_sparse_moe.experts.61.w2", "model.layers.38.block_sparse_moe.experts.62.w2", "model.layers.38.block_sparse_moe.experts.63.w2", "model.layers.38.block_sparse_moe.experts.64.w2", "model.layers.38.block_sparse_moe.experts.65.w2", "model.layers.38.block_sparse_moe.experts.66.w2", "model.layers.38.block_sparse_moe.experts.67.w2", "model.layers.38.block_sparse_moe.experts.68.w2", "model.layers.38.block_sparse_moe.experts.69.w2", "model.layers.38.block_sparse_moe.experts.70.w2", "model.layers.38.block_sparse_moe.experts.71.w2", "model.layers.38.block_sparse_moe.experts.72.w2", "model.layers.38.block_sparse_moe.experts.73.w2", "model.layers.38.block_sparse_moe.experts.74.w2", "model.layers.38.block_sparse_moe.experts.75.w2", "model.layers.38.block_sparse_moe.experts.76.w2", "model.layers.38.block_sparse_moe.experts.77.w2", "model.layers.38.block_sparse_moe.experts.78.w2", "model.layers.38.block_sparse_moe.experts.79.w2", "model.layers.38.block_sparse_moe.experts.80.w2", "model.layers.38.block_sparse_moe.experts.81.w2", "model.layers.38.block_sparse_moe.experts.82.w2", "model.layers.38.block_sparse_moe.experts.83.w2", "model.layers.38.block_sparse_moe.experts.84.w2", "model.layers.38.block_sparse_moe.experts.85.w2", "model.layers.38.block_sparse_moe.experts.86.w2", "model.layers.38.block_sparse_moe.experts.87.w2", "model.layers.38.block_sparse_moe.experts.88.w2", "model.layers.38.block_sparse_moe.experts.89.w2", "model.layers.38.block_sparse_moe.experts.90.w2", "model.layers.38.block_sparse_moe.experts.91.w2", "model.layers.38.block_sparse_moe.experts.92.w2", "model.layers.38.block_sparse_moe.experts.93.w2", "model.layers.38.block_sparse_moe.experts.94.w2", "model.layers.38.block_sparse_moe.experts.95.w2", "model.layers.38.block_sparse_moe.experts.96.w2", "model.layers.38.block_sparse_moe.experts.97.w2", "model.layers.38.block_sparse_moe.experts.98.w2", "model.layers.38.block_sparse_moe.experts.99.w2", "model.layers.38.block_sparse_moe.experts.100.w2", "model.layers.38.block_sparse_moe.experts.101.w2", "model.layers.38.block_sparse_moe.experts.102.w2", "model.layers.38.block_sparse_moe.experts.103.w2", "model.layers.38.block_sparse_moe.experts.104.w2", "model.layers.38.block_sparse_moe.experts.105.w2", "model.layers.38.block_sparse_moe.experts.106.w2", "model.layers.38.block_sparse_moe.experts.107.w2", "model.layers.38.block_sparse_moe.experts.108.w2", "model.layers.38.block_sparse_moe.experts.109.w2", "model.layers.38.block_sparse_moe.experts.110.w2", "model.layers.38.block_sparse_moe.experts.111.w2", "model.layers.38.block_sparse_moe.experts.112.w2", "model.layers.38.block_sparse_moe.experts.113.w2", "model.layers.38.block_sparse_moe.experts.114.w2", "model.layers.38.block_sparse_moe.experts.115.w2", "model.layers.38.block_sparse_moe.experts.116.w2", "model.layers.38.block_sparse_moe.experts.117.w2", "model.layers.38.block_sparse_moe.experts.118.w2", "model.layers.38.block_sparse_moe.experts.119.w2", "model.layers.38.block_sparse_moe.experts.120.w2", "model.layers.38.block_sparse_moe.experts.121.w2", "model.layers.38.block_sparse_moe.experts.122.w2", "model.layers.38.block_sparse_moe.experts.123.w2", "model.layers.38.block_sparse_moe.experts.124.w2", "model.layers.38.block_sparse_moe.experts.125.w2", "model.layers.38.block_sparse_moe.experts.126.w2", "model.layers.38.block_sparse_moe.experts.127.w2", "model.layers.38.block_sparse_moe.experts.128.w2", "model.layers.38.block_sparse_moe.experts.129.w2", "model.layers.38.block_sparse_moe.experts.130.w2", "model.layers.38.block_sparse_moe.experts.131.w2", "model.layers.38.block_sparse_moe.experts.132.w2", "model.layers.38.block_sparse_moe.experts.133.w2", "model.layers.38.block_sparse_moe.experts.134.w2", "model.layers.38.block_sparse_moe.experts.135.w2", "model.layers.38.block_sparse_moe.experts.136.w2", "model.layers.38.block_sparse_moe.experts.137.w2", "model.layers.38.block_sparse_moe.experts.138.w2", "model.layers.38.block_sparse_moe.experts.139.w2", "model.layers.38.block_sparse_moe.experts.140.w2", "model.layers.38.block_sparse_moe.experts.141.w2", "model.layers.38.block_sparse_moe.experts.142.w2", "model.layers.38.block_sparse_moe.experts.143.w2", "model.layers.38.block_sparse_moe.experts.144.w2", "model.layers.38.block_sparse_moe.experts.145.w2", "model.layers.38.block_sparse_moe.experts.146.w2", "model.layers.38.block_sparse_moe.experts.147.w2", "model.layers.38.block_sparse_moe.experts.148.w2", "model.layers.38.block_sparse_moe.experts.149.w2", "model.layers.38.block_sparse_moe.experts.150.w2", "model.layers.38.block_sparse_moe.experts.151.w2", "model.layers.38.block_sparse_moe.experts.152.w2", "model.layers.38.block_sparse_moe.experts.153.w2", "model.layers.38.block_sparse_moe.experts.154.w2", "model.layers.38.block_sparse_moe.experts.155.w2", "model.layers.38.block_sparse_moe.experts.156.w2", "model.layers.38.block_sparse_moe.experts.157.w2", "model.layers.38.block_sparse_moe.experts.158.w2", "model.layers.38.block_sparse_moe.experts.159.w2", "model.layers.38.block_sparse_moe.experts.160.w2", "model.layers.38.block_sparse_moe.experts.161.w2", "model.layers.38.block_sparse_moe.experts.162.w2", "model.layers.38.block_sparse_moe.experts.163.w2", "model.layers.38.block_sparse_moe.experts.164.w2", "model.layers.38.block_sparse_moe.experts.165.w2", "model.layers.38.block_sparse_moe.experts.166.w2", "model.layers.38.block_sparse_moe.experts.167.w2", "model.layers.38.block_sparse_moe.experts.168.w2", "model.layers.38.block_sparse_moe.experts.169.w2", "model.layers.38.block_sparse_moe.experts.170.w2", "model.layers.38.block_sparse_moe.experts.171.w2", "model.layers.38.block_sparse_moe.experts.172.w2", "model.layers.38.block_sparse_moe.experts.173.w2", "model.layers.38.block_sparse_moe.experts.174.w2", "model.layers.38.block_sparse_moe.experts.175.w2", "model.layers.38.block_sparse_moe.experts.176.w2", "model.layers.38.block_sparse_moe.experts.177.w2", "model.layers.38.block_sparse_moe.experts.178.w2", "model.layers.38.block_sparse_moe.experts.179.w2", "model.layers.38.block_sparse_moe.experts.180.w2", "model.layers.38.block_sparse_moe.experts.181.w2", "model.layers.38.block_sparse_moe.experts.182.w2", "model.layers.38.block_sparse_moe.experts.183.w2", "model.layers.38.block_sparse_moe.experts.184.w2", "model.layers.38.block_sparse_moe.experts.185.w2", "model.layers.38.block_sparse_moe.experts.186.w2", "model.layers.38.block_sparse_moe.experts.187.w2", "model.layers.38.block_sparse_moe.experts.188.w2", "model.layers.38.block_sparse_moe.experts.189.w2", "model.layers.38.block_sparse_moe.experts.190.w2", "model.layers.38.block_sparse_moe.experts.191.w2", "model.layers.38.block_sparse_moe.experts.192.w2", "model.layers.38.block_sparse_moe.experts.193.w2", "model.layers.38.block_sparse_moe.experts.194.w2", "model.layers.38.block_sparse_moe.experts.195.w2", "model.layers.38.block_sparse_moe.experts.196.w2", "model.layers.38.block_sparse_moe.experts.197.w2", "model.layers.38.block_sparse_moe.experts.198.w2", "model.layers.38.block_sparse_moe.experts.199.w2", "model.layers.38.block_sparse_moe.experts.200.w2", "model.layers.38.block_sparse_moe.experts.201.w2", "model.layers.38.block_sparse_moe.experts.202.w2", "model.layers.38.block_sparse_moe.experts.203.w2", "model.layers.38.block_sparse_moe.experts.204.w2", "model.layers.38.block_sparse_moe.experts.205.w2", "model.layers.38.block_sparse_moe.experts.206.w2", "model.layers.38.block_sparse_moe.experts.207.w2", "model.layers.38.block_sparse_moe.experts.208.w2", "model.layers.38.block_sparse_moe.experts.209.w2", "model.layers.38.block_sparse_moe.experts.210.w2", "model.layers.38.block_sparse_moe.experts.211.w2", "model.layers.38.block_sparse_moe.experts.212.w2", "model.layers.38.block_sparse_moe.experts.213.w2", "model.layers.38.block_sparse_moe.experts.214.w2", "model.layers.38.block_sparse_moe.experts.215.w2", "model.layers.38.block_sparse_moe.experts.216.w2", "model.layers.38.block_sparse_moe.experts.217.w2", "model.layers.38.block_sparse_moe.experts.218.w2", "model.layers.38.block_sparse_moe.experts.219.w2", "model.layers.38.block_sparse_moe.experts.220.w2", "model.layers.38.block_sparse_moe.experts.221.w2", "model.layers.38.block_sparse_moe.experts.222.w2", "model.layers.38.block_sparse_moe.experts.223.w2", "model.layers.38.block_sparse_moe.experts.224.w2", "model.layers.38.block_sparse_moe.experts.225.w2", "model.layers.38.block_sparse_moe.experts.226.w2", "model.layers.38.block_sparse_moe.experts.227.w2", "model.layers.38.block_sparse_moe.experts.228.w2", "model.layers.38.block_sparse_moe.experts.229.w2", "model.layers.38.block_sparse_moe.experts.230.w2", "model.layers.38.block_sparse_moe.experts.231.w2", "model.layers.38.block_sparse_moe.experts.232.w2", "model.layers.38.block_sparse_moe.experts.233.w2", "model.layers.38.block_sparse_moe.experts.234.w2", "model.layers.38.block_sparse_moe.experts.235.w2", "model.layers.38.block_sparse_moe.experts.236.w2", "model.layers.38.block_sparse_moe.experts.237.w2", "model.layers.38.block_sparse_moe.experts.238.w2", "model.layers.38.block_sparse_moe.experts.239.w2", "model.layers.38.block_sparse_moe.experts.240.w2", "model.layers.38.block_sparse_moe.experts.241.w2", "model.layers.38.block_sparse_moe.experts.242.w2", "model.layers.38.block_sparse_moe.experts.243.w2", "model.layers.38.block_sparse_moe.experts.244.w2", "model.layers.38.block_sparse_moe.experts.245.w2", "model.layers.38.block_sparse_moe.experts.246.w2", "model.layers.38.block_sparse_moe.experts.247.w2", "model.layers.38.block_sparse_moe.experts.248.w2", "model.layers.38.block_sparse_moe.experts.249.w2", "model.layers.38.block_sparse_moe.experts.250.w2", "model.layers.38.block_sparse_moe.experts.251.w2", "model.layers.38.block_sparse_moe.experts.252.w2", "model.layers.38.block_sparse_moe.experts.253.w2", "model.layers.38.block_sparse_moe.experts.254.w2", "model.layers.38.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0006697643548250198, "dbits": 3623878656 } ] }, { "idx": 78, "layers": [ "model.layers.39.self_attn.q_proj", "model.layers.39.self_attn.k_proj", "model.layers.39.self_attn.v_proj", "model.layers.39.self_attn.o_proj" ], "candidates": [ { "dkld": -0.006123412027955022, "dbits": 44040192 } ] }, { "idx": 79, "layers": [ "model.layers.39.block_sparse_moe.experts.0.w1", "model.layers.39.block_sparse_moe.experts.1.w1", "model.layers.39.block_sparse_moe.experts.2.w1", "model.layers.39.block_sparse_moe.experts.3.w1", "model.layers.39.block_sparse_moe.experts.4.w1", "model.layers.39.block_sparse_moe.experts.5.w1", "model.layers.39.block_sparse_moe.experts.6.w1", "model.layers.39.block_sparse_moe.experts.7.w1", "model.layers.39.block_sparse_moe.experts.8.w1", "model.layers.39.block_sparse_moe.experts.9.w1", "model.layers.39.block_sparse_moe.experts.10.w1", "model.layers.39.block_sparse_moe.experts.11.w1", "model.layers.39.block_sparse_moe.experts.12.w1", "model.layers.39.block_sparse_moe.experts.13.w1", "model.layers.39.block_sparse_moe.experts.14.w1", "model.layers.39.block_sparse_moe.experts.15.w1", "model.layers.39.block_sparse_moe.experts.16.w1", "model.layers.39.block_sparse_moe.experts.17.w1", "model.layers.39.block_sparse_moe.experts.18.w1", "model.layers.39.block_sparse_moe.experts.19.w1", "model.layers.39.block_sparse_moe.experts.20.w1", "model.layers.39.block_sparse_moe.experts.21.w1", "model.layers.39.block_sparse_moe.experts.22.w1", "model.layers.39.block_sparse_moe.experts.23.w1", "model.layers.39.block_sparse_moe.experts.24.w1", "model.layers.39.block_sparse_moe.experts.25.w1", "model.layers.39.block_sparse_moe.experts.26.w1", "model.layers.39.block_sparse_moe.experts.27.w1", "model.layers.39.block_sparse_moe.experts.28.w1", "model.layers.39.block_sparse_moe.experts.29.w1", "model.layers.39.block_sparse_moe.experts.30.w1", "model.layers.39.block_sparse_moe.experts.31.w1", "model.layers.39.block_sparse_moe.experts.32.w1", "model.layers.39.block_sparse_moe.experts.33.w1", "model.layers.39.block_sparse_moe.experts.34.w1", "model.layers.39.block_sparse_moe.experts.35.w1", "model.layers.39.block_sparse_moe.experts.36.w1", "model.layers.39.block_sparse_moe.experts.37.w1", "model.layers.39.block_sparse_moe.experts.38.w1", "model.layers.39.block_sparse_moe.experts.39.w1", "model.layers.39.block_sparse_moe.experts.40.w1", "model.layers.39.block_sparse_moe.experts.41.w1", "model.layers.39.block_sparse_moe.experts.42.w1", "model.layers.39.block_sparse_moe.experts.43.w1", "model.layers.39.block_sparse_moe.experts.44.w1", "model.layers.39.block_sparse_moe.experts.45.w1", "model.layers.39.block_sparse_moe.experts.46.w1", "model.layers.39.block_sparse_moe.experts.47.w1", "model.layers.39.block_sparse_moe.experts.48.w1", "model.layers.39.block_sparse_moe.experts.49.w1", "model.layers.39.block_sparse_moe.experts.50.w1", "model.layers.39.block_sparse_moe.experts.51.w1", "model.layers.39.block_sparse_moe.experts.52.w1", "model.layers.39.block_sparse_moe.experts.53.w1", "model.layers.39.block_sparse_moe.experts.54.w1", "model.layers.39.block_sparse_moe.experts.55.w1", "model.layers.39.block_sparse_moe.experts.56.w1", "model.layers.39.block_sparse_moe.experts.57.w1", "model.layers.39.block_sparse_moe.experts.58.w1", "model.layers.39.block_sparse_moe.experts.59.w1", "model.layers.39.block_sparse_moe.experts.60.w1", "model.layers.39.block_sparse_moe.experts.61.w1", "model.layers.39.block_sparse_moe.experts.62.w1", "model.layers.39.block_sparse_moe.experts.63.w1", "model.layers.39.block_sparse_moe.experts.64.w1", "model.layers.39.block_sparse_moe.experts.65.w1", "model.layers.39.block_sparse_moe.experts.66.w1", "model.layers.39.block_sparse_moe.experts.67.w1", "model.layers.39.block_sparse_moe.experts.68.w1", "model.layers.39.block_sparse_moe.experts.69.w1", "model.layers.39.block_sparse_moe.experts.70.w1", "model.layers.39.block_sparse_moe.experts.71.w1", "model.layers.39.block_sparse_moe.experts.72.w1", "model.layers.39.block_sparse_moe.experts.73.w1", "model.layers.39.block_sparse_moe.experts.74.w1", "model.layers.39.block_sparse_moe.experts.75.w1", "model.layers.39.block_sparse_moe.experts.76.w1", "model.layers.39.block_sparse_moe.experts.77.w1", "model.layers.39.block_sparse_moe.experts.78.w1", "model.layers.39.block_sparse_moe.experts.79.w1", "model.layers.39.block_sparse_moe.experts.80.w1", "model.layers.39.block_sparse_moe.experts.81.w1", "model.layers.39.block_sparse_moe.experts.82.w1", "model.layers.39.block_sparse_moe.experts.83.w1", "model.layers.39.block_sparse_moe.experts.84.w1", "model.layers.39.block_sparse_moe.experts.85.w1", "model.layers.39.block_sparse_moe.experts.86.w1", "model.layers.39.block_sparse_moe.experts.87.w1", "model.layers.39.block_sparse_moe.experts.88.w1", "model.layers.39.block_sparse_moe.experts.89.w1", "model.layers.39.block_sparse_moe.experts.90.w1", "model.layers.39.block_sparse_moe.experts.91.w1", "model.layers.39.block_sparse_moe.experts.92.w1", "model.layers.39.block_sparse_moe.experts.93.w1", "model.layers.39.block_sparse_moe.experts.94.w1", "model.layers.39.block_sparse_moe.experts.95.w1", "model.layers.39.block_sparse_moe.experts.96.w1", "model.layers.39.block_sparse_moe.experts.97.w1", "model.layers.39.block_sparse_moe.experts.98.w1", "model.layers.39.block_sparse_moe.experts.99.w1", "model.layers.39.block_sparse_moe.experts.100.w1", "model.layers.39.block_sparse_moe.experts.101.w1", "model.layers.39.block_sparse_moe.experts.102.w1", "model.layers.39.block_sparse_moe.experts.103.w1", "model.layers.39.block_sparse_moe.experts.104.w1", "model.layers.39.block_sparse_moe.experts.105.w1", "model.layers.39.block_sparse_moe.experts.106.w1", "model.layers.39.block_sparse_moe.experts.107.w1", "model.layers.39.block_sparse_moe.experts.108.w1", "model.layers.39.block_sparse_moe.experts.109.w1", "model.layers.39.block_sparse_moe.experts.110.w1", "model.layers.39.block_sparse_moe.experts.111.w1", "model.layers.39.block_sparse_moe.experts.112.w1", "model.layers.39.block_sparse_moe.experts.113.w1", "model.layers.39.block_sparse_moe.experts.114.w1", "model.layers.39.block_sparse_moe.experts.115.w1", "model.layers.39.block_sparse_moe.experts.116.w1", "model.layers.39.block_sparse_moe.experts.117.w1", "model.layers.39.block_sparse_moe.experts.118.w1", "model.layers.39.block_sparse_moe.experts.119.w1", "model.layers.39.block_sparse_moe.experts.120.w1", "model.layers.39.block_sparse_moe.experts.121.w1", "model.layers.39.block_sparse_moe.experts.122.w1", "model.layers.39.block_sparse_moe.experts.123.w1", "model.layers.39.block_sparse_moe.experts.124.w1", "model.layers.39.block_sparse_moe.experts.125.w1", "model.layers.39.block_sparse_moe.experts.126.w1", "model.layers.39.block_sparse_moe.experts.127.w1", "model.layers.39.block_sparse_moe.experts.128.w1", "model.layers.39.block_sparse_moe.experts.129.w1", "model.layers.39.block_sparse_moe.experts.130.w1", "model.layers.39.block_sparse_moe.experts.131.w1", "model.layers.39.block_sparse_moe.experts.132.w1", "model.layers.39.block_sparse_moe.experts.133.w1", "model.layers.39.block_sparse_moe.experts.134.w1", "model.layers.39.block_sparse_moe.experts.135.w1", "model.layers.39.block_sparse_moe.experts.136.w1", "model.layers.39.block_sparse_moe.experts.137.w1", "model.layers.39.block_sparse_moe.experts.138.w1", "model.layers.39.block_sparse_moe.experts.139.w1", "model.layers.39.block_sparse_moe.experts.140.w1", "model.layers.39.block_sparse_moe.experts.141.w1", "model.layers.39.block_sparse_moe.experts.142.w1", "model.layers.39.block_sparse_moe.experts.143.w1", "model.layers.39.block_sparse_moe.experts.144.w1", "model.layers.39.block_sparse_moe.experts.145.w1", "model.layers.39.block_sparse_moe.experts.146.w1", "model.layers.39.block_sparse_moe.experts.147.w1", "model.layers.39.block_sparse_moe.experts.148.w1", "model.layers.39.block_sparse_moe.experts.149.w1", "model.layers.39.block_sparse_moe.experts.150.w1", "model.layers.39.block_sparse_moe.experts.151.w1", "model.layers.39.block_sparse_moe.experts.152.w1", "model.layers.39.block_sparse_moe.experts.153.w1", "model.layers.39.block_sparse_moe.experts.154.w1", "model.layers.39.block_sparse_moe.experts.155.w1", "model.layers.39.block_sparse_moe.experts.156.w1", "model.layers.39.block_sparse_moe.experts.157.w1", "model.layers.39.block_sparse_moe.experts.158.w1", "model.layers.39.block_sparse_moe.experts.159.w1", "model.layers.39.block_sparse_moe.experts.160.w1", "model.layers.39.block_sparse_moe.experts.161.w1", "model.layers.39.block_sparse_moe.experts.162.w1", "model.layers.39.block_sparse_moe.experts.163.w1", "model.layers.39.block_sparse_moe.experts.164.w1", "model.layers.39.block_sparse_moe.experts.165.w1", "model.layers.39.block_sparse_moe.experts.166.w1", "model.layers.39.block_sparse_moe.experts.167.w1", "model.layers.39.block_sparse_moe.experts.168.w1", "model.layers.39.block_sparse_moe.experts.169.w1", "model.layers.39.block_sparse_moe.experts.170.w1", "model.layers.39.block_sparse_moe.experts.171.w1", "model.layers.39.block_sparse_moe.experts.172.w1", "model.layers.39.block_sparse_moe.experts.173.w1", "model.layers.39.block_sparse_moe.experts.174.w1", "model.layers.39.block_sparse_moe.experts.175.w1", "model.layers.39.block_sparse_moe.experts.176.w1", "model.layers.39.block_sparse_moe.experts.177.w1", "model.layers.39.block_sparse_moe.experts.178.w1", "model.layers.39.block_sparse_moe.experts.179.w1", "model.layers.39.block_sparse_moe.experts.180.w1", "model.layers.39.block_sparse_moe.experts.181.w1", "model.layers.39.block_sparse_moe.experts.182.w1", "model.layers.39.block_sparse_moe.experts.183.w1", "model.layers.39.block_sparse_moe.experts.184.w1", "model.layers.39.block_sparse_moe.experts.185.w1", "model.layers.39.block_sparse_moe.experts.186.w1", "model.layers.39.block_sparse_moe.experts.187.w1", "model.layers.39.block_sparse_moe.experts.188.w1", "model.layers.39.block_sparse_moe.experts.189.w1", "model.layers.39.block_sparse_moe.experts.190.w1", "model.layers.39.block_sparse_moe.experts.191.w1", "model.layers.39.block_sparse_moe.experts.192.w1", "model.layers.39.block_sparse_moe.experts.193.w1", "model.layers.39.block_sparse_moe.experts.194.w1", "model.layers.39.block_sparse_moe.experts.195.w1", "model.layers.39.block_sparse_moe.experts.196.w1", "model.layers.39.block_sparse_moe.experts.197.w1", "model.layers.39.block_sparse_moe.experts.198.w1", "model.layers.39.block_sparse_moe.experts.199.w1", "model.layers.39.block_sparse_moe.experts.200.w1", "model.layers.39.block_sparse_moe.experts.201.w1", "model.layers.39.block_sparse_moe.experts.202.w1", "model.layers.39.block_sparse_moe.experts.203.w1", "model.layers.39.block_sparse_moe.experts.204.w1", "model.layers.39.block_sparse_moe.experts.205.w1", "model.layers.39.block_sparse_moe.experts.206.w1", "model.layers.39.block_sparse_moe.experts.207.w1", "model.layers.39.block_sparse_moe.experts.208.w1", "model.layers.39.block_sparse_moe.experts.209.w1", "model.layers.39.block_sparse_moe.experts.210.w1", "model.layers.39.block_sparse_moe.experts.211.w1", "model.layers.39.block_sparse_moe.experts.212.w1", "model.layers.39.block_sparse_moe.experts.213.w1", "model.layers.39.block_sparse_moe.experts.214.w1", "model.layers.39.block_sparse_moe.experts.215.w1", "model.layers.39.block_sparse_moe.experts.216.w1", "model.layers.39.block_sparse_moe.experts.217.w1", "model.layers.39.block_sparse_moe.experts.218.w1", "model.layers.39.block_sparse_moe.experts.219.w1", "model.layers.39.block_sparse_moe.experts.220.w1", "model.layers.39.block_sparse_moe.experts.221.w1", "model.layers.39.block_sparse_moe.experts.222.w1", "model.layers.39.block_sparse_moe.experts.223.w1", "model.layers.39.block_sparse_moe.experts.224.w1", "model.layers.39.block_sparse_moe.experts.225.w1", "model.layers.39.block_sparse_moe.experts.226.w1", "model.layers.39.block_sparse_moe.experts.227.w1", "model.layers.39.block_sparse_moe.experts.228.w1", "model.layers.39.block_sparse_moe.experts.229.w1", "model.layers.39.block_sparse_moe.experts.230.w1", "model.layers.39.block_sparse_moe.experts.231.w1", "model.layers.39.block_sparse_moe.experts.232.w1", "model.layers.39.block_sparse_moe.experts.233.w1", "model.layers.39.block_sparse_moe.experts.234.w1", "model.layers.39.block_sparse_moe.experts.235.w1", "model.layers.39.block_sparse_moe.experts.236.w1", "model.layers.39.block_sparse_moe.experts.237.w1", "model.layers.39.block_sparse_moe.experts.238.w1", "model.layers.39.block_sparse_moe.experts.239.w1", "model.layers.39.block_sparse_moe.experts.240.w1", "model.layers.39.block_sparse_moe.experts.241.w1", "model.layers.39.block_sparse_moe.experts.242.w1", "model.layers.39.block_sparse_moe.experts.243.w1", "model.layers.39.block_sparse_moe.experts.244.w1", "model.layers.39.block_sparse_moe.experts.245.w1", "model.layers.39.block_sparse_moe.experts.246.w1", "model.layers.39.block_sparse_moe.experts.247.w1", "model.layers.39.block_sparse_moe.experts.248.w1", "model.layers.39.block_sparse_moe.experts.249.w1", "model.layers.39.block_sparse_moe.experts.250.w1", "model.layers.39.block_sparse_moe.experts.251.w1", "model.layers.39.block_sparse_moe.experts.252.w1", "model.layers.39.block_sparse_moe.experts.253.w1", "model.layers.39.block_sparse_moe.experts.254.w1", "model.layers.39.block_sparse_moe.experts.255.w1", "model.layers.39.block_sparse_moe.experts.0.w3", "model.layers.39.block_sparse_moe.experts.1.w3", "model.layers.39.block_sparse_moe.experts.2.w3", "model.layers.39.block_sparse_moe.experts.3.w3", "model.layers.39.block_sparse_moe.experts.4.w3", "model.layers.39.block_sparse_moe.experts.5.w3", "model.layers.39.block_sparse_moe.experts.6.w3", "model.layers.39.block_sparse_moe.experts.7.w3", "model.layers.39.block_sparse_moe.experts.8.w3", "model.layers.39.block_sparse_moe.experts.9.w3", "model.layers.39.block_sparse_moe.experts.10.w3", "model.layers.39.block_sparse_moe.experts.11.w3", "model.layers.39.block_sparse_moe.experts.12.w3", "model.layers.39.block_sparse_moe.experts.13.w3", "model.layers.39.block_sparse_moe.experts.14.w3", "model.layers.39.block_sparse_moe.experts.15.w3", "model.layers.39.block_sparse_moe.experts.16.w3", "model.layers.39.block_sparse_moe.experts.17.w3", "model.layers.39.block_sparse_moe.experts.18.w3", "model.layers.39.block_sparse_moe.experts.19.w3", "model.layers.39.block_sparse_moe.experts.20.w3", "model.layers.39.block_sparse_moe.experts.21.w3", "model.layers.39.block_sparse_moe.experts.22.w3", "model.layers.39.block_sparse_moe.experts.23.w3", "model.layers.39.block_sparse_moe.experts.24.w3", "model.layers.39.block_sparse_moe.experts.25.w3", "model.layers.39.block_sparse_moe.experts.26.w3", "model.layers.39.block_sparse_moe.experts.27.w3", "model.layers.39.block_sparse_moe.experts.28.w3", "model.layers.39.block_sparse_moe.experts.29.w3", "model.layers.39.block_sparse_moe.experts.30.w3", "model.layers.39.block_sparse_moe.experts.31.w3", "model.layers.39.block_sparse_moe.experts.32.w3", "model.layers.39.block_sparse_moe.experts.33.w3", "model.layers.39.block_sparse_moe.experts.34.w3", "model.layers.39.block_sparse_moe.experts.35.w3", "model.layers.39.block_sparse_moe.experts.36.w3", "model.layers.39.block_sparse_moe.experts.37.w3", "model.layers.39.block_sparse_moe.experts.38.w3", "model.layers.39.block_sparse_moe.experts.39.w3", "model.layers.39.block_sparse_moe.experts.40.w3", "model.layers.39.block_sparse_moe.experts.41.w3", "model.layers.39.block_sparse_moe.experts.42.w3", "model.layers.39.block_sparse_moe.experts.43.w3", "model.layers.39.block_sparse_moe.experts.44.w3", "model.layers.39.block_sparse_moe.experts.45.w3", "model.layers.39.block_sparse_moe.experts.46.w3", "model.layers.39.block_sparse_moe.experts.47.w3", "model.layers.39.block_sparse_moe.experts.48.w3", "model.layers.39.block_sparse_moe.experts.49.w3", "model.layers.39.block_sparse_moe.experts.50.w3", "model.layers.39.block_sparse_moe.experts.51.w3", "model.layers.39.block_sparse_moe.experts.52.w3", "model.layers.39.block_sparse_moe.experts.53.w3", "model.layers.39.block_sparse_moe.experts.54.w3", "model.layers.39.block_sparse_moe.experts.55.w3", "model.layers.39.block_sparse_moe.experts.56.w3", "model.layers.39.block_sparse_moe.experts.57.w3", "model.layers.39.block_sparse_moe.experts.58.w3", "model.layers.39.block_sparse_moe.experts.59.w3", "model.layers.39.block_sparse_moe.experts.60.w3", "model.layers.39.block_sparse_moe.experts.61.w3", "model.layers.39.block_sparse_moe.experts.62.w3", "model.layers.39.block_sparse_moe.experts.63.w3", "model.layers.39.block_sparse_moe.experts.64.w3", "model.layers.39.block_sparse_moe.experts.65.w3", "model.layers.39.block_sparse_moe.experts.66.w3", "model.layers.39.block_sparse_moe.experts.67.w3", "model.layers.39.block_sparse_moe.experts.68.w3", "model.layers.39.block_sparse_moe.experts.69.w3", "model.layers.39.block_sparse_moe.experts.70.w3", "model.layers.39.block_sparse_moe.experts.71.w3", "model.layers.39.block_sparse_moe.experts.72.w3", "model.layers.39.block_sparse_moe.experts.73.w3", "model.layers.39.block_sparse_moe.experts.74.w3", "model.layers.39.block_sparse_moe.experts.75.w3", "model.layers.39.block_sparse_moe.experts.76.w3", "model.layers.39.block_sparse_moe.experts.77.w3", "model.layers.39.block_sparse_moe.experts.78.w3", "model.layers.39.block_sparse_moe.experts.79.w3", "model.layers.39.block_sparse_moe.experts.80.w3", "model.layers.39.block_sparse_moe.experts.81.w3", "model.layers.39.block_sparse_moe.experts.82.w3", "model.layers.39.block_sparse_moe.experts.83.w3", "model.layers.39.block_sparse_moe.experts.84.w3", "model.layers.39.block_sparse_moe.experts.85.w3", "model.layers.39.block_sparse_moe.experts.86.w3", "model.layers.39.block_sparse_moe.experts.87.w3", "model.layers.39.block_sparse_moe.experts.88.w3", "model.layers.39.block_sparse_moe.experts.89.w3", "model.layers.39.block_sparse_moe.experts.90.w3", "model.layers.39.block_sparse_moe.experts.91.w3", "model.layers.39.block_sparse_moe.experts.92.w3", "model.layers.39.block_sparse_moe.experts.93.w3", "model.layers.39.block_sparse_moe.experts.94.w3", "model.layers.39.block_sparse_moe.experts.95.w3", "model.layers.39.block_sparse_moe.experts.96.w3", "model.layers.39.block_sparse_moe.experts.97.w3", "model.layers.39.block_sparse_moe.experts.98.w3", "model.layers.39.block_sparse_moe.experts.99.w3", "model.layers.39.block_sparse_moe.experts.100.w3", "model.layers.39.block_sparse_moe.experts.101.w3", "model.layers.39.block_sparse_moe.experts.102.w3", "model.layers.39.block_sparse_moe.experts.103.w3", "model.layers.39.block_sparse_moe.experts.104.w3", "model.layers.39.block_sparse_moe.experts.105.w3", "model.layers.39.block_sparse_moe.experts.106.w3", "model.layers.39.block_sparse_moe.experts.107.w3", "model.layers.39.block_sparse_moe.experts.108.w3", "model.layers.39.block_sparse_moe.experts.109.w3", "model.layers.39.block_sparse_moe.experts.110.w3", "model.layers.39.block_sparse_moe.experts.111.w3", "model.layers.39.block_sparse_moe.experts.112.w3", "model.layers.39.block_sparse_moe.experts.113.w3", "model.layers.39.block_sparse_moe.experts.114.w3", "model.layers.39.block_sparse_moe.experts.115.w3", "model.layers.39.block_sparse_moe.experts.116.w3", "model.layers.39.block_sparse_moe.experts.117.w3", "model.layers.39.block_sparse_moe.experts.118.w3", "model.layers.39.block_sparse_moe.experts.119.w3", "model.layers.39.block_sparse_moe.experts.120.w3", "model.layers.39.block_sparse_moe.experts.121.w3", "model.layers.39.block_sparse_moe.experts.122.w3", "model.layers.39.block_sparse_moe.experts.123.w3", "model.layers.39.block_sparse_moe.experts.124.w3", "model.layers.39.block_sparse_moe.experts.125.w3", "model.layers.39.block_sparse_moe.experts.126.w3", "model.layers.39.block_sparse_moe.experts.127.w3", "model.layers.39.block_sparse_moe.experts.128.w3", "model.layers.39.block_sparse_moe.experts.129.w3", "model.layers.39.block_sparse_moe.experts.130.w3", "model.layers.39.block_sparse_moe.experts.131.w3", "model.layers.39.block_sparse_moe.experts.132.w3", "model.layers.39.block_sparse_moe.experts.133.w3", "model.layers.39.block_sparse_moe.experts.134.w3", "model.layers.39.block_sparse_moe.experts.135.w3", "model.layers.39.block_sparse_moe.experts.136.w3", "model.layers.39.block_sparse_moe.experts.137.w3", "model.layers.39.block_sparse_moe.experts.138.w3", "model.layers.39.block_sparse_moe.experts.139.w3", "model.layers.39.block_sparse_moe.experts.140.w3", "model.layers.39.block_sparse_moe.experts.141.w3", "model.layers.39.block_sparse_moe.experts.142.w3", "model.layers.39.block_sparse_moe.experts.143.w3", "model.layers.39.block_sparse_moe.experts.144.w3", "model.layers.39.block_sparse_moe.experts.145.w3", "model.layers.39.block_sparse_moe.experts.146.w3", "model.layers.39.block_sparse_moe.experts.147.w3", "model.layers.39.block_sparse_moe.experts.148.w3", "model.layers.39.block_sparse_moe.experts.149.w3", "model.layers.39.block_sparse_moe.experts.150.w3", "model.layers.39.block_sparse_moe.experts.151.w3", "model.layers.39.block_sparse_moe.experts.152.w3", "model.layers.39.block_sparse_moe.experts.153.w3", "model.layers.39.block_sparse_moe.experts.154.w3", "model.layers.39.block_sparse_moe.experts.155.w3", "model.layers.39.block_sparse_moe.experts.156.w3", "model.layers.39.block_sparse_moe.experts.157.w3", "model.layers.39.block_sparse_moe.experts.158.w3", "model.layers.39.block_sparse_moe.experts.159.w3", "model.layers.39.block_sparse_moe.experts.160.w3", "model.layers.39.block_sparse_moe.experts.161.w3", "model.layers.39.block_sparse_moe.experts.162.w3", "model.layers.39.block_sparse_moe.experts.163.w3", "model.layers.39.block_sparse_moe.experts.164.w3", "model.layers.39.block_sparse_moe.experts.165.w3", "model.layers.39.block_sparse_moe.experts.166.w3", "model.layers.39.block_sparse_moe.experts.167.w3", "model.layers.39.block_sparse_moe.experts.168.w3", "model.layers.39.block_sparse_moe.experts.169.w3", "model.layers.39.block_sparse_moe.experts.170.w3", "model.layers.39.block_sparse_moe.experts.171.w3", "model.layers.39.block_sparse_moe.experts.172.w3", "model.layers.39.block_sparse_moe.experts.173.w3", "model.layers.39.block_sparse_moe.experts.174.w3", "model.layers.39.block_sparse_moe.experts.175.w3", "model.layers.39.block_sparse_moe.experts.176.w3", "model.layers.39.block_sparse_moe.experts.177.w3", "model.layers.39.block_sparse_moe.experts.178.w3", "model.layers.39.block_sparse_moe.experts.179.w3", "model.layers.39.block_sparse_moe.experts.180.w3", "model.layers.39.block_sparse_moe.experts.181.w3", "model.layers.39.block_sparse_moe.experts.182.w3", "model.layers.39.block_sparse_moe.experts.183.w3", "model.layers.39.block_sparse_moe.experts.184.w3", "model.layers.39.block_sparse_moe.experts.185.w3", "model.layers.39.block_sparse_moe.experts.186.w3", "model.layers.39.block_sparse_moe.experts.187.w3", "model.layers.39.block_sparse_moe.experts.188.w3", "model.layers.39.block_sparse_moe.experts.189.w3", "model.layers.39.block_sparse_moe.experts.190.w3", "model.layers.39.block_sparse_moe.experts.191.w3", "model.layers.39.block_sparse_moe.experts.192.w3", "model.layers.39.block_sparse_moe.experts.193.w3", "model.layers.39.block_sparse_moe.experts.194.w3", "model.layers.39.block_sparse_moe.experts.195.w3", "model.layers.39.block_sparse_moe.experts.196.w3", "model.layers.39.block_sparse_moe.experts.197.w3", "model.layers.39.block_sparse_moe.experts.198.w3", "model.layers.39.block_sparse_moe.experts.199.w3", "model.layers.39.block_sparse_moe.experts.200.w3", "model.layers.39.block_sparse_moe.experts.201.w3", "model.layers.39.block_sparse_moe.experts.202.w3", "model.layers.39.block_sparse_moe.experts.203.w3", "model.layers.39.block_sparse_moe.experts.204.w3", "model.layers.39.block_sparse_moe.experts.205.w3", "model.layers.39.block_sparse_moe.experts.206.w3", "model.layers.39.block_sparse_moe.experts.207.w3", "model.layers.39.block_sparse_moe.experts.208.w3", "model.layers.39.block_sparse_moe.experts.209.w3", "model.layers.39.block_sparse_moe.experts.210.w3", "model.layers.39.block_sparse_moe.experts.211.w3", "model.layers.39.block_sparse_moe.experts.212.w3", "model.layers.39.block_sparse_moe.experts.213.w3", "model.layers.39.block_sparse_moe.experts.214.w3", "model.layers.39.block_sparse_moe.experts.215.w3", "model.layers.39.block_sparse_moe.experts.216.w3", "model.layers.39.block_sparse_moe.experts.217.w3", "model.layers.39.block_sparse_moe.experts.218.w3", "model.layers.39.block_sparse_moe.experts.219.w3", "model.layers.39.block_sparse_moe.experts.220.w3", "model.layers.39.block_sparse_moe.experts.221.w3", "model.layers.39.block_sparse_moe.experts.222.w3", "model.layers.39.block_sparse_moe.experts.223.w3", "model.layers.39.block_sparse_moe.experts.224.w3", "model.layers.39.block_sparse_moe.experts.225.w3", "model.layers.39.block_sparse_moe.experts.226.w3", "model.layers.39.block_sparse_moe.experts.227.w3", "model.layers.39.block_sparse_moe.experts.228.w3", "model.layers.39.block_sparse_moe.experts.229.w3", "model.layers.39.block_sparse_moe.experts.230.w3", "model.layers.39.block_sparse_moe.experts.231.w3", "model.layers.39.block_sparse_moe.experts.232.w3", "model.layers.39.block_sparse_moe.experts.233.w3", "model.layers.39.block_sparse_moe.experts.234.w3", "model.layers.39.block_sparse_moe.experts.235.w3", "model.layers.39.block_sparse_moe.experts.236.w3", "model.layers.39.block_sparse_moe.experts.237.w3", "model.layers.39.block_sparse_moe.experts.238.w3", "model.layers.39.block_sparse_moe.experts.239.w3", "model.layers.39.block_sparse_moe.experts.240.w3", "model.layers.39.block_sparse_moe.experts.241.w3", "model.layers.39.block_sparse_moe.experts.242.w3", "model.layers.39.block_sparse_moe.experts.243.w3", "model.layers.39.block_sparse_moe.experts.244.w3", "model.layers.39.block_sparse_moe.experts.245.w3", "model.layers.39.block_sparse_moe.experts.246.w3", "model.layers.39.block_sparse_moe.experts.247.w3", "model.layers.39.block_sparse_moe.experts.248.w3", "model.layers.39.block_sparse_moe.experts.249.w3", "model.layers.39.block_sparse_moe.experts.250.w3", "model.layers.39.block_sparse_moe.experts.251.w3", "model.layers.39.block_sparse_moe.experts.252.w3", "model.layers.39.block_sparse_moe.experts.253.w3", "model.layers.39.block_sparse_moe.experts.254.w3", "model.layers.39.block_sparse_moe.experts.255.w3", "model.layers.39.block_sparse_moe.experts.0.w2", "model.layers.39.block_sparse_moe.experts.1.w2", "model.layers.39.block_sparse_moe.experts.2.w2", "model.layers.39.block_sparse_moe.experts.3.w2", "model.layers.39.block_sparse_moe.experts.4.w2", "model.layers.39.block_sparse_moe.experts.5.w2", "model.layers.39.block_sparse_moe.experts.6.w2", "model.layers.39.block_sparse_moe.experts.7.w2", "model.layers.39.block_sparse_moe.experts.8.w2", "model.layers.39.block_sparse_moe.experts.9.w2", "model.layers.39.block_sparse_moe.experts.10.w2", "model.layers.39.block_sparse_moe.experts.11.w2", "model.layers.39.block_sparse_moe.experts.12.w2", "model.layers.39.block_sparse_moe.experts.13.w2", "model.layers.39.block_sparse_moe.experts.14.w2", "model.layers.39.block_sparse_moe.experts.15.w2", "model.layers.39.block_sparse_moe.experts.16.w2", "model.layers.39.block_sparse_moe.experts.17.w2", "model.layers.39.block_sparse_moe.experts.18.w2", "model.layers.39.block_sparse_moe.experts.19.w2", "model.layers.39.block_sparse_moe.experts.20.w2", "model.layers.39.block_sparse_moe.experts.21.w2", "model.layers.39.block_sparse_moe.experts.22.w2", "model.layers.39.block_sparse_moe.experts.23.w2", "model.layers.39.block_sparse_moe.experts.24.w2", "model.layers.39.block_sparse_moe.experts.25.w2", "model.layers.39.block_sparse_moe.experts.26.w2", "model.layers.39.block_sparse_moe.experts.27.w2", "model.layers.39.block_sparse_moe.experts.28.w2", "model.layers.39.block_sparse_moe.experts.29.w2", "model.layers.39.block_sparse_moe.experts.30.w2", "model.layers.39.block_sparse_moe.experts.31.w2", "model.layers.39.block_sparse_moe.experts.32.w2", "model.layers.39.block_sparse_moe.experts.33.w2", "model.layers.39.block_sparse_moe.experts.34.w2", "model.layers.39.block_sparse_moe.experts.35.w2", "model.layers.39.block_sparse_moe.experts.36.w2", "model.layers.39.block_sparse_moe.experts.37.w2", "model.layers.39.block_sparse_moe.experts.38.w2", "model.layers.39.block_sparse_moe.experts.39.w2", "model.layers.39.block_sparse_moe.experts.40.w2", "model.layers.39.block_sparse_moe.experts.41.w2", "model.layers.39.block_sparse_moe.experts.42.w2", "model.layers.39.block_sparse_moe.experts.43.w2", "model.layers.39.block_sparse_moe.experts.44.w2", "model.layers.39.block_sparse_moe.experts.45.w2", "model.layers.39.block_sparse_moe.experts.46.w2", "model.layers.39.block_sparse_moe.experts.47.w2", "model.layers.39.block_sparse_moe.experts.48.w2", "model.layers.39.block_sparse_moe.experts.49.w2", "model.layers.39.block_sparse_moe.experts.50.w2", "model.layers.39.block_sparse_moe.experts.51.w2", "model.layers.39.block_sparse_moe.experts.52.w2", "model.layers.39.block_sparse_moe.experts.53.w2", "model.layers.39.block_sparse_moe.experts.54.w2", "model.layers.39.block_sparse_moe.experts.55.w2", "model.layers.39.block_sparse_moe.experts.56.w2", "model.layers.39.block_sparse_moe.experts.57.w2", "model.layers.39.block_sparse_moe.experts.58.w2", "model.layers.39.block_sparse_moe.experts.59.w2", "model.layers.39.block_sparse_moe.experts.60.w2", "model.layers.39.block_sparse_moe.experts.61.w2", "model.layers.39.block_sparse_moe.experts.62.w2", "model.layers.39.block_sparse_moe.experts.63.w2", "model.layers.39.block_sparse_moe.experts.64.w2", "model.layers.39.block_sparse_moe.experts.65.w2", "model.layers.39.block_sparse_moe.experts.66.w2", "model.layers.39.block_sparse_moe.experts.67.w2", "model.layers.39.block_sparse_moe.experts.68.w2", "model.layers.39.block_sparse_moe.experts.69.w2", "model.layers.39.block_sparse_moe.experts.70.w2", "model.layers.39.block_sparse_moe.experts.71.w2", "model.layers.39.block_sparse_moe.experts.72.w2", "model.layers.39.block_sparse_moe.experts.73.w2", "model.layers.39.block_sparse_moe.experts.74.w2", "model.layers.39.block_sparse_moe.experts.75.w2", "model.layers.39.block_sparse_moe.experts.76.w2", "model.layers.39.block_sparse_moe.experts.77.w2", "model.layers.39.block_sparse_moe.experts.78.w2", "model.layers.39.block_sparse_moe.experts.79.w2", "model.layers.39.block_sparse_moe.experts.80.w2", "model.layers.39.block_sparse_moe.experts.81.w2", "model.layers.39.block_sparse_moe.experts.82.w2", "model.layers.39.block_sparse_moe.experts.83.w2", "model.layers.39.block_sparse_moe.experts.84.w2", "model.layers.39.block_sparse_moe.experts.85.w2", "model.layers.39.block_sparse_moe.experts.86.w2", "model.layers.39.block_sparse_moe.experts.87.w2", "model.layers.39.block_sparse_moe.experts.88.w2", "model.layers.39.block_sparse_moe.experts.89.w2", "model.layers.39.block_sparse_moe.experts.90.w2", "model.layers.39.block_sparse_moe.experts.91.w2", "model.layers.39.block_sparse_moe.experts.92.w2", "model.layers.39.block_sparse_moe.experts.93.w2", "model.layers.39.block_sparse_moe.experts.94.w2", "model.layers.39.block_sparse_moe.experts.95.w2", "model.layers.39.block_sparse_moe.experts.96.w2", "model.layers.39.block_sparse_moe.experts.97.w2", "model.layers.39.block_sparse_moe.experts.98.w2", "model.layers.39.block_sparse_moe.experts.99.w2", "model.layers.39.block_sparse_moe.experts.100.w2", "model.layers.39.block_sparse_moe.experts.101.w2", "model.layers.39.block_sparse_moe.experts.102.w2", "model.layers.39.block_sparse_moe.experts.103.w2", "model.layers.39.block_sparse_moe.experts.104.w2", "model.layers.39.block_sparse_moe.experts.105.w2", "model.layers.39.block_sparse_moe.experts.106.w2", "model.layers.39.block_sparse_moe.experts.107.w2", "model.layers.39.block_sparse_moe.experts.108.w2", "model.layers.39.block_sparse_moe.experts.109.w2", "model.layers.39.block_sparse_moe.experts.110.w2", "model.layers.39.block_sparse_moe.experts.111.w2", "model.layers.39.block_sparse_moe.experts.112.w2", "model.layers.39.block_sparse_moe.experts.113.w2", "model.layers.39.block_sparse_moe.experts.114.w2", "model.layers.39.block_sparse_moe.experts.115.w2", "model.layers.39.block_sparse_moe.experts.116.w2", "model.layers.39.block_sparse_moe.experts.117.w2", "model.layers.39.block_sparse_moe.experts.118.w2", "model.layers.39.block_sparse_moe.experts.119.w2", "model.layers.39.block_sparse_moe.experts.120.w2", "model.layers.39.block_sparse_moe.experts.121.w2", "model.layers.39.block_sparse_moe.experts.122.w2", "model.layers.39.block_sparse_moe.experts.123.w2", "model.layers.39.block_sparse_moe.experts.124.w2", "model.layers.39.block_sparse_moe.experts.125.w2", "model.layers.39.block_sparse_moe.experts.126.w2", "model.layers.39.block_sparse_moe.experts.127.w2", "model.layers.39.block_sparse_moe.experts.128.w2", "model.layers.39.block_sparse_moe.experts.129.w2", "model.layers.39.block_sparse_moe.experts.130.w2", "model.layers.39.block_sparse_moe.experts.131.w2", "model.layers.39.block_sparse_moe.experts.132.w2", "model.layers.39.block_sparse_moe.experts.133.w2", "model.layers.39.block_sparse_moe.experts.134.w2", "model.layers.39.block_sparse_moe.experts.135.w2", "model.layers.39.block_sparse_moe.experts.136.w2", "model.layers.39.block_sparse_moe.experts.137.w2", "model.layers.39.block_sparse_moe.experts.138.w2", "model.layers.39.block_sparse_moe.experts.139.w2", "model.layers.39.block_sparse_moe.experts.140.w2", "model.layers.39.block_sparse_moe.experts.141.w2", "model.layers.39.block_sparse_moe.experts.142.w2", "model.layers.39.block_sparse_moe.experts.143.w2", "model.layers.39.block_sparse_moe.experts.144.w2", "model.layers.39.block_sparse_moe.experts.145.w2", "model.layers.39.block_sparse_moe.experts.146.w2", "model.layers.39.block_sparse_moe.experts.147.w2", "model.layers.39.block_sparse_moe.experts.148.w2", "model.layers.39.block_sparse_moe.experts.149.w2", "model.layers.39.block_sparse_moe.experts.150.w2", "model.layers.39.block_sparse_moe.experts.151.w2", "model.layers.39.block_sparse_moe.experts.152.w2", "model.layers.39.block_sparse_moe.experts.153.w2", "model.layers.39.block_sparse_moe.experts.154.w2", "model.layers.39.block_sparse_moe.experts.155.w2", "model.layers.39.block_sparse_moe.experts.156.w2", "model.layers.39.block_sparse_moe.experts.157.w2", "model.layers.39.block_sparse_moe.experts.158.w2", "model.layers.39.block_sparse_moe.experts.159.w2", "model.layers.39.block_sparse_moe.experts.160.w2", "model.layers.39.block_sparse_moe.experts.161.w2", "model.layers.39.block_sparse_moe.experts.162.w2", "model.layers.39.block_sparse_moe.experts.163.w2", "model.layers.39.block_sparse_moe.experts.164.w2", "model.layers.39.block_sparse_moe.experts.165.w2", "model.layers.39.block_sparse_moe.experts.166.w2", "model.layers.39.block_sparse_moe.experts.167.w2", "model.layers.39.block_sparse_moe.experts.168.w2", "model.layers.39.block_sparse_moe.experts.169.w2", "model.layers.39.block_sparse_moe.experts.170.w2", "model.layers.39.block_sparse_moe.experts.171.w2", "model.layers.39.block_sparse_moe.experts.172.w2", "model.layers.39.block_sparse_moe.experts.173.w2", "model.layers.39.block_sparse_moe.experts.174.w2", "model.layers.39.block_sparse_moe.experts.175.w2", "model.layers.39.block_sparse_moe.experts.176.w2", "model.layers.39.block_sparse_moe.experts.177.w2", "model.layers.39.block_sparse_moe.experts.178.w2", "model.layers.39.block_sparse_moe.experts.179.w2", "model.layers.39.block_sparse_moe.experts.180.w2", "model.layers.39.block_sparse_moe.experts.181.w2", "model.layers.39.block_sparse_moe.experts.182.w2", "model.layers.39.block_sparse_moe.experts.183.w2", "model.layers.39.block_sparse_moe.experts.184.w2", "model.layers.39.block_sparse_moe.experts.185.w2", "model.layers.39.block_sparse_moe.experts.186.w2", "model.layers.39.block_sparse_moe.experts.187.w2", "model.layers.39.block_sparse_moe.experts.188.w2", "model.layers.39.block_sparse_moe.experts.189.w2", "model.layers.39.block_sparse_moe.experts.190.w2", "model.layers.39.block_sparse_moe.experts.191.w2", "model.layers.39.block_sparse_moe.experts.192.w2", "model.layers.39.block_sparse_moe.experts.193.w2", "model.layers.39.block_sparse_moe.experts.194.w2", "model.layers.39.block_sparse_moe.experts.195.w2", "model.layers.39.block_sparse_moe.experts.196.w2", "model.layers.39.block_sparse_moe.experts.197.w2", "model.layers.39.block_sparse_moe.experts.198.w2", "model.layers.39.block_sparse_moe.experts.199.w2", "model.layers.39.block_sparse_moe.experts.200.w2", "model.layers.39.block_sparse_moe.experts.201.w2", "model.layers.39.block_sparse_moe.experts.202.w2", "model.layers.39.block_sparse_moe.experts.203.w2", "model.layers.39.block_sparse_moe.experts.204.w2", "model.layers.39.block_sparse_moe.experts.205.w2", "model.layers.39.block_sparse_moe.experts.206.w2", "model.layers.39.block_sparse_moe.experts.207.w2", "model.layers.39.block_sparse_moe.experts.208.w2", "model.layers.39.block_sparse_moe.experts.209.w2", "model.layers.39.block_sparse_moe.experts.210.w2", "model.layers.39.block_sparse_moe.experts.211.w2", "model.layers.39.block_sparse_moe.experts.212.w2", "model.layers.39.block_sparse_moe.experts.213.w2", "model.layers.39.block_sparse_moe.experts.214.w2", "model.layers.39.block_sparse_moe.experts.215.w2", "model.layers.39.block_sparse_moe.experts.216.w2", "model.layers.39.block_sparse_moe.experts.217.w2", "model.layers.39.block_sparse_moe.experts.218.w2", "model.layers.39.block_sparse_moe.experts.219.w2", "model.layers.39.block_sparse_moe.experts.220.w2", "model.layers.39.block_sparse_moe.experts.221.w2", "model.layers.39.block_sparse_moe.experts.222.w2", "model.layers.39.block_sparse_moe.experts.223.w2", "model.layers.39.block_sparse_moe.experts.224.w2", "model.layers.39.block_sparse_moe.experts.225.w2", "model.layers.39.block_sparse_moe.experts.226.w2", "model.layers.39.block_sparse_moe.experts.227.w2", "model.layers.39.block_sparse_moe.experts.228.w2", "model.layers.39.block_sparse_moe.experts.229.w2", "model.layers.39.block_sparse_moe.experts.230.w2", "model.layers.39.block_sparse_moe.experts.231.w2", "model.layers.39.block_sparse_moe.experts.232.w2", "model.layers.39.block_sparse_moe.experts.233.w2", "model.layers.39.block_sparse_moe.experts.234.w2", "model.layers.39.block_sparse_moe.experts.235.w2", "model.layers.39.block_sparse_moe.experts.236.w2", "model.layers.39.block_sparse_moe.experts.237.w2", "model.layers.39.block_sparse_moe.experts.238.w2", "model.layers.39.block_sparse_moe.experts.239.w2", "model.layers.39.block_sparse_moe.experts.240.w2", "model.layers.39.block_sparse_moe.experts.241.w2", "model.layers.39.block_sparse_moe.experts.242.w2", "model.layers.39.block_sparse_moe.experts.243.w2", "model.layers.39.block_sparse_moe.experts.244.w2", "model.layers.39.block_sparse_moe.experts.245.w2", "model.layers.39.block_sparse_moe.experts.246.w2", "model.layers.39.block_sparse_moe.experts.247.w2", "model.layers.39.block_sparse_moe.experts.248.w2", "model.layers.39.block_sparse_moe.experts.249.w2", "model.layers.39.block_sparse_moe.experts.250.w2", "model.layers.39.block_sparse_moe.experts.251.w2", "model.layers.39.block_sparse_moe.experts.252.w2", "model.layers.39.block_sparse_moe.experts.253.w2", "model.layers.39.block_sparse_moe.experts.254.w2", "model.layers.39.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0017761824652552383, "dbits": 3623878656 } ] }, { "idx": 80, "layers": [ "model.layers.40.self_attn.q_proj", "model.layers.40.self_attn.k_proj", "model.layers.40.self_attn.v_proj", "model.layers.40.self_attn.o_proj" ], "candidates": [ { "dkld": -0.005855295062065113, "dbits": 44040192 } ] }, { "idx": 81, "layers": [ "model.layers.40.block_sparse_moe.experts.0.w1", "model.layers.40.block_sparse_moe.experts.1.w1", "model.layers.40.block_sparse_moe.experts.2.w1", "model.layers.40.block_sparse_moe.experts.3.w1", "model.layers.40.block_sparse_moe.experts.4.w1", "model.layers.40.block_sparse_moe.experts.5.w1", "model.layers.40.block_sparse_moe.experts.6.w1", "model.layers.40.block_sparse_moe.experts.7.w1", "model.layers.40.block_sparse_moe.experts.8.w1", "model.layers.40.block_sparse_moe.experts.9.w1", "model.layers.40.block_sparse_moe.experts.10.w1", "model.layers.40.block_sparse_moe.experts.11.w1", "model.layers.40.block_sparse_moe.experts.12.w1", "model.layers.40.block_sparse_moe.experts.13.w1", "model.layers.40.block_sparse_moe.experts.14.w1", "model.layers.40.block_sparse_moe.experts.15.w1", "model.layers.40.block_sparse_moe.experts.16.w1", "model.layers.40.block_sparse_moe.experts.17.w1", "model.layers.40.block_sparse_moe.experts.18.w1", "model.layers.40.block_sparse_moe.experts.19.w1", "model.layers.40.block_sparse_moe.experts.20.w1", "model.layers.40.block_sparse_moe.experts.21.w1", "model.layers.40.block_sparse_moe.experts.22.w1", "model.layers.40.block_sparse_moe.experts.23.w1", "model.layers.40.block_sparse_moe.experts.24.w1", "model.layers.40.block_sparse_moe.experts.25.w1", "model.layers.40.block_sparse_moe.experts.26.w1", "model.layers.40.block_sparse_moe.experts.27.w1", "model.layers.40.block_sparse_moe.experts.28.w1", "model.layers.40.block_sparse_moe.experts.29.w1", "model.layers.40.block_sparse_moe.experts.30.w1", "model.layers.40.block_sparse_moe.experts.31.w1", "model.layers.40.block_sparse_moe.experts.32.w1", "model.layers.40.block_sparse_moe.experts.33.w1", "model.layers.40.block_sparse_moe.experts.34.w1", "model.layers.40.block_sparse_moe.experts.35.w1", "model.layers.40.block_sparse_moe.experts.36.w1", "model.layers.40.block_sparse_moe.experts.37.w1", "model.layers.40.block_sparse_moe.experts.38.w1", "model.layers.40.block_sparse_moe.experts.39.w1", "model.layers.40.block_sparse_moe.experts.40.w1", "model.layers.40.block_sparse_moe.experts.41.w1", "model.layers.40.block_sparse_moe.experts.42.w1", "model.layers.40.block_sparse_moe.experts.43.w1", "model.layers.40.block_sparse_moe.experts.44.w1", "model.layers.40.block_sparse_moe.experts.45.w1", "model.layers.40.block_sparse_moe.experts.46.w1", "model.layers.40.block_sparse_moe.experts.47.w1", "model.layers.40.block_sparse_moe.experts.48.w1", "model.layers.40.block_sparse_moe.experts.49.w1", "model.layers.40.block_sparse_moe.experts.50.w1", "model.layers.40.block_sparse_moe.experts.51.w1", "model.layers.40.block_sparse_moe.experts.52.w1", "model.layers.40.block_sparse_moe.experts.53.w1", "model.layers.40.block_sparse_moe.experts.54.w1", "model.layers.40.block_sparse_moe.experts.55.w1", "model.layers.40.block_sparse_moe.experts.56.w1", "model.layers.40.block_sparse_moe.experts.57.w1", "model.layers.40.block_sparse_moe.experts.58.w1", "model.layers.40.block_sparse_moe.experts.59.w1", "model.layers.40.block_sparse_moe.experts.60.w1", "model.layers.40.block_sparse_moe.experts.61.w1", "model.layers.40.block_sparse_moe.experts.62.w1", "model.layers.40.block_sparse_moe.experts.63.w1", "model.layers.40.block_sparse_moe.experts.64.w1", "model.layers.40.block_sparse_moe.experts.65.w1", "model.layers.40.block_sparse_moe.experts.66.w1", "model.layers.40.block_sparse_moe.experts.67.w1", "model.layers.40.block_sparse_moe.experts.68.w1", "model.layers.40.block_sparse_moe.experts.69.w1", "model.layers.40.block_sparse_moe.experts.70.w1", "model.layers.40.block_sparse_moe.experts.71.w1", "model.layers.40.block_sparse_moe.experts.72.w1", "model.layers.40.block_sparse_moe.experts.73.w1", "model.layers.40.block_sparse_moe.experts.74.w1", "model.layers.40.block_sparse_moe.experts.75.w1", "model.layers.40.block_sparse_moe.experts.76.w1", "model.layers.40.block_sparse_moe.experts.77.w1", "model.layers.40.block_sparse_moe.experts.78.w1", "model.layers.40.block_sparse_moe.experts.79.w1", "model.layers.40.block_sparse_moe.experts.80.w1", "model.layers.40.block_sparse_moe.experts.81.w1", "model.layers.40.block_sparse_moe.experts.82.w1", "model.layers.40.block_sparse_moe.experts.83.w1", "model.layers.40.block_sparse_moe.experts.84.w1", "model.layers.40.block_sparse_moe.experts.85.w1", "model.layers.40.block_sparse_moe.experts.86.w1", "model.layers.40.block_sparse_moe.experts.87.w1", "model.layers.40.block_sparse_moe.experts.88.w1", "model.layers.40.block_sparse_moe.experts.89.w1", "model.layers.40.block_sparse_moe.experts.90.w1", "model.layers.40.block_sparse_moe.experts.91.w1", "model.layers.40.block_sparse_moe.experts.92.w1", "model.layers.40.block_sparse_moe.experts.93.w1", "model.layers.40.block_sparse_moe.experts.94.w1", "model.layers.40.block_sparse_moe.experts.95.w1", "model.layers.40.block_sparse_moe.experts.96.w1", "model.layers.40.block_sparse_moe.experts.97.w1", "model.layers.40.block_sparse_moe.experts.98.w1", "model.layers.40.block_sparse_moe.experts.99.w1", "model.layers.40.block_sparse_moe.experts.100.w1", "model.layers.40.block_sparse_moe.experts.101.w1", "model.layers.40.block_sparse_moe.experts.102.w1", "model.layers.40.block_sparse_moe.experts.103.w1", "model.layers.40.block_sparse_moe.experts.104.w1", "model.layers.40.block_sparse_moe.experts.105.w1", "model.layers.40.block_sparse_moe.experts.106.w1", "model.layers.40.block_sparse_moe.experts.107.w1", "model.layers.40.block_sparse_moe.experts.108.w1", "model.layers.40.block_sparse_moe.experts.109.w1", "model.layers.40.block_sparse_moe.experts.110.w1", "model.layers.40.block_sparse_moe.experts.111.w1", "model.layers.40.block_sparse_moe.experts.112.w1", "model.layers.40.block_sparse_moe.experts.113.w1", "model.layers.40.block_sparse_moe.experts.114.w1", "model.layers.40.block_sparse_moe.experts.115.w1", "model.layers.40.block_sparse_moe.experts.116.w1", "model.layers.40.block_sparse_moe.experts.117.w1", "model.layers.40.block_sparse_moe.experts.118.w1", "model.layers.40.block_sparse_moe.experts.119.w1", "model.layers.40.block_sparse_moe.experts.120.w1", "model.layers.40.block_sparse_moe.experts.121.w1", "model.layers.40.block_sparse_moe.experts.122.w1", "model.layers.40.block_sparse_moe.experts.123.w1", "model.layers.40.block_sparse_moe.experts.124.w1", "model.layers.40.block_sparse_moe.experts.125.w1", "model.layers.40.block_sparse_moe.experts.126.w1", "model.layers.40.block_sparse_moe.experts.127.w1", "model.layers.40.block_sparse_moe.experts.128.w1", "model.layers.40.block_sparse_moe.experts.129.w1", "model.layers.40.block_sparse_moe.experts.130.w1", "model.layers.40.block_sparse_moe.experts.131.w1", "model.layers.40.block_sparse_moe.experts.132.w1", "model.layers.40.block_sparse_moe.experts.133.w1", "model.layers.40.block_sparse_moe.experts.134.w1", "model.layers.40.block_sparse_moe.experts.135.w1", "model.layers.40.block_sparse_moe.experts.136.w1", "model.layers.40.block_sparse_moe.experts.137.w1", "model.layers.40.block_sparse_moe.experts.138.w1", "model.layers.40.block_sparse_moe.experts.139.w1", "model.layers.40.block_sparse_moe.experts.140.w1", "model.layers.40.block_sparse_moe.experts.141.w1", "model.layers.40.block_sparse_moe.experts.142.w1", "model.layers.40.block_sparse_moe.experts.143.w1", "model.layers.40.block_sparse_moe.experts.144.w1", "model.layers.40.block_sparse_moe.experts.145.w1", "model.layers.40.block_sparse_moe.experts.146.w1", "model.layers.40.block_sparse_moe.experts.147.w1", "model.layers.40.block_sparse_moe.experts.148.w1", "model.layers.40.block_sparse_moe.experts.149.w1", "model.layers.40.block_sparse_moe.experts.150.w1", "model.layers.40.block_sparse_moe.experts.151.w1", "model.layers.40.block_sparse_moe.experts.152.w1", "model.layers.40.block_sparse_moe.experts.153.w1", "model.layers.40.block_sparse_moe.experts.154.w1", "model.layers.40.block_sparse_moe.experts.155.w1", "model.layers.40.block_sparse_moe.experts.156.w1", "model.layers.40.block_sparse_moe.experts.157.w1", "model.layers.40.block_sparse_moe.experts.158.w1", "model.layers.40.block_sparse_moe.experts.159.w1", "model.layers.40.block_sparse_moe.experts.160.w1", "model.layers.40.block_sparse_moe.experts.161.w1", "model.layers.40.block_sparse_moe.experts.162.w1", "model.layers.40.block_sparse_moe.experts.163.w1", "model.layers.40.block_sparse_moe.experts.164.w1", "model.layers.40.block_sparse_moe.experts.165.w1", "model.layers.40.block_sparse_moe.experts.166.w1", "model.layers.40.block_sparse_moe.experts.167.w1", "model.layers.40.block_sparse_moe.experts.168.w1", "model.layers.40.block_sparse_moe.experts.169.w1", "model.layers.40.block_sparse_moe.experts.170.w1", "model.layers.40.block_sparse_moe.experts.171.w1", "model.layers.40.block_sparse_moe.experts.172.w1", "model.layers.40.block_sparse_moe.experts.173.w1", "model.layers.40.block_sparse_moe.experts.174.w1", "model.layers.40.block_sparse_moe.experts.175.w1", "model.layers.40.block_sparse_moe.experts.176.w1", "model.layers.40.block_sparse_moe.experts.177.w1", "model.layers.40.block_sparse_moe.experts.178.w1", "model.layers.40.block_sparse_moe.experts.179.w1", "model.layers.40.block_sparse_moe.experts.180.w1", "model.layers.40.block_sparse_moe.experts.181.w1", "model.layers.40.block_sparse_moe.experts.182.w1", "model.layers.40.block_sparse_moe.experts.183.w1", "model.layers.40.block_sparse_moe.experts.184.w1", "model.layers.40.block_sparse_moe.experts.185.w1", "model.layers.40.block_sparse_moe.experts.186.w1", "model.layers.40.block_sparse_moe.experts.187.w1", "model.layers.40.block_sparse_moe.experts.188.w1", "model.layers.40.block_sparse_moe.experts.189.w1", "model.layers.40.block_sparse_moe.experts.190.w1", "model.layers.40.block_sparse_moe.experts.191.w1", "model.layers.40.block_sparse_moe.experts.192.w1", "model.layers.40.block_sparse_moe.experts.193.w1", "model.layers.40.block_sparse_moe.experts.194.w1", "model.layers.40.block_sparse_moe.experts.195.w1", "model.layers.40.block_sparse_moe.experts.196.w1", "model.layers.40.block_sparse_moe.experts.197.w1", "model.layers.40.block_sparse_moe.experts.198.w1", "model.layers.40.block_sparse_moe.experts.199.w1", "model.layers.40.block_sparse_moe.experts.200.w1", "model.layers.40.block_sparse_moe.experts.201.w1", "model.layers.40.block_sparse_moe.experts.202.w1", "model.layers.40.block_sparse_moe.experts.203.w1", "model.layers.40.block_sparse_moe.experts.204.w1", "model.layers.40.block_sparse_moe.experts.205.w1", "model.layers.40.block_sparse_moe.experts.206.w1", "model.layers.40.block_sparse_moe.experts.207.w1", "model.layers.40.block_sparse_moe.experts.208.w1", "model.layers.40.block_sparse_moe.experts.209.w1", "model.layers.40.block_sparse_moe.experts.210.w1", "model.layers.40.block_sparse_moe.experts.211.w1", "model.layers.40.block_sparse_moe.experts.212.w1", "model.layers.40.block_sparse_moe.experts.213.w1", "model.layers.40.block_sparse_moe.experts.214.w1", "model.layers.40.block_sparse_moe.experts.215.w1", "model.layers.40.block_sparse_moe.experts.216.w1", "model.layers.40.block_sparse_moe.experts.217.w1", "model.layers.40.block_sparse_moe.experts.218.w1", "model.layers.40.block_sparse_moe.experts.219.w1", "model.layers.40.block_sparse_moe.experts.220.w1", "model.layers.40.block_sparse_moe.experts.221.w1", "model.layers.40.block_sparse_moe.experts.222.w1", "model.layers.40.block_sparse_moe.experts.223.w1", "model.layers.40.block_sparse_moe.experts.224.w1", "model.layers.40.block_sparse_moe.experts.225.w1", "model.layers.40.block_sparse_moe.experts.226.w1", "model.layers.40.block_sparse_moe.experts.227.w1", "model.layers.40.block_sparse_moe.experts.228.w1", "model.layers.40.block_sparse_moe.experts.229.w1", "model.layers.40.block_sparse_moe.experts.230.w1", "model.layers.40.block_sparse_moe.experts.231.w1", "model.layers.40.block_sparse_moe.experts.232.w1", "model.layers.40.block_sparse_moe.experts.233.w1", "model.layers.40.block_sparse_moe.experts.234.w1", "model.layers.40.block_sparse_moe.experts.235.w1", "model.layers.40.block_sparse_moe.experts.236.w1", "model.layers.40.block_sparse_moe.experts.237.w1", "model.layers.40.block_sparse_moe.experts.238.w1", "model.layers.40.block_sparse_moe.experts.239.w1", "model.layers.40.block_sparse_moe.experts.240.w1", "model.layers.40.block_sparse_moe.experts.241.w1", "model.layers.40.block_sparse_moe.experts.242.w1", "model.layers.40.block_sparse_moe.experts.243.w1", "model.layers.40.block_sparse_moe.experts.244.w1", "model.layers.40.block_sparse_moe.experts.245.w1", "model.layers.40.block_sparse_moe.experts.246.w1", "model.layers.40.block_sparse_moe.experts.247.w1", "model.layers.40.block_sparse_moe.experts.248.w1", "model.layers.40.block_sparse_moe.experts.249.w1", "model.layers.40.block_sparse_moe.experts.250.w1", "model.layers.40.block_sparse_moe.experts.251.w1", "model.layers.40.block_sparse_moe.experts.252.w1", "model.layers.40.block_sparse_moe.experts.253.w1", "model.layers.40.block_sparse_moe.experts.254.w1", "model.layers.40.block_sparse_moe.experts.255.w1", "model.layers.40.block_sparse_moe.experts.0.w3", "model.layers.40.block_sparse_moe.experts.1.w3", "model.layers.40.block_sparse_moe.experts.2.w3", "model.layers.40.block_sparse_moe.experts.3.w3", "model.layers.40.block_sparse_moe.experts.4.w3", "model.layers.40.block_sparse_moe.experts.5.w3", "model.layers.40.block_sparse_moe.experts.6.w3", "model.layers.40.block_sparse_moe.experts.7.w3", "model.layers.40.block_sparse_moe.experts.8.w3", "model.layers.40.block_sparse_moe.experts.9.w3", "model.layers.40.block_sparse_moe.experts.10.w3", "model.layers.40.block_sparse_moe.experts.11.w3", "model.layers.40.block_sparse_moe.experts.12.w3", "model.layers.40.block_sparse_moe.experts.13.w3", "model.layers.40.block_sparse_moe.experts.14.w3", "model.layers.40.block_sparse_moe.experts.15.w3", "model.layers.40.block_sparse_moe.experts.16.w3", "model.layers.40.block_sparse_moe.experts.17.w3", "model.layers.40.block_sparse_moe.experts.18.w3", "model.layers.40.block_sparse_moe.experts.19.w3", "model.layers.40.block_sparse_moe.experts.20.w3", "model.layers.40.block_sparse_moe.experts.21.w3", "model.layers.40.block_sparse_moe.experts.22.w3", "model.layers.40.block_sparse_moe.experts.23.w3", "model.layers.40.block_sparse_moe.experts.24.w3", "model.layers.40.block_sparse_moe.experts.25.w3", "model.layers.40.block_sparse_moe.experts.26.w3", "model.layers.40.block_sparse_moe.experts.27.w3", "model.layers.40.block_sparse_moe.experts.28.w3", "model.layers.40.block_sparse_moe.experts.29.w3", "model.layers.40.block_sparse_moe.experts.30.w3", "model.layers.40.block_sparse_moe.experts.31.w3", "model.layers.40.block_sparse_moe.experts.32.w3", "model.layers.40.block_sparse_moe.experts.33.w3", "model.layers.40.block_sparse_moe.experts.34.w3", "model.layers.40.block_sparse_moe.experts.35.w3", "model.layers.40.block_sparse_moe.experts.36.w3", "model.layers.40.block_sparse_moe.experts.37.w3", "model.layers.40.block_sparse_moe.experts.38.w3", "model.layers.40.block_sparse_moe.experts.39.w3", "model.layers.40.block_sparse_moe.experts.40.w3", "model.layers.40.block_sparse_moe.experts.41.w3", "model.layers.40.block_sparse_moe.experts.42.w3", "model.layers.40.block_sparse_moe.experts.43.w3", "model.layers.40.block_sparse_moe.experts.44.w3", "model.layers.40.block_sparse_moe.experts.45.w3", "model.layers.40.block_sparse_moe.experts.46.w3", "model.layers.40.block_sparse_moe.experts.47.w3", "model.layers.40.block_sparse_moe.experts.48.w3", "model.layers.40.block_sparse_moe.experts.49.w3", "model.layers.40.block_sparse_moe.experts.50.w3", "model.layers.40.block_sparse_moe.experts.51.w3", "model.layers.40.block_sparse_moe.experts.52.w3", "model.layers.40.block_sparse_moe.experts.53.w3", "model.layers.40.block_sparse_moe.experts.54.w3", "model.layers.40.block_sparse_moe.experts.55.w3", "model.layers.40.block_sparse_moe.experts.56.w3", "model.layers.40.block_sparse_moe.experts.57.w3", "model.layers.40.block_sparse_moe.experts.58.w3", "model.layers.40.block_sparse_moe.experts.59.w3", "model.layers.40.block_sparse_moe.experts.60.w3", "model.layers.40.block_sparse_moe.experts.61.w3", "model.layers.40.block_sparse_moe.experts.62.w3", "model.layers.40.block_sparse_moe.experts.63.w3", "model.layers.40.block_sparse_moe.experts.64.w3", "model.layers.40.block_sparse_moe.experts.65.w3", "model.layers.40.block_sparse_moe.experts.66.w3", "model.layers.40.block_sparse_moe.experts.67.w3", "model.layers.40.block_sparse_moe.experts.68.w3", "model.layers.40.block_sparse_moe.experts.69.w3", "model.layers.40.block_sparse_moe.experts.70.w3", "model.layers.40.block_sparse_moe.experts.71.w3", "model.layers.40.block_sparse_moe.experts.72.w3", "model.layers.40.block_sparse_moe.experts.73.w3", "model.layers.40.block_sparse_moe.experts.74.w3", "model.layers.40.block_sparse_moe.experts.75.w3", "model.layers.40.block_sparse_moe.experts.76.w3", "model.layers.40.block_sparse_moe.experts.77.w3", "model.layers.40.block_sparse_moe.experts.78.w3", "model.layers.40.block_sparse_moe.experts.79.w3", "model.layers.40.block_sparse_moe.experts.80.w3", "model.layers.40.block_sparse_moe.experts.81.w3", "model.layers.40.block_sparse_moe.experts.82.w3", "model.layers.40.block_sparse_moe.experts.83.w3", "model.layers.40.block_sparse_moe.experts.84.w3", "model.layers.40.block_sparse_moe.experts.85.w3", "model.layers.40.block_sparse_moe.experts.86.w3", "model.layers.40.block_sparse_moe.experts.87.w3", "model.layers.40.block_sparse_moe.experts.88.w3", "model.layers.40.block_sparse_moe.experts.89.w3", "model.layers.40.block_sparse_moe.experts.90.w3", "model.layers.40.block_sparse_moe.experts.91.w3", "model.layers.40.block_sparse_moe.experts.92.w3", "model.layers.40.block_sparse_moe.experts.93.w3", "model.layers.40.block_sparse_moe.experts.94.w3", "model.layers.40.block_sparse_moe.experts.95.w3", "model.layers.40.block_sparse_moe.experts.96.w3", "model.layers.40.block_sparse_moe.experts.97.w3", "model.layers.40.block_sparse_moe.experts.98.w3", "model.layers.40.block_sparse_moe.experts.99.w3", "model.layers.40.block_sparse_moe.experts.100.w3", "model.layers.40.block_sparse_moe.experts.101.w3", "model.layers.40.block_sparse_moe.experts.102.w3", "model.layers.40.block_sparse_moe.experts.103.w3", "model.layers.40.block_sparse_moe.experts.104.w3", "model.layers.40.block_sparse_moe.experts.105.w3", "model.layers.40.block_sparse_moe.experts.106.w3", "model.layers.40.block_sparse_moe.experts.107.w3", "model.layers.40.block_sparse_moe.experts.108.w3", "model.layers.40.block_sparse_moe.experts.109.w3", "model.layers.40.block_sparse_moe.experts.110.w3", "model.layers.40.block_sparse_moe.experts.111.w3", "model.layers.40.block_sparse_moe.experts.112.w3", "model.layers.40.block_sparse_moe.experts.113.w3", "model.layers.40.block_sparse_moe.experts.114.w3", "model.layers.40.block_sparse_moe.experts.115.w3", "model.layers.40.block_sparse_moe.experts.116.w3", "model.layers.40.block_sparse_moe.experts.117.w3", "model.layers.40.block_sparse_moe.experts.118.w3", "model.layers.40.block_sparse_moe.experts.119.w3", "model.layers.40.block_sparse_moe.experts.120.w3", "model.layers.40.block_sparse_moe.experts.121.w3", "model.layers.40.block_sparse_moe.experts.122.w3", "model.layers.40.block_sparse_moe.experts.123.w3", "model.layers.40.block_sparse_moe.experts.124.w3", "model.layers.40.block_sparse_moe.experts.125.w3", "model.layers.40.block_sparse_moe.experts.126.w3", "model.layers.40.block_sparse_moe.experts.127.w3", "model.layers.40.block_sparse_moe.experts.128.w3", "model.layers.40.block_sparse_moe.experts.129.w3", "model.layers.40.block_sparse_moe.experts.130.w3", "model.layers.40.block_sparse_moe.experts.131.w3", "model.layers.40.block_sparse_moe.experts.132.w3", "model.layers.40.block_sparse_moe.experts.133.w3", "model.layers.40.block_sparse_moe.experts.134.w3", "model.layers.40.block_sparse_moe.experts.135.w3", "model.layers.40.block_sparse_moe.experts.136.w3", "model.layers.40.block_sparse_moe.experts.137.w3", "model.layers.40.block_sparse_moe.experts.138.w3", "model.layers.40.block_sparse_moe.experts.139.w3", "model.layers.40.block_sparse_moe.experts.140.w3", "model.layers.40.block_sparse_moe.experts.141.w3", "model.layers.40.block_sparse_moe.experts.142.w3", "model.layers.40.block_sparse_moe.experts.143.w3", "model.layers.40.block_sparse_moe.experts.144.w3", "model.layers.40.block_sparse_moe.experts.145.w3", "model.layers.40.block_sparse_moe.experts.146.w3", "model.layers.40.block_sparse_moe.experts.147.w3", "model.layers.40.block_sparse_moe.experts.148.w3", "model.layers.40.block_sparse_moe.experts.149.w3", "model.layers.40.block_sparse_moe.experts.150.w3", "model.layers.40.block_sparse_moe.experts.151.w3", "model.layers.40.block_sparse_moe.experts.152.w3", "model.layers.40.block_sparse_moe.experts.153.w3", "model.layers.40.block_sparse_moe.experts.154.w3", "model.layers.40.block_sparse_moe.experts.155.w3", "model.layers.40.block_sparse_moe.experts.156.w3", "model.layers.40.block_sparse_moe.experts.157.w3", "model.layers.40.block_sparse_moe.experts.158.w3", "model.layers.40.block_sparse_moe.experts.159.w3", "model.layers.40.block_sparse_moe.experts.160.w3", "model.layers.40.block_sparse_moe.experts.161.w3", "model.layers.40.block_sparse_moe.experts.162.w3", "model.layers.40.block_sparse_moe.experts.163.w3", "model.layers.40.block_sparse_moe.experts.164.w3", "model.layers.40.block_sparse_moe.experts.165.w3", "model.layers.40.block_sparse_moe.experts.166.w3", "model.layers.40.block_sparse_moe.experts.167.w3", "model.layers.40.block_sparse_moe.experts.168.w3", "model.layers.40.block_sparse_moe.experts.169.w3", "model.layers.40.block_sparse_moe.experts.170.w3", "model.layers.40.block_sparse_moe.experts.171.w3", "model.layers.40.block_sparse_moe.experts.172.w3", "model.layers.40.block_sparse_moe.experts.173.w3", "model.layers.40.block_sparse_moe.experts.174.w3", "model.layers.40.block_sparse_moe.experts.175.w3", "model.layers.40.block_sparse_moe.experts.176.w3", "model.layers.40.block_sparse_moe.experts.177.w3", "model.layers.40.block_sparse_moe.experts.178.w3", "model.layers.40.block_sparse_moe.experts.179.w3", "model.layers.40.block_sparse_moe.experts.180.w3", "model.layers.40.block_sparse_moe.experts.181.w3", "model.layers.40.block_sparse_moe.experts.182.w3", "model.layers.40.block_sparse_moe.experts.183.w3", "model.layers.40.block_sparse_moe.experts.184.w3", "model.layers.40.block_sparse_moe.experts.185.w3", "model.layers.40.block_sparse_moe.experts.186.w3", "model.layers.40.block_sparse_moe.experts.187.w3", "model.layers.40.block_sparse_moe.experts.188.w3", "model.layers.40.block_sparse_moe.experts.189.w3", "model.layers.40.block_sparse_moe.experts.190.w3", "model.layers.40.block_sparse_moe.experts.191.w3", "model.layers.40.block_sparse_moe.experts.192.w3", "model.layers.40.block_sparse_moe.experts.193.w3", "model.layers.40.block_sparse_moe.experts.194.w3", "model.layers.40.block_sparse_moe.experts.195.w3", "model.layers.40.block_sparse_moe.experts.196.w3", "model.layers.40.block_sparse_moe.experts.197.w3", "model.layers.40.block_sparse_moe.experts.198.w3", "model.layers.40.block_sparse_moe.experts.199.w3", "model.layers.40.block_sparse_moe.experts.200.w3", "model.layers.40.block_sparse_moe.experts.201.w3", "model.layers.40.block_sparse_moe.experts.202.w3", "model.layers.40.block_sparse_moe.experts.203.w3", "model.layers.40.block_sparse_moe.experts.204.w3", "model.layers.40.block_sparse_moe.experts.205.w3", "model.layers.40.block_sparse_moe.experts.206.w3", "model.layers.40.block_sparse_moe.experts.207.w3", "model.layers.40.block_sparse_moe.experts.208.w3", "model.layers.40.block_sparse_moe.experts.209.w3", "model.layers.40.block_sparse_moe.experts.210.w3", "model.layers.40.block_sparse_moe.experts.211.w3", "model.layers.40.block_sparse_moe.experts.212.w3", "model.layers.40.block_sparse_moe.experts.213.w3", "model.layers.40.block_sparse_moe.experts.214.w3", "model.layers.40.block_sparse_moe.experts.215.w3", "model.layers.40.block_sparse_moe.experts.216.w3", "model.layers.40.block_sparse_moe.experts.217.w3", "model.layers.40.block_sparse_moe.experts.218.w3", "model.layers.40.block_sparse_moe.experts.219.w3", "model.layers.40.block_sparse_moe.experts.220.w3", "model.layers.40.block_sparse_moe.experts.221.w3", "model.layers.40.block_sparse_moe.experts.222.w3", "model.layers.40.block_sparse_moe.experts.223.w3", "model.layers.40.block_sparse_moe.experts.224.w3", "model.layers.40.block_sparse_moe.experts.225.w3", "model.layers.40.block_sparse_moe.experts.226.w3", "model.layers.40.block_sparse_moe.experts.227.w3", "model.layers.40.block_sparse_moe.experts.228.w3", "model.layers.40.block_sparse_moe.experts.229.w3", "model.layers.40.block_sparse_moe.experts.230.w3", "model.layers.40.block_sparse_moe.experts.231.w3", "model.layers.40.block_sparse_moe.experts.232.w3", "model.layers.40.block_sparse_moe.experts.233.w3", "model.layers.40.block_sparse_moe.experts.234.w3", "model.layers.40.block_sparse_moe.experts.235.w3", "model.layers.40.block_sparse_moe.experts.236.w3", "model.layers.40.block_sparse_moe.experts.237.w3", "model.layers.40.block_sparse_moe.experts.238.w3", "model.layers.40.block_sparse_moe.experts.239.w3", "model.layers.40.block_sparse_moe.experts.240.w3", "model.layers.40.block_sparse_moe.experts.241.w3", "model.layers.40.block_sparse_moe.experts.242.w3", "model.layers.40.block_sparse_moe.experts.243.w3", "model.layers.40.block_sparse_moe.experts.244.w3", "model.layers.40.block_sparse_moe.experts.245.w3", "model.layers.40.block_sparse_moe.experts.246.w3", "model.layers.40.block_sparse_moe.experts.247.w3", "model.layers.40.block_sparse_moe.experts.248.w3", "model.layers.40.block_sparse_moe.experts.249.w3", "model.layers.40.block_sparse_moe.experts.250.w3", "model.layers.40.block_sparse_moe.experts.251.w3", "model.layers.40.block_sparse_moe.experts.252.w3", "model.layers.40.block_sparse_moe.experts.253.w3", "model.layers.40.block_sparse_moe.experts.254.w3", "model.layers.40.block_sparse_moe.experts.255.w3", "model.layers.40.block_sparse_moe.experts.0.w2", "model.layers.40.block_sparse_moe.experts.1.w2", "model.layers.40.block_sparse_moe.experts.2.w2", "model.layers.40.block_sparse_moe.experts.3.w2", "model.layers.40.block_sparse_moe.experts.4.w2", "model.layers.40.block_sparse_moe.experts.5.w2", "model.layers.40.block_sparse_moe.experts.6.w2", "model.layers.40.block_sparse_moe.experts.7.w2", "model.layers.40.block_sparse_moe.experts.8.w2", "model.layers.40.block_sparse_moe.experts.9.w2", "model.layers.40.block_sparse_moe.experts.10.w2", "model.layers.40.block_sparse_moe.experts.11.w2", "model.layers.40.block_sparse_moe.experts.12.w2", "model.layers.40.block_sparse_moe.experts.13.w2", "model.layers.40.block_sparse_moe.experts.14.w2", "model.layers.40.block_sparse_moe.experts.15.w2", "model.layers.40.block_sparse_moe.experts.16.w2", "model.layers.40.block_sparse_moe.experts.17.w2", "model.layers.40.block_sparse_moe.experts.18.w2", "model.layers.40.block_sparse_moe.experts.19.w2", "model.layers.40.block_sparse_moe.experts.20.w2", "model.layers.40.block_sparse_moe.experts.21.w2", "model.layers.40.block_sparse_moe.experts.22.w2", "model.layers.40.block_sparse_moe.experts.23.w2", "model.layers.40.block_sparse_moe.experts.24.w2", "model.layers.40.block_sparse_moe.experts.25.w2", "model.layers.40.block_sparse_moe.experts.26.w2", "model.layers.40.block_sparse_moe.experts.27.w2", "model.layers.40.block_sparse_moe.experts.28.w2", "model.layers.40.block_sparse_moe.experts.29.w2", "model.layers.40.block_sparse_moe.experts.30.w2", "model.layers.40.block_sparse_moe.experts.31.w2", "model.layers.40.block_sparse_moe.experts.32.w2", "model.layers.40.block_sparse_moe.experts.33.w2", "model.layers.40.block_sparse_moe.experts.34.w2", "model.layers.40.block_sparse_moe.experts.35.w2", "model.layers.40.block_sparse_moe.experts.36.w2", "model.layers.40.block_sparse_moe.experts.37.w2", "model.layers.40.block_sparse_moe.experts.38.w2", "model.layers.40.block_sparse_moe.experts.39.w2", "model.layers.40.block_sparse_moe.experts.40.w2", "model.layers.40.block_sparse_moe.experts.41.w2", "model.layers.40.block_sparse_moe.experts.42.w2", "model.layers.40.block_sparse_moe.experts.43.w2", "model.layers.40.block_sparse_moe.experts.44.w2", "model.layers.40.block_sparse_moe.experts.45.w2", "model.layers.40.block_sparse_moe.experts.46.w2", "model.layers.40.block_sparse_moe.experts.47.w2", "model.layers.40.block_sparse_moe.experts.48.w2", "model.layers.40.block_sparse_moe.experts.49.w2", "model.layers.40.block_sparse_moe.experts.50.w2", "model.layers.40.block_sparse_moe.experts.51.w2", "model.layers.40.block_sparse_moe.experts.52.w2", "model.layers.40.block_sparse_moe.experts.53.w2", "model.layers.40.block_sparse_moe.experts.54.w2", "model.layers.40.block_sparse_moe.experts.55.w2", "model.layers.40.block_sparse_moe.experts.56.w2", "model.layers.40.block_sparse_moe.experts.57.w2", "model.layers.40.block_sparse_moe.experts.58.w2", "model.layers.40.block_sparse_moe.experts.59.w2", "model.layers.40.block_sparse_moe.experts.60.w2", "model.layers.40.block_sparse_moe.experts.61.w2", "model.layers.40.block_sparse_moe.experts.62.w2", "model.layers.40.block_sparse_moe.experts.63.w2", "model.layers.40.block_sparse_moe.experts.64.w2", "model.layers.40.block_sparse_moe.experts.65.w2", "model.layers.40.block_sparse_moe.experts.66.w2", "model.layers.40.block_sparse_moe.experts.67.w2", "model.layers.40.block_sparse_moe.experts.68.w2", "model.layers.40.block_sparse_moe.experts.69.w2", "model.layers.40.block_sparse_moe.experts.70.w2", "model.layers.40.block_sparse_moe.experts.71.w2", "model.layers.40.block_sparse_moe.experts.72.w2", "model.layers.40.block_sparse_moe.experts.73.w2", "model.layers.40.block_sparse_moe.experts.74.w2", "model.layers.40.block_sparse_moe.experts.75.w2", "model.layers.40.block_sparse_moe.experts.76.w2", "model.layers.40.block_sparse_moe.experts.77.w2", "model.layers.40.block_sparse_moe.experts.78.w2", "model.layers.40.block_sparse_moe.experts.79.w2", "model.layers.40.block_sparse_moe.experts.80.w2", "model.layers.40.block_sparse_moe.experts.81.w2", "model.layers.40.block_sparse_moe.experts.82.w2", "model.layers.40.block_sparse_moe.experts.83.w2", "model.layers.40.block_sparse_moe.experts.84.w2", "model.layers.40.block_sparse_moe.experts.85.w2", "model.layers.40.block_sparse_moe.experts.86.w2", "model.layers.40.block_sparse_moe.experts.87.w2", "model.layers.40.block_sparse_moe.experts.88.w2", "model.layers.40.block_sparse_moe.experts.89.w2", "model.layers.40.block_sparse_moe.experts.90.w2", "model.layers.40.block_sparse_moe.experts.91.w2", "model.layers.40.block_sparse_moe.experts.92.w2", "model.layers.40.block_sparse_moe.experts.93.w2", "model.layers.40.block_sparse_moe.experts.94.w2", "model.layers.40.block_sparse_moe.experts.95.w2", "model.layers.40.block_sparse_moe.experts.96.w2", "model.layers.40.block_sparse_moe.experts.97.w2", "model.layers.40.block_sparse_moe.experts.98.w2", "model.layers.40.block_sparse_moe.experts.99.w2", "model.layers.40.block_sparse_moe.experts.100.w2", "model.layers.40.block_sparse_moe.experts.101.w2", "model.layers.40.block_sparse_moe.experts.102.w2", "model.layers.40.block_sparse_moe.experts.103.w2", "model.layers.40.block_sparse_moe.experts.104.w2", "model.layers.40.block_sparse_moe.experts.105.w2", "model.layers.40.block_sparse_moe.experts.106.w2", "model.layers.40.block_sparse_moe.experts.107.w2", "model.layers.40.block_sparse_moe.experts.108.w2", "model.layers.40.block_sparse_moe.experts.109.w2", "model.layers.40.block_sparse_moe.experts.110.w2", "model.layers.40.block_sparse_moe.experts.111.w2", "model.layers.40.block_sparse_moe.experts.112.w2", "model.layers.40.block_sparse_moe.experts.113.w2", "model.layers.40.block_sparse_moe.experts.114.w2", "model.layers.40.block_sparse_moe.experts.115.w2", "model.layers.40.block_sparse_moe.experts.116.w2", "model.layers.40.block_sparse_moe.experts.117.w2", "model.layers.40.block_sparse_moe.experts.118.w2", "model.layers.40.block_sparse_moe.experts.119.w2", "model.layers.40.block_sparse_moe.experts.120.w2", "model.layers.40.block_sparse_moe.experts.121.w2", "model.layers.40.block_sparse_moe.experts.122.w2", "model.layers.40.block_sparse_moe.experts.123.w2", "model.layers.40.block_sparse_moe.experts.124.w2", "model.layers.40.block_sparse_moe.experts.125.w2", "model.layers.40.block_sparse_moe.experts.126.w2", "model.layers.40.block_sparse_moe.experts.127.w2", "model.layers.40.block_sparse_moe.experts.128.w2", "model.layers.40.block_sparse_moe.experts.129.w2", "model.layers.40.block_sparse_moe.experts.130.w2", "model.layers.40.block_sparse_moe.experts.131.w2", "model.layers.40.block_sparse_moe.experts.132.w2", "model.layers.40.block_sparse_moe.experts.133.w2", "model.layers.40.block_sparse_moe.experts.134.w2", "model.layers.40.block_sparse_moe.experts.135.w2", "model.layers.40.block_sparse_moe.experts.136.w2", "model.layers.40.block_sparse_moe.experts.137.w2", "model.layers.40.block_sparse_moe.experts.138.w2", "model.layers.40.block_sparse_moe.experts.139.w2", "model.layers.40.block_sparse_moe.experts.140.w2", "model.layers.40.block_sparse_moe.experts.141.w2", "model.layers.40.block_sparse_moe.experts.142.w2", "model.layers.40.block_sparse_moe.experts.143.w2", "model.layers.40.block_sparse_moe.experts.144.w2", "model.layers.40.block_sparse_moe.experts.145.w2", "model.layers.40.block_sparse_moe.experts.146.w2", "model.layers.40.block_sparse_moe.experts.147.w2", "model.layers.40.block_sparse_moe.experts.148.w2", "model.layers.40.block_sparse_moe.experts.149.w2", "model.layers.40.block_sparse_moe.experts.150.w2", "model.layers.40.block_sparse_moe.experts.151.w2", "model.layers.40.block_sparse_moe.experts.152.w2", "model.layers.40.block_sparse_moe.experts.153.w2", "model.layers.40.block_sparse_moe.experts.154.w2", "model.layers.40.block_sparse_moe.experts.155.w2", "model.layers.40.block_sparse_moe.experts.156.w2", "model.layers.40.block_sparse_moe.experts.157.w2", "model.layers.40.block_sparse_moe.experts.158.w2", "model.layers.40.block_sparse_moe.experts.159.w2", "model.layers.40.block_sparse_moe.experts.160.w2", "model.layers.40.block_sparse_moe.experts.161.w2", "model.layers.40.block_sparse_moe.experts.162.w2", "model.layers.40.block_sparse_moe.experts.163.w2", "model.layers.40.block_sparse_moe.experts.164.w2", "model.layers.40.block_sparse_moe.experts.165.w2", "model.layers.40.block_sparse_moe.experts.166.w2", "model.layers.40.block_sparse_moe.experts.167.w2", "model.layers.40.block_sparse_moe.experts.168.w2", "model.layers.40.block_sparse_moe.experts.169.w2", "model.layers.40.block_sparse_moe.experts.170.w2", "model.layers.40.block_sparse_moe.experts.171.w2", "model.layers.40.block_sparse_moe.experts.172.w2", "model.layers.40.block_sparse_moe.experts.173.w2", "model.layers.40.block_sparse_moe.experts.174.w2", "model.layers.40.block_sparse_moe.experts.175.w2", "model.layers.40.block_sparse_moe.experts.176.w2", "model.layers.40.block_sparse_moe.experts.177.w2", "model.layers.40.block_sparse_moe.experts.178.w2", "model.layers.40.block_sparse_moe.experts.179.w2", "model.layers.40.block_sparse_moe.experts.180.w2", "model.layers.40.block_sparse_moe.experts.181.w2", "model.layers.40.block_sparse_moe.experts.182.w2", "model.layers.40.block_sparse_moe.experts.183.w2", "model.layers.40.block_sparse_moe.experts.184.w2", "model.layers.40.block_sparse_moe.experts.185.w2", "model.layers.40.block_sparse_moe.experts.186.w2", "model.layers.40.block_sparse_moe.experts.187.w2", "model.layers.40.block_sparse_moe.experts.188.w2", "model.layers.40.block_sparse_moe.experts.189.w2", "model.layers.40.block_sparse_moe.experts.190.w2", "model.layers.40.block_sparse_moe.experts.191.w2", "model.layers.40.block_sparse_moe.experts.192.w2", "model.layers.40.block_sparse_moe.experts.193.w2", "model.layers.40.block_sparse_moe.experts.194.w2", "model.layers.40.block_sparse_moe.experts.195.w2", "model.layers.40.block_sparse_moe.experts.196.w2", "model.layers.40.block_sparse_moe.experts.197.w2", "model.layers.40.block_sparse_moe.experts.198.w2", "model.layers.40.block_sparse_moe.experts.199.w2", "model.layers.40.block_sparse_moe.experts.200.w2", "model.layers.40.block_sparse_moe.experts.201.w2", "model.layers.40.block_sparse_moe.experts.202.w2", "model.layers.40.block_sparse_moe.experts.203.w2", "model.layers.40.block_sparse_moe.experts.204.w2", "model.layers.40.block_sparse_moe.experts.205.w2", "model.layers.40.block_sparse_moe.experts.206.w2", "model.layers.40.block_sparse_moe.experts.207.w2", "model.layers.40.block_sparse_moe.experts.208.w2", "model.layers.40.block_sparse_moe.experts.209.w2", "model.layers.40.block_sparse_moe.experts.210.w2", "model.layers.40.block_sparse_moe.experts.211.w2", "model.layers.40.block_sparse_moe.experts.212.w2", "model.layers.40.block_sparse_moe.experts.213.w2", "model.layers.40.block_sparse_moe.experts.214.w2", "model.layers.40.block_sparse_moe.experts.215.w2", "model.layers.40.block_sparse_moe.experts.216.w2", "model.layers.40.block_sparse_moe.experts.217.w2", "model.layers.40.block_sparse_moe.experts.218.w2", "model.layers.40.block_sparse_moe.experts.219.w2", "model.layers.40.block_sparse_moe.experts.220.w2", "model.layers.40.block_sparse_moe.experts.221.w2", "model.layers.40.block_sparse_moe.experts.222.w2", "model.layers.40.block_sparse_moe.experts.223.w2", "model.layers.40.block_sparse_moe.experts.224.w2", "model.layers.40.block_sparse_moe.experts.225.w2", "model.layers.40.block_sparse_moe.experts.226.w2", "model.layers.40.block_sparse_moe.experts.227.w2", "model.layers.40.block_sparse_moe.experts.228.w2", "model.layers.40.block_sparse_moe.experts.229.w2", "model.layers.40.block_sparse_moe.experts.230.w2", "model.layers.40.block_sparse_moe.experts.231.w2", "model.layers.40.block_sparse_moe.experts.232.w2", "model.layers.40.block_sparse_moe.experts.233.w2", "model.layers.40.block_sparse_moe.experts.234.w2", "model.layers.40.block_sparse_moe.experts.235.w2", "model.layers.40.block_sparse_moe.experts.236.w2", "model.layers.40.block_sparse_moe.experts.237.w2", "model.layers.40.block_sparse_moe.experts.238.w2", "model.layers.40.block_sparse_moe.experts.239.w2", "model.layers.40.block_sparse_moe.experts.240.w2", "model.layers.40.block_sparse_moe.experts.241.w2", "model.layers.40.block_sparse_moe.experts.242.w2", "model.layers.40.block_sparse_moe.experts.243.w2", "model.layers.40.block_sparse_moe.experts.244.w2", "model.layers.40.block_sparse_moe.experts.245.w2", "model.layers.40.block_sparse_moe.experts.246.w2", "model.layers.40.block_sparse_moe.experts.247.w2", "model.layers.40.block_sparse_moe.experts.248.w2", "model.layers.40.block_sparse_moe.experts.249.w2", "model.layers.40.block_sparse_moe.experts.250.w2", "model.layers.40.block_sparse_moe.experts.251.w2", "model.layers.40.block_sparse_moe.experts.252.w2", "model.layers.40.block_sparse_moe.experts.253.w2", "model.layers.40.block_sparse_moe.experts.254.w2", "model.layers.40.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0013282129541039578, "dbits": 3623878656 } ] }, { "idx": 82, "layers": [ "model.layers.41.self_attn.q_proj", "model.layers.41.self_attn.k_proj", "model.layers.41.self_attn.v_proj", "model.layers.41.self_attn.o_proj" ], "candidates": [ { "dkld": -0.004587431624531735, "dbits": 44040192 } ] }, { "idx": 83, "layers": [ "model.layers.41.block_sparse_moe.experts.0.w1", "model.layers.41.block_sparse_moe.experts.1.w1", "model.layers.41.block_sparse_moe.experts.2.w1", "model.layers.41.block_sparse_moe.experts.3.w1", "model.layers.41.block_sparse_moe.experts.4.w1", "model.layers.41.block_sparse_moe.experts.5.w1", "model.layers.41.block_sparse_moe.experts.6.w1", "model.layers.41.block_sparse_moe.experts.7.w1", "model.layers.41.block_sparse_moe.experts.8.w1", "model.layers.41.block_sparse_moe.experts.9.w1", "model.layers.41.block_sparse_moe.experts.10.w1", "model.layers.41.block_sparse_moe.experts.11.w1", "model.layers.41.block_sparse_moe.experts.12.w1", "model.layers.41.block_sparse_moe.experts.13.w1", "model.layers.41.block_sparse_moe.experts.14.w1", "model.layers.41.block_sparse_moe.experts.15.w1", "model.layers.41.block_sparse_moe.experts.16.w1", "model.layers.41.block_sparse_moe.experts.17.w1", "model.layers.41.block_sparse_moe.experts.18.w1", "model.layers.41.block_sparse_moe.experts.19.w1", "model.layers.41.block_sparse_moe.experts.20.w1", "model.layers.41.block_sparse_moe.experts.21.w1", "model.layers.41.block_sparse_moe.experts.22.w1", "model.layers.41.block_sparse_moe.experts.23.w1", "model.layers.41.block_sparse_moe.experts.24.w1", "model.layers.41.block_sparse_moe.experts.25.w1", "model.layers.41.block_sparse_moe.experts.26.w1", "model.layers.41.block_sparse_moe.experts.27.w1", "model.layers.41.block_sparse_moe.experts.28.w1", "model.layers.41.block_sparse_moe.experts.29.w1", "model.layers.41.block_sparse_moe.experts.30.w1", "model.layers.41.block_sparse_moe.experts.31.w1", "model.layers.41.block_sparse_moe.experts.32.w1", "model.layers.41.block_sparse_moe.experts.33.w1", "model.layers.41.block_sparse_moe.experts.34.w1", "model.layers.41.block_sparse_moe.experts.35.w1", "model.layers.41.block_sparse_moe.experts.36.w1", "model.layers.41.block_sparse_moe.experts.37.w1", "model.layers.41.block_sparse_moe.experts.38.w1", "model.layers.41.block_sparse_moe.experts.39.w1", "model.layers.41.block_sparse_moe.experts.40.w1", "model.layers.41.block_sparse_moe.experts.41.w1", "model.layers.41.block_sparse_moe.experts.42.w1", "model.layers.41.block_sparse_moe.experts.43.w1", "model.layers.41.block_sparse_moe.experts.44.w1", "model.layers.41.block_sparse_moe.experts.45.w1", "model.layers.41.block_sparse_moe.experts.46.w1", "model.layers.41.block_sparse_moe.experts.47.w1", "model.layers.41.block_sparse_moe.experts.48.w1", "model.layers.41.block_sparse_moe.experts.49.w1", "model.layers.41.block_sparse_moe.experts.50.w1", "model.layers.41.block_sparse_moe.experts.51.w1", "model.layers.41.block_sparse_moe.experts.52.w1", "model.layers.41.block_sparse_moe.experts.53.w1", "model.layers.41.block_sparse_moe.experts.54.w1", "model.layers.41.block_sparse_moe.experts.55.w1", "model.layers.41.block_sparse_moe.experts.56.w1", "model.layers.41.block_sparse_moe.experts.57.w1", "model.layers.41.block_sparse_moe.experts.58.w1", "model.layers.41.block_sparse_moe.experts.59.w1", "model.layers.41.block_sparse_moe.experts.60.w1", "model.layers.41.block_sparse_moe.experts.61.w1", "model.layers.41.block_sparse_moe.experts.62.w1", "model.layers.41.block_sparse_moe.experts.63.w1", "model.layers.41.block_sparse_moe.experts.64.w1", "model.layers.41.block_sparse_moe.experts.65.w1", "model.layers.41.block_sparse_moe.experts.66.w1", "model.layers.41.block_sparse_moe.experts.67.w1", "model.layers.41.block_sparse_moe.experts.68.w1", "model.layers.41.block_sparse_moe.experts.69.w1", "model.layers.41.block_sparse_moe.experts.70.w1", "model.layers.41.block_sparse_moe.experts.71.w1", "model.layers.41.block_sparse_moe.experts.72.w1", "model.layers.41.block_sparse_moe.experts.73.w1", "model.layers.41.block_sparse_moe.experts.74.w1", "model.layers.41.block_sparse_moe.experts.75.w1", "model.layers.41.block_sparse_moe.experts.76.w1", "model.layers.41.block_sparse_moe.experts.77.w1", "model.layers.41.block_sparse_moe.experts.78.w1", "model.layers.41.block_sparse_moe.experts.79.w1", "model.layers.41.block_sparse_moe.experts.80.w1", "model.layers.41.block_sparse_moe.experts.81.w1", "model.layers.41.block_sparse_moe.experts.82.w1", "model.layers.41.block_sparse_moe.experts.83.w1", "model.layers.41.block_sparse_moe.experts.84.w1", "model.layers.41.block_sparse_moe.experts.85.w1", "model.layers.41.block_sparse_moe.experts.86.w1", "model.layers.41.block_sparse_moe.experts.87.w1", "model.layers.41.block_sparse_moe.experts.88.w1", "model.layers.41.block_sparse_moe.experts.89.w1", "model.layers.41.block_sparse_moe.experts.90.w1", "model.layers.41.block_sparse_moe.experts.91.w1", "model.layers.41.block_sparse_moe.experts.92.w1", "model.layers.41.block_sparse_moe.experts.93.w1", "model.layers.41.block_sparse_moe.experts.94.w1", "model.layers.41.block_sparse_moe.experts.95.w1", "model.layers.41.block_sparse_moe.experts.96.w1", "model.layers.41.block_sparse_moe.experts.97.w1", "model.layers.41.block_sparse_moe.experts.98.w1", "model.layers.41.block_sparse_moe.experts.99.w1", "model.layers.41.block_sparse_moe.experts.100.w1", "model.layers.41.block_sparse_moe.experts.101.w1", "model.layers.41.block_sparse_moe.experts.102.w1", "model.layers.41.block_sparse_moe.experts.103.w1", "model.layers.41.block_sparse_moe.experts.104.w1", "model.layers.41.block_sparse_moe.experts.105.w1", "model.layers.41.block_sparse_moe.experts.106.w1", "model.layers.41.block_sparse_moe.experts.107.w1", "model.layers.41.block_sparse_moe.experts.108.w1", "model.layers.41.block_sparse_moe.experts.109.w1", "model.layers.41.block_sparse_moe.experts.110.w1", "model.layers.41.block_sparse_moe.experts.111.w1", "model.layers.41.block_sparse_moe.experts.112.w1", "model.layers.41.block_sparse_moe.experts.113.w1", "model.layers.41.block_sparse_moe.experts.114.w1", "model.layers.41.block_sparse_moe.experts.115.w1", "model.layers.41.block_sparse_moe.experts.116.w1", "model.layers.41.block_sparse_moe.experts.117.w1", "model.layers.41.block_sparse_moe.experts.118.w1", "model.layers.41.block_sparse_moe.experts.119.w1", "model.layers.41.block_sparse_moe.experts.120.w1", "model.layers.41.block_sparse_moe.experts.121.w1", "model.layers.41.block_sparse_moe.experts.122.w1", "model.layers.41.block_sparse_moe.experts.123.w1", "model.layers.41.block_sparse_moe.experts.124.w1", "model.layers.41.block_sparse_moe.experts.125.w1", "model.layers.41.block_sparse_moe.experts.126.w1", "model.layers.41.block_sparse_moe.experts.127.w1", "model.layers.41.block_sparse_moe.experts.128.w1", "model.layers.41.block_sparse_moe.experts.129.w1", "model.layers.41.block_sparse_moe.experts.130.w1", "model.layers.41.block_sparse_moe.experts.131.w1", "model.layers.41.block_sparse_moe.experts.132.w1", "model.layers.41.block_sparse_moe.experts.133.w1", "model.layers.41.block_sparse_moe.experts.134.w1", "model.layers.41.block_sparse_moe.experts.135.w1", "model.layers.41.block_sparse_moe.experts.136.w1", "model.layers.41.block_sparse_moe.experts.137.w1", "model.layers.41.block_sparse_moe.experts.138.w1", "model.layers.41.block_sparse_moe.experts.139.w1", "model.layers.41.block_sparse_moe.experts.140.w1", "model.layers.41.block_sparse_moe.experts.141.w1", "model.layers.41.block_sparse_moe.experts.142.w1", "model.layers.41.block_sparse_moe.experts.143.w1", "model.layers.41.block_sparse_moe.experts.144.w1", "model.layers.41.block_sparse_moe.experts.145.w1", "model.layers.41.block_sparse_moe.experts.146.w1", "model.layers.41.block_sparse_moe.experts.147.w1", "model.layers.41.block_sparse_moe.experts.148.w1", "model.layers.41.block_sparse_moe.experts.149.w1", "model.layers.41.block_sparse_moe.experts.150.w1", "model.layers.41.block_sparse_moe.experts.151.w1", "model.layers.41.block_sparse_moe.experts.152.w1", "model.layers.41.block_sparse_moe.experts.153.w1", "model.layers.41.block_sparse_moe.experts.154.w1", "model.layers.41.block_sparse_moe.experts.155.w1", "model.layers.41.block_sparse_moe.experts.156.w1", "model.layers.41.block_sparse_moe.experts.157.w1", "model.layers.41.block_sparse_moe.experts.158.w1", "model.layers.41.block_sparse_moe.experts.159.w1", "model.layers.41.block_sparse_moe.experts.160.w1", "model.layers.41.block_sparse_moe.experts.161.w1", "model.layers.41.block_sparse_moe.experts.162.w1", "model.layers.41.block_sparse_moe.experts.163.w1", "model.layers.41.block_sparse_moe.experts.164.w1", "model.layers.41.block_sparse_moe.experts.165.w1", "model.layers.41.block_sparse_moe.experts.166.w1", "model.layers.41.block_sparse_moe.experts.167.w1", "model.layers.41.block_sparse_moe.experts.168.w1", "model.layers.41.block_sparse_moe.experts.169.w1", "model.layers.41.block_sparse_moe.experts.170.w1", "model.layers.41.block_sparse_moe.experts.171.w1", "model.layers.41.block_sparse_moe.experts.172.w1", "model.layers.41.block_sparse_moe.experts.173.w1", "model.layers.41.block_sparse_moe.experts.174.w1", "model.layers.41.block_sparse_moe.experts.175.w1", "model.layers.41.block_sparse_moe.experts.176.w1", "model.layers.41.block_sparse_moe.experts.177.w1", "model.layers.41.block_sparse_moe.experts.178.w1", "model.layers.41.block_sparse_moe.experts.179.w1", "model.layers.41.block_sparse_moe.experts.180.w1", "model.layers.41.block_sparse_moe.experts.181.w1", "model.layers.41.block_sparse_moe.experts.182.w1", "model.layers.41.block_sparse_moe.experts.183.w1", "model.layers.41.block_sparse_moe.experts.184.w1", "model.layers.41.block_sparse_moe.experts.185.w1", "model.layers.41.block_sparse_moe.experts.186.w1", "model.layers.41.block_sparse_moe.experts.187.w1", "model.layers.41.block_sparse_moe.experts.188.w1", "model.layers.41.block_sparse_moe.experts.189.w1", "model.layers.41.block_sparse_moe.experts.190.w1", "model.layers.41.block_sparse_moe.experts.191.w1", "model.layers.41.block_sparse_moe.experts.192.w1", "model.layers.41.block_sparse_moe.experts.193.w1", "model.layers.41.block_sparse_moe.experts.194.w1", "model.layers.41.block_sparse_moe.experts.195.w1", "model.layers.41.block_sparse_moe.experts.196.w1", "model.layers.41.block_sparse_moe.experts.197.w1", "model.layers.41.block_sparse_moe.experts.198.w1", "model.layers.41.block_sparse_moe.experts.199.w1", "model.layers.41.block_sparse_moe.experts.200.w1", "model.layers.41.block_sparse_moe.experts.201.w1", "model.layers.41.block_sparse_moe.experts.202.w1", "model.layers.41.block_sparse_moe.experts.203.w1", "model.layers.41.block_sparse_moe.experts.204.w1", "model.layers.41.block_sparse_moe.experts.205.w1", "model.layers.41.block_sparse_moe.experts.206.w1", "model.layers.41.block_sparse_moe.experts.207.w1", "model.layers.41.block_sparse_moe.experts.208.w1", "model.layers.41.block_sparse_moe.experts.209.w1", "model.layers.41.block_sparse_moe.experts.210.w1", "model.layers.41.block_sparse_moe.experts.211.w1", "model.layers.41.block_sparse_moe.experts.212.w1", "model.layers.41.block_sparse_moe.experts.213.w1", "model.layers.41.block_sparse_moe.experts.214.w1", "model.layers.41.block_sparse_moe.experts.215.w1", "model.layers.41.block_sparse_moe.experts.216.w1", "model.layers.41.block_sparse_moe.experts.217.w1", "model.layers.41.block_sparse_moe.experts.218.w1", "model.layers.41.block_sparse_moe.experts.219.w1", "model.layers.41.block_sparse_moe.experts.220.w1", "model.layers.41.block_sparse_moe.experts.221.w1", "model.layers.41.block_sparse_moe.experts.222.w1", "model.layers.41.block_sparse_moe.experts.223.w1", "model.layers.41.block_sparse_moe.experts.224.w1", "model.layers.41.block_sparse_moe.experts.225.w1", "model.layers.41.block_sparse_moe.experts.226.w1", "model.layers.41.block_sparse_moe.experts.227.w1", "model.layers.41.block_sparse_moe.experts.228.w1", "model.layers.41.block_sparse_moe.experts.229.w1", "model.layers.41.block_sparse_moe.experts.230.w1", "model.layers.41.block_sparse_moe.experts.231.w1", "model.layers.41.block_sparse_moe.experts.232.w1", "model.layers.41.block_sparse_moe.experts.233.w1", "model.layers.41.block_sparse_moe.experts.234.w1", "model.layers.41.block_sparse_moe.experts.235.w1", "model.layers.41.block_sparse_moe.experts.236.w1", "model.layers.41.block_sparse_moe.experts.237.w1", "model.layers.41.block_sparse_moe.experts.238.w1", "model.layers.41.block_sparse_moe.experts.239.w1", "model.layers.41.block_sparse_moe.experts.240.w1", "model.layers.41.block_sparse_moe.experts.241.w1", "model.layers.41.block_sparse_moe.experts.242.w1", "model.layers.41.block_sparse_moe.experts.243.w1", "model.layers.41.block_sparse_moe.experts.244.w1", "model.layers.41.block_sparse_moe.experts.245.w1", "model.layers.41.block_sparse_moe.experts.246.w1", "model.layers.41.block_sparse_moe.experts.247.w1", "model.layers.41.block_sparse_moe.experts.248.w1", "model.layers.41.block_sparse_moe.experts.249.w1", "model.layers.41.block_sparse_moe.experts.250.w1", "model.layers.41.block_sparse_moe.experts.251.w1", "model.layers.41.block_sparse_moe.experts.252.w1", "model.layers.41.block_sparse_moe.experts.253.w1", "model.layers.41.block_sparse_moe.experts.254.w1", "model.layers.41.block_sparse_moe.experts.255.w1", "model.layers.41.block_sparse_moe.experts.0.w3", "model.layers.41.block_sparse_moe.experts.1.w3", "model.layers.41.block_sparse_moe.experts.2.w3", "model.layers.41.block_sparse_moe.experts.3.w3", "model.layers.41.block_sparse_moe.experts.4.w3", "model.layers.41.block_sparse_moe.experts.5.w3", "model.layers.41.block_sparse_moe.experts.6.w3", "model.layers.41.block_sparse_moe.experts.7.w3", "model.layers.41.block_sparse_moe.experts.8.w3", "model.layers.41.block_sparse_moe.experts.9.w3", "model.layers.41.block_sparse_moe.experts.10.w3", "model.layers.41.block_sparse_moe.experts.11.w3", "model.layers.41.block_sparse_moe.experts.12.w3", "model.layers.41.block_sparse_moe.experts.13.w3", "model.layers.41.block_sparse_moe.experts.14.w3", "model.layers.41.block_sparse_moe.experts.15.w3", "model.layers.41.block_sparse_moe.experts.16.w3", "model.layers.41.block_sparse_moe.experts.17.w3", "model.layers.41.block_sparse_moe.experts.18.w3", "model.layers.41.block_sparse_moe.experts.19.w3", "model.layers.41.block_sparse_moe.experts.20.w3", "model.layers.41.block_sparse_moe.experts.21.w3", "model.layers.41.block_sparse_moe.experts.22.w3", "model.layers.41.block_sparse_moe.experts.23.w3", "model.layers.41.block_sparse_moe.experts.24.w3", "model.layers.41.block_sparse_moe.experts.25.w3", "model.layers.41.block_sparse_moe.experts.26.w3", "model.layers.41.block_sparse_moe.experts.27.w3", "model.layers.41.block_sparse_moe.experts.28.w3", "model.layers.41.block_sparse_moe.experts.29.w3", "model.layers.41.block_sparse_moe.experts.30.w3", "model.layers.41.block_sparse_moe.experts.31.w3", "model.layers.41.block_sparse_moe.experts.32.w3", "model.layers.41.block_sparse_moe.experts.33.w3", "model.layers.41.block_sparse_moe.experts.34.w3", "model.layers.41.block_sparse_moe.experts.35.w3", "model.layers.41.block_sparse_moe.experts.36.w3", "model.layers.41.block_sparse_moe.experts.37.w3", "model.layers.41.block_sparse_moe.experts.38.w3", "model.layers.41.block_sparse_moe.experts.39.w3", "model.layers.41.block_sparse_moe.experts.40.w3", "model.layers.41.block_sparse_moe.experts.41.w3", "model.layers.41.block_sparse_moe.experts.42.w3", "model.layers.41.block_sparse_moe.experts.43.w3", "model.layers.41.block_sparse_moe.experts.44.w3", "model.layers.41.block_sparse_moe.experts.45.w3", "model.layers.41.block_sparse_moe.experts.46.w3", "model.layers.41.block_sparse_moe.experts.47.w3", "model.layers.41.block_sparse_moe.experts.48.w3", "model.layers.41.block_sparse_moe.experts.49.w3", "model.layers.41.block_sparse_moe.experts.50.w3", "model.layers.41.block_sparse_moe.experts.51.w3", "model.layers.41.block_sparse_moe.experts.52.w3", "model.layers.41.block_sparse_moe.experts.53.w3", "model.layers.41.block_sparse_moe.experts.54.w3", "model.layers.41.block_sparse_moe.experts.55.w3", "model.layers.41.block_sparse_moe.experts.56.w3", "model.layers.41.block_sparse_moe.experts.57.w3", "model.layers.41.block_sparse_moe.experts.58.w3", "model.layers.41.block_sparse_moe.experts.59.w3", "model.layers.41.block_sparse_moe.experts.60.w3", "model.layers.41.block_sparse_moe.experts.61.w3", "model.layers.41.block_sparse_moe.experts.62.w3", "model.layers.41.block_sparse_moe.experts.63.w3", "model.layers.41.block_sparse_moe.experts.64.w3", "model.layers.41.block_sparse_moe.experts.65.w3", "model.layers.41.block_sparse_moe.experts.66.w3", "model.layers.41.block_sparse_moe.experts.67.w3", "model.layers.41.block_sparse_moe.experts.68.w3", "model.layers.41.block_sparse_moe.experts.69.w3", "model.layers.41.block_sparse_moe.experts.70.w3", "model.layers.41.block_sparse_moe.experts.71.w3", "model.layers.41.block_sparse_moe.experts.72.w3", "model.layers.41.block_sparse_moe.experts.73.w3", "model.layers.41.block_sparse_moe.experts.74.w3", "model.layers.41.block_sparse_moe.experts.75.w3", "model.layers.41.block_sparse_moe.experts.76.w3", "model.layers.41.block_sparse_moe.experts.77.w3", "model.layers.41.block_sparse_moe.experts.78.w3", "model.layers.41.block_sparse_moe.experts.79.w3", "model.layers.41.block_sparse_moe.experts.80.w3", "model.layers.41.block_sparse_moe.experts.81.w3", "model.layers.41.block_sparse_moe.experts.82.w3", "model.layers.41.block_sparse_moe.experts.83.w3", "model.layers.41.block_sparse_moe.experts.84.w3", "model.layers.41.block_sparse_moe.experts.85.w3", "model.layers.41.block_sparse_moe.experts.86.w3", "model.layers.41.block_sparse_moe.experts.87.w3", "model.layers.41.block_sparse_moe.experts.88.w3", "model.layers.41.block_sparse_moe.experts.89.w3", "model.layers.41.block_sparse_moe.experts.90.w3", "model.layers.41.block_sparse_moe.experts.91.w3", "model.layers.41.block_sparse_moe.experts.92.w3", "model.layers.41.block_sparse_moe.experts.93.w3", "model.layers.41.block_sparse_moe.experts.94.w3", "model.layers.41.block_sparse_moe.experts.95.w3", "model.layers.41.block_sparse_moe.experts.96.w3", "model.layers.41.block_sparse_moe.experts.97.w3", "model.layers.41.block_sparse_moe.experts.98.w3", "model.layers.41.block_sparse_moe.experts.99.w3", "model.layers.41.block_sparse_moe.experts.100.w3", "model.layers.41.block_sparse_moe.experts.101.w3", "model.layers.41.block_sparse_moe.experts.102.w3", "model.layers.41.block_sparse_moe.experts.103.w3", "model.layers.41.block_sparse_moe.experts.104.w3", "model.layers.41.block_sparse_moe.experts.105.w3", "model.layers.41.block_sparse_moe.experts.106.w3", "model.layers.41.block_sparse_moe.experts.107.w3", "model.layers.41.block_sparse_moe.experts.108.w3", "model.layers.41.block_sparse_moe.experts.109.w3", "model.layers.41.block_sparse_moe.experts.110.w3", "model.layers.41.block_sparse_moe.experts.111.w3", "model.layers.41.block_sparse_moe.experts.112.w3", "model.layers.41.block_sparse_moe.experts.113.w3", "model.layers.41.block_sparse_moe.experts.114.w3", "model.layers.41.block_sparse_moe.experts.115.w3", "model.layers.41.block_sparse_moe.experts.116.w3", "model.layers.41.block_sparse_moe.experts.117.w3", "model.layers.41.block_sparse_moe.experts.118.w3", "model.layers.41.block_sparse_moe.experts.119.w3", "model.layers.41.block_sparse_moe.experts.120.w3", "model.layers.41.block_sparse_moe.experts.121.w3", "model.layers.41.block_sparse_moe.experts.122.w3", "model.layers.41.block_sparse_moe.experts.123.w3", "model.layers.41.block_sparse_moe.experts.124.w3", "model.layers.41.block_sparse_moe.experts.125.w3", "model.layers.41.block_sparse_moe.experts.126.w3", "model.layers.41.block_sparse_moe.experts.127.w3", "model.layers.41.block_sparse_moe.experts.128.w3", "model.layers.41.block_sparse_moe.experts.129.w3", "model.layers.41.block_sparse_moe.experts.130.w3", "model.layers.41.block_sparse_moe.experts.131.w3", "model.layers.41.block_sparse_moe.experts.132.w3", "model.layers.41.block_sparse_moe.experts.133.w3", "model.layers.41.block_sparse_moe.experts.134.w3", "model.layers.41.block_sparse_moe.experts.135.w3", "model.layers.41.block_sparse_moe.experts.136.w3", "model.layers.41.block_sparse_moe.experts.137.w3", "model.layers.41.block_sparse_moe.experts.138.w3", "model.layers.41.block_sparse_moe.experts.139.w3", "model.layers.41.block_sparse_moe.experts.140.w3", "model.layers.41.block_sparse_moe.experts.141.w3", "model.layers.41.block_sparse_moe.experts.142.w3", "model.layers.41.block_sparse_moe.experts.143.w3", "model.layers.41.block_sparse_moe.experts.144.w3", "model.layers.41.block_sparse_moe.experts.145.w3", "model.layers.41.block_sparse_moe.experts.146.w3", "model.layers.41.block_sparse_moe.experts.147.w3", "model.layers.41.block_sparse_moe.experts.148.w3", "model.layers.41.block_sparse_moe.experts.149.w3", "model.layers.41.block_sparse_moe.experts.150.w3", "model.layers.41.block_sparse_moe.experts.151.w3", "model.layers.41.block_sparse_moe.experts.152.w3", "model.layers.41.block_sparse_moe.experts.153.w3", "model.layers.41.block_sparse_moe.experts.154.w3", "model.layers.41.block_sparse_moe.experts.155.w3", "model.layers.41.block_sparse_moe.experts.156.w3", "model.layers.41.block_sparse_moe.experts.157.w3", "model.layers.41.block_sparse_moe.experts.158.w3", "model.layers.41.block_sparse_moe.experts.159.w3", "model.layers.41.block_sparse_moe.experts.160.w3", "model.layers.41.block_sparse_moe.experts.161.w3", "model.layers.41.block_sparse_moe.experts.162.w3", "model.layers.41.block_sparse_moe.experts.163.w3", "model.layers.41.block_sparse_moe.experts.164.w3", "model.layers.41.block_sparse_moe.experts.165.w3", "model.layers.41.block_sparse_moe.experts.166.w3", "model.layers.41.block_sparse_moe.experts.167.w3", "model.layers.41.block_sparse_moe.experts.168.w3", "model.layers.41.block_sparse_moe.experts.169.w3", "model.layers.41.block_sparse_moe.experts.170.w3", "model.layers.41.block_sparse_moe.experts.171.w3", "model.layers.41.block_sparse_moe.experts.172.w3", "model.layers.41.block_sparse_moe.experts.173.w3", "model.layers.41.block_sparse_moe.experts.174.w3", "model.layers.41.block_sparse_moe.experts.175.w3", "model.layers.41.block_sparse_moe.experts.176.w3", "model.layers.41.block_sparse_moe.experts.177.w3", "model.layers.41.block_sparse_moe.experts.178.w3", "model.layers.41.block_sparse_moe.experts.179.w3", "model.layers.41.block_sparse_moe.experts.180.w3", "model.layers.41.block_sparse_moe.experts.181.w3", "model.layers.41.block_sparse_moe.experts.182.w3", "model.layers.41.block_sparse_moe.experts.183.w3", "model.layers.41.block_sparse_moe.experts.184.w3", "model.layers.41.block_sparse_moe.experts.185.w3", "model.layers.41.block_sparse_moe.experts.186.w3", "model.layers.41.block_sparse_moe.experts.187.w3", "model.layers.41.block_sparse_moe.experts.188.w3", "model.layers.41.block_sparse_moe.experts.189.w3", "model.layers.41.block_sparse_moe.experts.190.w3", "model.layers.41.block_sparse_moe.experts.191.w3", "model.layers.41.block_sparse_moe.experts.192.w3", "model.layers.41.block_sparse_moe.experts.193.w3", "model.layers.41.block_sparse_moe.experts.194.w3", "model.layers.41.block_sparse_moe.experts.195.w3", "model.layers.41.block_sparse_moe.experts.196.w3", "model.layers.41.block_sparse_moe.experts.197.w3", "model.layers.41.block_sparse_moe.experts.198.w3", "model.layers.41.block_sparse_moe.experts.199.w3", "model.layers.41.block_sparse_moe.experts.200.w3", "model.layers.41.block_sparse_moe.experts.201.w3", "model.layers.41.block_sparse_moe.experts.202.w3", "model.layers.41.block_sparse_moe.experts.203.w3", "model.layers.41.block_sparse_moe.experts.204.w3", "model.layers.41.block_sparse_moe.experts.205.w3", "model.layers.41.block_sparse_moe.experts.206.w3", "model.layers.41.block_sparse_moe.experts.207.w3", "model.layers.41.block_sparse_moe.experts.208.w3", "model.layers.41.block_sparse_moe.experts.209.w3", "model.layers.41.block_sparse_moe.experts.210.w3", "model.layers.41.block_sparse_moe.experts.211.w3", "model.layers.41.block_sparse_moe.experts.212.w3", "model.layers.41.block_sparse_moe.experts.213.w3", "model.layers.41.block_sparse_moe.experts.214.w3", "model.layers.41.block_sparse_moe.experts.215.w3", "model.layers.41.block_sparse_moe.experts.216.w3", "model.layers.41.block_sparse_moe.experts.217.w3", "model.layers.41.block_sparse_moe.experts.218.w3", "model.layers.41.block_sparse_moe.experts.219.w3", "model.layers.41.block_sparse_moe.experts.220.w3", "model.layers.41.block_sparse_moe.experts.221.w3", "model.layers.41.block_sparse_moe.experts.222.w3", "model.layers.41.block_sparse_moe.experts.223.w3", "model.layers.41.block_sparse_moe.experts.224.w3", "model.layers.41.block_sparse_moe.experts.225.w3", "model.layers.41.block_sparse_moe.experts.226.w3", "model.layers.41.block_sparse_moe.experts.227.w3", "model.layers.41.block_sparse_moe.experts.228.w3", "model.layers.41.block_sparse_moe.experts.229.w3", "model.layers.41.block_sparse_moe.experts.230.w3", "model.layers.41.block_sparse_moe.experts.231.w3", "model.layers.41.block_sparse_moe.experts.232.w3", "model.layers.41.block_sparse_moe.experts.233.w3", "model.layers.41.block_sparse_moe.experts.234.w3", "model.layers.41.block_sparse_moe.experts.235.w3", "model.layers.41.block_sparse_moe.experts.236.w3", "model.layers.41.block_sparse_moe.experts.237.w3", "model.layers.41.block_sparse_moe.experts.238.w3", "model.layers.41.block_sparse_moe.experts.239.w3", "model.layers.41.block_sparse_moe.experts.240.w3", "model.layers.41.block_sparse_moe.experts.241.w3", "model.layers.41.block_sparse_moe.experts.242.w3", "model.layers.41.block_sparse_moe.experts.243.w3", "model.layers.41.block_sparse_moe.experts.244.w3", "model.layers.41.block_sparse_moe.experts.245.w3", "model.layers.41.block_sparse_moe.experts.246.w3", "model.layers.41.block_sparse_moe.experts.247.w3", "model.layers.41.block_sparse_moe.experts.248.w3", "model.layers.41.block_sparse_moe.experts.249.w3", "model.layers.41.block_sparse_moe.experts.250.w3", "model.layers.41.block_sparse_moe.experts.251.w3", "model.layers.41.block_sparse_moe.experts.252.w3", "model.layers.41.block_sparse_moe.experts.253.w3", "model.layers.41.block_sparse_moe.experts.254.w3", "model.layers.41.block_sparse_moe.experts.255.w3", "model.layers.41.block_sparse_moe.experts.0.w2", "model.layers.41.block_sparse_moe.experts.1.w2", "model.layers.41.block_sparse_moe.experts.2.w2", "model.layers.41.block_sparse_moe.experts.3.w2", "model.layers.41.block_sparse_moe.experts.4.w2", "model.layers.41.block_sparse_moe.experts.5.w2", "model.layers.41.block_sparse_moe.experts.6.w2", "model.layers.41.block_sparse_moe.experts.7.w2", "model.layers.41.block_sparse_moe.experts.8.w2", "model.layers.41.block_sparse_moe.experts.9.w2", "model.layers.41.block_sparse_moe.experts.10.w2", "model.layers.41.block_sparse_moe.experts.11.w2", "model.layers.41.block_sparse_moe.experts.12.w2", "model.layers.41.block_sparse_moe.experts.13.w2", "model.layers.41.block_sparse_moe.experts.14.w2", "model.layers.41.block_sparse_moe.experts.15.w2", "model.layers.41.block_sparse_moe.experts.16.w2", "model.layers.41.block_sparse_moe.experts.17.w2", "model.layers.41.block_sparse_moe.experts.18.w2", "model.layers.41.block_sparse_moe.experts.19.w2", "model.layers.41.block_sparse_moe.experts.20.w2", "model.layers.41.block_sparse_moe.experts.21.w2", "model.layers.41.block_sparse_moe.experts.22.w2", "model.layers.41.block_sparse_moe.experts.23.w2", "model.layers.41.block_sparse_moe.experts.24.w2", "model.layers.41.block_sparse_moe.experts.25.w2", "model.layers.41.block_sparse_moe.experts.26.w2", "model.layers.41.block_sparse_moe.experts.27.w2", "model.layers.41.block_sparse_moe.experts.28.w2", "model.layers.41.block_sparse_moe.experts.29.w2", "model.layers.41.block_sparse_moe.experts.30.w2", "model.layers.41.block_sparse_moe.experts.31.w2", "model.layers.41.block_sparse_moe.experts.32.w2", "model.layers.41.block_sparse_moe.experts.33.w2", "model.layers.41.block_sparse_moe.experts.34.w2", "model.layers.41.block_sparse_moe.experts.35.w2", "model.layers.41.block_sparse_moe.experts.36.w2", "model.layers.41.block_sparse_moe.experts.37.w2", "model.layers.41.block_sparse_moe.experts.38.w2", "model.layers.41.block_sparse_moe.experts.39.w2", "model.layers.41.block_sparse_moe.experts.40.w2", "model.layers.41.block_sparse_moe.experts.41.w2", "model.layers.41.block_sparse_moe.experts.42.w2", "model.layers.41.block_sparse_moe.experts.43.w2", "model.layers.41.block_sparse_moe.experts.44.w2", "model.layers.41.block_sparse_moe.experts.45.w2", "model.layers.41.block_sparse_moe.experts.46.w2", "model.layers.41.block_sparse_moe.experts.47.w2", "model.layers.41.block_sparse_moe.experts.48.w2", "model.layers.41.block_sparse_moe.experts.49.w2", "model.layers.41.block_sparse_moe.experts.50.w2", "model.layers.41.block_sparse_moe.experts.51.w2", "model.layers.41.block_sparse_moe.experts.52.w2", "model.layers.41.block_sparse_moe.experts.53.w2", "model.layers.41.block_sparse_moe.experts.54.w2", "model.layers.41.block_sparse_moe.experts.55.w2", "model.layers.41.block_sparse_moe.experts.56.w2", "model.layers.41.block_sparse_moe.experts.57.w2", "model.layers.41.block_sparse_moe.experts.58.w2", "model.layers.41.block_sparse_moe.experts.59.w2", "model.layers.41.block_sparse_moe.experts.60.w2", "model.layers.41.block_sparse_moe.experts.61.w2", "model.layers.41.block_sparse_moe.experts.62.w2", "model.layers.41.block_sparse_moe.experts.63.w2", "model.layers.41.block_sparse_moe.experts.64.w2", "model.layers.41.block_sparse_moe.experts.65.w2", "model.layers.41.block_sparse_moe.experts.66.w2", "model.layers.41.block_sparse_moe.experts.67.w2", "model.layers.41.block_sparse_moe.experts.68.w2", "model.layers.41.block_sparse_moe.experts.69.w2", "model.layers.41.block_sparse_moe.experts.70.w2", "model.layers.41.block_sparse_moe.experts.71.w2", "model.layers.41.block_sparse_moe.experts.72.w2", "model.layers.41.block_sparse_moe.experts.73.w2", "model.layers.41.block_sparse_moe.experts.74.w2", "model.layers.41.block_sparse_moe.experts.75.w2", "model.layers.41.block_sparse_moe.experts.76.w2", "model.layers.41.block_sparse_moe.experts.77.w2", "model.layers.41.block_sparse_moe.experts.78.w2", "model.layers.41.block_sparse_moe.experts.79.w2", "model.layers.41.block_sparse_moe.experts.80.w2", "model.layers.41.block_sparse_moe.experts.81.w2", "model.layers.41.block_sparse_moe.experts.82.w2", "model.layers.41.block_sparse_moe.experts.83.w2", "model.layers.41.block_sparse_moe.experts.84.w2", "model.layers.41.block_sparse_moe.experts.85.w2", "model.layers.41.block_sparse_moe.experts.86.w2", "model.layers.41.block_sparse_moe.experts.87.w2", "model.layers.41.block_sparse_moe.experts.88.w2", "model.layers.41.block_sparse_moe.experts.89.w2", "model.layers.41.block_sparse_moe.experts.90.w2", "model.layers.41.block_sparse_moe.experts.91.w2", "model.layers.41.block_sparse_moe.experts.92.w2", "model.layers.41.block_sparse_moe.experts.93.w2", "model.layers.41.block_sparse_moe.experts.94.w2", "model.layers.41.block_sparse_moe.experts.95.w2", "model.layers.41.block_sparse_moe.experts.96.w2", "model.layers.41.block_sparse_moe.experts.97.w2", "model.layers.41.block_sparse_moe.experts.98.w2", "model.layers.41.block_sparse_moe.experts.99.w2", "model.layers.41.block_sparse_moe.experts.100.w2", "model.layers.41.block_sparse_moe.experts.101.w2", "model.layers.41.block_sparse_moe.experts.102.w2", "model.layers.41.block_sparse_moe.experts.103.w2", "model.layers.41.block_sparse_moe.experts.104.w2", "model.layers.41.block_sparse_moe.experts.105.w2", "model.layers.41.block_sparse_moe.experts.106.w2", "model.layers.41.block_sparse_moe.experts.107.w2", "model.layers.41.block_sparse_moe.experts.108.w2", "model.layers.41.block_sparse_moe.experts.109.w2", "model.layers.41.block_sparse_moe.experts.110.w2", "model.layers.41.block_sparse_moe.experts.111.w2", "model.layers.41.block_sparse_moe.experts.112.w2", "model.layers.41.block_sparse_moe.experts.113.w2", "model.layers.41.block_sparse_moe.experts.114.w2", "model.layers.41.block_sparse_moe.experts.115.w2", "model.layers.41.block_sparse_moe.experts.116.w2", "model.layers.41.block_sparse_moe.experts.117.w2", "model.layers.41.block_sparse_moe.experts.118.w2", "model.layers.41.block_sparse_moe.experts.119.w2", "model.layers.41.block_sparse_moe.experts.120.w2", "model.layers.41.block_sparse_moe.experts.121.w2", "model.layers.41.block_sparse_moe.experts.122.w2", "model.layers.41.block_sparse_moe.experts.123.w2", "model.layers.41.block_sparse_moe.experts.124.w2", "model.layers.41.block_sparse_moe.experts.125.w2", "model.layers.41.block_sparse_moe.experts.126.w2", "model.layers.41.block_sparse_moe.experts.127.w2", "model.layers.41.block_sparse_moe.experts.128.w2", "model.layers.41.block_sparse_moe.experts.129.w2", "model.layers.41.block_sparse_moe.experts.130.w2", "model.layers.41.block_sparse_moe.experts.131.w2", "model.layers.41.block_sparse_moe.experts.132.w2", "model.layers.41.block_sparse_moe.experts.133.w2", "model.layers.41.block_sparse_moe.experts.134.w2", "model.layers.41.block_sparse_moe.experts.135.w2", "model.layers.41.block_sparse_moe.experts.136.w2", "model.layers.41.block_sparse_moe.experts.137.w2", "model.layers.41.block_sparse_moe.experts.138.w2", "model.layers.41.block_sparse_moe.experts.139.w2", "model.layers.41.block_sparse_moe.experts.140.w2", "model.layers.41.block_sparse_moe.experts.141.w2", "model.layers.41.block_sparse_moe.experts.142.w2", "model.layers.41.block_sparse_moe.experts.143.w2", "model.layers.41.block_sparse_moe.experts.144.w2", "model.layers.41.block_sparse_moe.experts.145.w2", "model.layers.41.block_sparse_moe.experts.146.w2", "model.layers.41.block_sparse_moe.experts.147.w2", "model.layers.41.block_sparse_moe.experts.148.w2", "model.layers.41.block_sparse_moe.experts.149.w2", "model.layers.41.block_sparse_moe.experts.150.w2", "model.layers.41.block_sparse_moe.experts.151.w2", "model.layers.41.block_sparse_moe.experts.152.w2", "model.layers.41.block_sparse_moe.experts.153.w2", "model.layers.41.block_sparse_moe.experts.154.w2", "model.layers.41.block_sparse_moe.experts.155.w2", "model.layers.41.block_sparse_moe.experts.156.w2", "model.layers.41.block_sparse_moe.experts.157.w2", "model.layers.41.block_sparse_moe.experts.158.w2", "model.layers.41.block_sparse_moe.experts.159.w2", "model.layers.41.block_sparse_moe.experts.160.w2", "model.layers.41.block_sparse_moe.experts.161.w2", "model.layers.41.block_sparse_moe.experts.162.w2", "model.layers.41.block_sparse_moe.experts.163.w2", "model.layers.41.block_sparse_moe.experts.164.w2", "model.layers.41.block_sparse_moe.experts.165.w2", "model.layers.41.block_sparse_moe.experts.166.w2", "model.layers.41.block_sparse_moe.experts.167.w2", "model.layers.41.block_sparse_moe.experts.168.w2", "model.layers.41.block_sparse_moe.experts.169.w2", "model.layers.41.block_sparse_moe.experts.170.w2", "model.layers.41.block_sparse_moe.experts.171.w2", "model.layers.41.block_sparse_moe.experts.172.w2", "model.layers.41.block_sparse_moe.experts.173.w2", "model.layers.41.block_sparse_moe.experts.174.w2", "model.layers.41.block_sparse_moe.experts.175.w2", "model.layers.41.block_sparse_moe.experts.176.w2", "model.layers.41.block_sparse_moe.experts.177.w2", "model.layers.41.block_sparse_moe.experts.178.w2", "model.layers.41.block_sparse_moe.experts.179.w2", "model.layers.41.block_sparse_moe.experts.180.w2", "model.layers.41.block_sparse_moe.experts.181.w2", "model.layers.41.block_sparse_moe.experts.182.w2", "model.layers.41.block_sparse_moe.experts.183.w2", "model.layers.41.block_sparse_moe.experts.184.w2", "model.layers.41.block_sparse_moe.experts.185.w2", "model.layers.41.block_sparse_moe.experts.186.w2", "model.layers.41.block_sparse_moe.experts.187.w2", "model.layers.41.block_sparse_moe.experts.188.w2", "model.layers.41.block_sparse_moe.experts.189.w2", "model.layers.41.block_sparse_moe.experts.190.w2", "model.layers.41.block_sparse_moe.experts.191.w2", "model.layers.41.block_sparse_moe.experts.192.w2", "model.layers.41.block_sparse_moe.experts.193.w2", "model.layers.41.block_sparse_moe.experts.194.w2", "model.layers.41.block_sparse_moe.experts.195.w2", "model.layers.41.block_sparse_moe.experts.196.w2", "model.layers.41.block_sparse_moe.experts.197.w2", "model.layers.41.block_sparse_moe.experts.198.w2", "model.layers.41.block_sparse_moe.experts.199.w2", "model.layers.41.block_sparse_moe.experts.200.w2", "model.layers.41.block_sparse_moe.experts.201.w2", "model.layers.41.block_sparse_moe.experts.202.w2", "model.layers.41.block_sparse_moe.experts.203.w2", "model.layers.41.block_sparse_moe.experts.204.w2", "model.layers.41.block_sparse_moe.experts.205.w2", "model.layers.41.block_sparse_moe.experts.206.w2", "model.layers.41.block_sparse_moe.experts.207.w2", "model.layers.41.block_sparse_moe.experts.208.w2", "model.layers.41.block_sparse_moe.experts.209.w2", "model.layers.41.block_sparse_moe.experts.210.w2", "model.layers.41.block_sparse_moe.experts.211.w2", "model.layers.41.block_sparse_moe.experts.212.w2", "model.layers.41.block_sparse_moe.experts.213.w2", "model.layers.41.block_sparse_moe.experts.214.w2", "model.layers.41.block_sparse_moe.experts.215.w2", "model.layers.41.block_sparse_moe.experts.216.w2", "model.layers.41.block_sparse_moe.experts.217.w2", "model.layers.41.block_sparse_moe.experts.218.w2", "model.layers.41.block_sparse_moe.experts.219.w2", "model.layers.41.block_sparse_moe.experts.220.w2", "model.layers.41.block_sparse_moe.experts.221.w2", "model.layers.41.block_sparse_moe.experts.222.w2", "model.layers.41.block_sparse_moe.experts.223.w2", "model.layers.41.block_sparse_moe.experts.224.w2", "model.layers.41.block_sparse_moe.experts.225.w2", "model.layers.41.block_sparse_moe.experts.226.w2", "model.layers.41.block_sparse_moe.experts.227.w2", "model.layers.41.block_sparse_moe.experts.228.w2", "model.layers.41.block_sparse_moe.experts.229.w2", "model.layers.41.block_sparse_moe.experts.230.w2", "model.layers.41.block_sparse_moe.experts.231.w2", "model.layers.41.block_sparse_moe.experts.232.w2", "model.layers.41.block_sparse_moe.experts.233.w2", "model.layers.41.block_sparse_moe.experts.234.w2", "model.layers.41.block_sparse_moe.experts.235.w2", "model.layers.41.block_sparse_moe.experts.236.w2", "model.layers.41.block_sparse_moe.experts.237.w2", "model.layers.41.block_sparse_moe.experts.238.w2", "model.layers.41.block_sparse_moe.experts.239.w2", "model.layers.41.block_sparse_moe.experts.240.w2", "model.layers.41.block_sparse_moe.experts.241.w2", "model.layers.41.block_sparse_moe.experts.242.w2", "model.layers.41.block_sparse_moe.experts.243.w2", "model.layers.41.block_sparse_moe.experts.244.w2", "model.layers.41.block_sparse_moe.experts.245.w2", "model.layers.41.block_sparse_moe.experts.246.w2", "model.layers.41.block_sparse_moe.experts.247.w2", "model.layers.41.block_sparse_moe.experts.248.w2", "model.layers.41.block_sparse_moe.experts.249.w2", "model.layers.41.block_sparse_moe.experts.250.w2", "model.layers.41.block_sparse_moe.experts.251.w2", "model.layers.41.block_sparse_moe.experts.252.w2", "model.layers.41.block_sparse_moe.experts.253.w2", "model.layers.41.block_sparse_moe.experts.254.w2", "model.layers.41.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.001455854997038819, "dbits": 3623878656 } ] }, { "idx": 84, "layers": [ "model.layers.42.self_attn.q_proj", "model.layers.42.self_attn.k_proj", "model.layers.42.self_attn.v_proj", "model.layers.42.self_attn.o_proj" ], "candidates": [ { "dkld": -0.006056883558630932, "dbits": 44040192 } ] }, { "idx": 85, "layers": [ "model.layers.42.block_sparse_moe.experts.0.w1", "model.layers.42.block_sparse_moe.experts.1.w1", "model.layers.42.block_sparse_moe.experts.2.w1", "model.layers.42.block_sparse_moe.experts.3.w1", "model.layers.42.block_sparse_moe.experts.4.w1", "model.layers.42.block_sparse_moe.experts.5.w1", "model.layers.42.block_sparse_moe.experts.6.w1", "model.layers.42.block_sparse_moe.experts.7.w1", "model.layers.42.block_sparse_moe.experts.8.w1", "model.layers.42.block_sparse_moe.experts.9.w1", "model.layers.42.block_sparse_moe.experts.10.w1", "model.layers.42.block_sparse_moe.experts.11.w1", "model.layers.42.block_sparse_moe.experts.12.w1", "model.layers.42.block_sparse_moe.experts.13.w1", "model.layers.42.block_sparse_moe.experts.14.w1", "model.layers.42.block_sparse_moe.experts.15.w1", "model.layers.42.block_sparse_moe.experts.16.w1", "model.layers.42.block_sparse_moe.experts.17.w1", "model.layers.42.block_sparse_moe.experts.18.w1", "model.layers.42.block_sparse_moe.experts.19.w1", "model.layers.42.block_sparse_moe.experts.20.w1", "model.layers.42.block_sparse_moe.experts.21.w1", "model.layers.42.block_sparse_moe.experts.22.w1", "model.layers.42.block_sparse_moe.experts.23.w1", "model.layers.42.block_sparse_moe.experts.24.w1", "model.layers.42.block_sparse_moe.experts.25.w1", "model.layers.42.block_sparse_moe.experts.26.w1", "model.layers.42.block_sparse_moe.experts.27.w1", "model.layers.42.block_sparse_moe.experts.28.w1", "model.layers.42.block_sparse_moe.experts.29.w1", "model.layers.42.block_sparse_moe.experts.30.w1", "model.layers.42.block_sparse_moe.experts.31.w1", "model.layers.42.block_sparse_moe.experts.32.w1", "model.layers.42.block_sparse_moe.experts.33.w1", "model.layers.42.block_sparse_moe.experts.34.w1", "model.layers.42.block_sparse_moe.experts.35.w1", "model.layers.42.block_sparse_moe.experts.36.w1", "model.layers.42.block_sparse_moe.experts.37.w1", "model.layers.42.block_sparse_moe.experts.38.w1", "model.layers.42.block_sparse_moe.experts.39.w1", "model.layers.42.block_sparse_moe.experts.40.w1", "model.layers.42.block_sparse_moe.experts.41.w1", "model.layers.42.block_sparse_moe.experts.42.w1", "model.layers.42.block_sparse_moe.experts.43.w1", "model.layers.42.block_sparse_moe.experts.44.w1", "model.layers.42.block_sparse_moe.experts.45.w1", "model.layers.42.block_sparse_moe.experts.46.w1", "model.layers.42.block_sparse_moe.experts.47.w1", "model.layers.42.block_sparse_moe.experts.48.w1", "model.layers.42.block_sparse_moe.experts.49.w1", "model.layers.42.block_sparse_moe.experts.50.w1", "model.layers.42.block_sparse_moe.experts.51.w1", "model.layers.42.block_sparse_moe.experts.52.w1", "model.layers.42.block_sparse_moe.experts.53.w1", "model.layers.42.block_sparse_moe.experts.54.w1", "model.layers.42.block_sparse_moe.experts.55.w1", "model.layers.42.block_sparse_moe.experts.56.w1", "model.layers.42.block_sparse_moe.experts.57.w1", "model.layers.42.block_sparse_moe.experts.58.w1", "model.layers.42.block_sparse_moe.experts.59.w1", "model.layers.42.block_sparse_moe.experts.60.w1", "model.layers.42.block_sparse_moe.experts.61.w1", "model.layers.42.block_sparse_moe.experts.62.w1", "model.layers.42.block_sparse_moe.experts.63.w1", "model.layers.42.block_sparse_moe.experts.64.w1", "model.layers.42.block_sparse_moe.experts.65.w1", "model.layers.42.block_sparse_moe.experts.66.w1", "model.layers.42.block_sparse_moe.experts.67.w1", "model.layers.42.block_sparse_moe.experts.68.w1", "model.layers.42.block_sparse_moe.experts.69.w1", "model.layers.42.block_sparse_moe.experts.70.w1", "model.layers.42.block_sparse_moe.experts.71.w1", "model.layers.42.block_sparse_moe.experts.72.w1", "model.layers.42.block_sparse_moe.experts.73.w1", "model.layers.42.block_sparse_moe.experts.74.w1", "model.layers.42.block_sparse_moe.experts.75.w1", "model.layers.42.block_sparse_moe.experts.76.w1", "model.layers.42.block_sparse_moe.experts.77.w1", "model.layers.42.block_sparse_moe.experts.78.w1", "model.layers.42.block_sparse_moe.experts.79.w1", "model.layers.42.block_sparse_moe.experts.80.w1", "model.layers.42.block_sparse_moe.experts.81.w1", "model.layers.42.block_sparse_moe.experts.82.w1", "model.layers.42.block_sparse_moe.experts.83.w1", "model.layers.42.block_sparse_moe.experts.84.w1", "model.layers.42.block_sparse_moe.experts.85.w1", "model.layers.42.block_sparse_moe.experts.86.w1", "model.layers.42.block_sparse_moe.experts.87.w1", "model.layers.42.block_sparse_moe.experts.88.w1", "model.layers.42.block_sparse_moe.experts.89.w1", "model.layers.42.block_sparse_moe.experts.90.w1", "model.layers.42.block_sparse_moe.experts.91.w1", "model.layers.42.block_sparse_moe.experts.92.w1", "model.layers.42.block_sparse_moe.experts.93.w1", "model.layers.42.block_sparse_moe.experts.94.w1", "model.layers.42.block_sparse_moe.experts.95.w1", "model.layers.42.block_sparse_moe.experts.96.w1", "model.layers.42.block_sparse_moe.experts.97.w1", "model.layers.42.block_sparse_moe.experts.98.w1", "model.layers.42.block_sparse_moe.experts.99.w1", "model.layers.42.block_sparse_moe.experts.100.w1", "model.layers.42.block_sparse_moe.experts.101.w1", "model.layers.42.block_sparse_moe.experts.102.w1", "model.layers.42.block_sparse_moe.experts.103.w1", "model.layers.42.block_sparse_moe.experts.104.w1", "model.layers.42.block_sparse_moe.experts.105.w1", "model.layers.42.block_sparse_moe.experts.106.w1", "model.layers.42.block_sparse_moe.experts.107.w1", "model.layers.42.block_sparse_moe.experts.108.w1", "model.layers.42.block_sparse_moe.experts.109.w1", "model.layers.42.block_sparse_moe.experts.110.w1", "model.layers.42.block_sparse_moe.experts.111.w1", "model.layers.42.block_sparse_moe.experts.112.w1", "model.layers.42.block_sparse_moe.experts.113.w1", "model.layers.42.block_sparse_moe.experts.114.w1", "model.layers.42.block_sparse_moe.experts.115.w1", "model.layers.42.block_sparse_moe.experts.116.w1", "model.layers.42.block_sparse_moe.experts.117.w1", "model.layers.42.block_sparse_moe.experts.118.w1", "model.layers.42.block_sparse_moe.experts.119.w1", "model.layers.42.block_sparse_moe.experts.120.w1", "model.layers.42.block_sparse_moe.experts.121.w1", "model.layers.42.block_sparse_moe.experts.122.w1", "model.layers.42.block_sparse_moe.experts.123.w1", "model.layers.42.block_sparse_moe.experts.124.w1", "model.layers.42.block_sparse_moe.experts.125.w1", "model.layers.42.block_sparse_moe.experts.126.w1", "model.layers.42.block_sparse_moe.experts.127.w1", "model.layers.42.block_sparse_moe.experts.128.w1", "model.layers.42.block_sparse_moe.experts.129.w1", "model.layers.42.block_sparse_moe.experts.130.w1", "model.layers.42.block_sparse_moe.experts.131.w1", "model.layers.42.block_sparse_moe.experts.132.w1", "model.layers.42.block_sparse_moe.experts.133.w1", "model.layers.42.block_sparse_moe.experts.134.w1", "model.layers.42.block_sparse_moe.experts.135.w1", "model.layers.42.block_sparse_moe.experts.136.w1", "model.layers.42.block_sparse_moe.experts.137.w1", "model.layers.42.block_sparse_moe.experts.138.w1", "model.layers.42.block_sparse_moe.experts.139.w1", "model.layers.42.block_sparse_moe.experts.140.w1", "model.layers.42.block_sparse_moe.experts.141.w1", "model.layers.42.block_sparse_moe.experts.142.w1", "model.layers.42.block_sparse_moe.experts.143.w1", "model.layers.42.block_sparse_moe.experts.144.w1", "model.layers.42.block_sparse_moe.experts.145.w1", "model.layers.42.block_sparse_moe.experts.146.w1", "model.layers.42.block_sparse_moe.experts.147.w1", "model.layers.42.block_sparse_moe.experts.148.w1", "model.layers.42.block_sparse_moe.experts.149.w1", "model.layers.42.block_sparse_moe.experts.150.w1", "model.layers.42.block_sparse_moe.experts.151.w1", "model.layers.42.block_sparse_moe.experts.152.w1", "model.layers.42.block_sparse_moe.experts.153.w1", "model.layers.42.block_sparse_moe.experts.154.w1", "model.layers.42.block_sparse_moe.experts.155.w1", "model.layers.42.block_sparse_moe.experts.156.w1", "model.layers.42.block_sparse_moe.experts.157.w1", "model.layers.42.block_sparse_moe.experts.158.w1", "model.layers.42.block_sparse_moe.experts.159.w1", "model.layers.42.block_sparse_moe.experts.160.w1", "model.layers.42.block_sparse_moe.experts.161.w1", "model.layers.42.block_sparse_moe.experts.162.w1", "model.layers.42.block_sparse_moe.experts.163.w1", "model.layers.42.block_sparse_moe.experts.164.w1", "model.layers.42.block_sparse_moe.experts.165.w1", "model.layers.42.block_sparse_moe.experts.166.w1", "model.layers.42.block_sparse_moe.experts.167.w1", "model.layers.42.block_sparse_moe.experts.168.w1", "model.layers.42.block_sparse_moe.experts.169.w1", "model.layers.42.block_sparse_moe.experts.170.w1", "model.layers.42.block_sparse_moe.experts.171.w1", "model.layers.42.block_sparse_moe.experts.172.w1", "model.layers.42.block_sparse_moe.experts.173.w1", "model.layers.42.block_sparse_moe.experts.174.w1", "model.layers.42.block_sparse_moe.experts.175.w1", "model.layers.42.block_sparse_moe.experts.176.w1", "model.layers.42.block_sparse_moe.experts.177.w1", "model.layers.42.block_sparse_moe.experts.178.w1", "model.layers.42.block_sparse_moe.experts.179.w1", "model.layers.42.block_sparse_moe.experts.180.w1", "model.layers.42.block_sparse_moe.experts.181.w1", "model.layers.42.block_sparse_moe.experts.182.w1", "model.layers.42.block_sparse_moe.experts.183.w1", "model.layers.42.block_sparse_moe.experts.184.w1", "model.layers.42.block_sparse_moe.experts.185.w1", "model.layers.42.block_sparse_moe.experts.186.w1", "model.layers.42.block_sparse_moe.experts.187.w1", "model.layers.42.block_sparse_moe.experts.188.w1", "model.layers.42.block_sparse_moe.experts.189.w1", "model.layers.42.block_sparse_moe.experts.190.w1", "model.layers.42.block_sparse_moe.experts.191.w1", "model.layers.42.block_sparse_moe.experts.192.w1", "model.layers.42.block_sparse_moe.experts.193.w1", "model.layers.42.block_sparse_moe.experts.194.w1", "model.layers.42.block_sparse_moe.experts.195.w1", "model.layers.42.block_sparse_moe.experts.196.w1", "model.layers.42.block_sparse_moe.experts.197.w1", "model.layers.42.block_sparse_moe.experts.198.w1", "model.layers.42.block_sparse_moe.experts.199.w1", "model.layers.42.block_sparse_moe.experts.200.w1", "model.layers.42.block_sparse_moe.experts.201.w1", "model.layers.42.block_sparse_moe.experts.202.w1", "model.layers.42.block_sparse_moe.experts.203.w1", "model.layers.42.block_sparse_moe.experts.204.w1", "model.layers.42.block_sparse_moe.experts.205.w1", "model.layers.42.block_sparse_moe.experts.206.w1", "model.layers.42.block_sparse_moe.experts.207.w1", "model.layers.42.block_sparse_moe.experts.208.w1", "model.layers.42.block_sparse_moe.experts.209.w1", "model.layers.42.block_sparse_moe.experts.210.w1", "model.layers.42.block_sparse_moe.experts.211.w1", "model.layers.42.block_sparse_moe.experts.212.w1", "model.layers.42.block_sparse_moe.experts.213.w1", "model.layers.42.block_sparse_moe.experts.214.w1", "model.layers.42.block_sparse_moe.experts.215.w1", "model.layers.42.block_sparse_moe.experts.216.w1", "model.layers.42.block_sparse_moe.experts.217.w1", "model.layers.42.block_sparse_moe.experts.218.w1", "model.layers.42.block_sparse_moe.experts.219.w1", "model.layers.42.block_sparse_moe.experts.220.w1", "model.layers.42.block_sparse_moe.experts.221.w1", "model.layers.42.block_sparse_moe.experts.222.w1", "model.layers.42.block_sparse_moe.experts.223.w1", "model.layers.42.block_sparse_moe.experts.224.w1", "model.layers.42.block_sparse_moe.experts.225.w1", "model.layers.42.block_sparse_moe.experts.226.w1", "model.layers.42.block_sparse_moe.experts.227.w1", "model.layers.42.block_sparse_moe.experts.228.w1", "model.layers.42.block_sparse_moe.experts.229.w1", "model.layers.42.block_sparse_moe.experts.230.w1", "model.layers.42.block_sparse_moe.experts.231.w1", "model.layers.42.block_sparse_moe.experts.232.w1", "model.layers.42.block_sparse_moe.experts.233.w1", "model.layers.42.block_sparse_moe.experts.234.w1", "model.layers.42.block_sparse_moe.experts.235.w1", "model.layers.42.block_sparse_moe.experts.236.w1", "model.layers.42.block_sparse_moe.experts.237.w1", "model.layers.42.block_sparse_moe.experts.238.w1", "model.layers.42.block_sparse_moe.experts.239.w1", "model.layers.42.block_sparse_moe.experts.240.w1", "model.layers.42.block_sparse_moe.experts.241.w1", "model.layers.42.block_sparse_moe.experts.242.w1", "model.layers.42.block_sparse_moe.experts.243.w1", "model.layers.42.block_sparse_moe.experts.244.w1", "model.layers.42.block_sparse_moe.experts.245.w1", "model.layers.42.block_sparse_moe.experts.246.w1", "model.layers.42.block_sparse_moe.experts.247.w1", "model.layers.42.block_sparse_moe.experts.248.w1", "model.layers.42.block_sparse_moe.experts.249.w1", "model.layers.42.block_sparse_moe.experts.250.w1", "model.layers.42.block_sparse_moe.experts.251.w1", "model.layers.42.block_sparse_moe.experts.252.w1", "model.layers.42.block_sparse_moe.experts.253.w1", "model.layers.42.block_sparse_moe.experts.254.w1", "model.layers.42.block_sparse_moe.experts.255.w1", "model.layers.42.block_sparse_moe.experts.0.w3", "model.layers.42.block_sparse_moe.experts.1.w3", "model.layers.42.block_sparse_moe.experts.2.w3", "model.layers.42.block_sparse_moe.experts.3.w3", "model.layers.42.block_sparse_moe.experts.4.w3", "model.layers.42.block_sparse_moe.experts.5.w3", "model.layers.42.block_sparse_moe.experts.6.w3", "model.layers.42.block_sparse_moe.experts.7.w3", "model.layers.42.block_sparse_moe.experts.8.w3", "model.layers.42.block_sparse_moe.experts.9.w3", "model.layers.42.block_sparse_moe.experts.10.w3", "model.layers.42.block_sparse_moe.experts.11.w3", "model.layers.42.block_sparse_moe.experts.12.w3", "model.layers.42.block_sparse_moe.experts.13.w3", "model.layers.42.block_sparse_moe.experts.14.w3", "model.layers.42.block_sparse_moe.experts.15.w3", "model.layers.42.block_sparse_moe.experts.16.w3", "model.layers.42.block_sparse_moe.experts.17.w3", "model.layers.42.block_sparse_moe.experts.18.w3", "model.layers.42.block_sparse_moe.experts.19.w3", "model.layers.42.block_sparse_moe.experts.20.w3", "model.layers.42.block_sparse_moe.experts.21.w3", "model.layers.42.block_sparse_moe.experts.22.w3", "model.layers.42.block_sparse_moe.experts.23.w3", "model.layers.42.block_sparse_moe.experts.24.w3", "model.layers.42.block_sparse_moe.experts.25.w3", "model.layers.42.block_sparse_moe.experts.26.w3", "model.layers.42.block_sparse_moe.experts.27.w3", "model.layers.42.block_sparse_moe.experts.28.w3", "model.layers.42.block_sparse_moe.experts.29.w3", "model.layers.42.block_sparse_moe.experts.30.w3", "model.layers.42.block_sparse_moe.experts.31.w3", "model.layers.42.block_sparse_moe.experts.32.w3", "model.layers.42.block_sparse_moe.experts.33.w3", "model.layers.42.block_sparse_moe.experts.34.w3", "model.layers.42.block_sparse_moe.experts.35.w3", "model.layers.42.block_sparse_moe.experts.36.w3", "model.layers.42.block_sparse_moe.experts.37.w3", "model.layers.42.block_sparse_moe.experts.38.w3", "model.layers.42.block_sparse_moe.experts.39.w3", "model.layers.42.block_sparse_moe.experts.40.w3", "model.layers.42.block_sparse_moe.experts.41.w3", "model.layers.42.block_sparse_moe.experts.42.w3", "model.layers.42.block_sparse_moe.experts.43.w3", "model.layers.42.block_sparse_moe.experts.44.w3", "model.layers.42.block_sparse_moe.experts.45.w3", "model.layers.42.block_sparse_moe.experts.46.w3", "model.layers.42.block_sparse_moe.experts.47.w3", "model.layers.42.block_sparse_moe.experts.48.w3", "model.layers.42.block_sparse_moe.experts.49.w3", "model.layers.42.block_sparse_moe.experts.50.w3", "model.layers.42.block_sparse_moe.experts.51.w3", "model.layers.42.block_sparse_moe.experts.52.w3", "model.layers.42.block_sparse_moe.experts.53.w3", "model.layers.42.block_sparse_moe.experts.54.w3", "model.layers.42.block_sparse_moe.experts.55.w3", "model.layers.42.block_sparse_moe.experts.56.w3", "model.layers.42.block_sparse_moe.experts.57.w3", "model.layers.42.block_sparse_moe.experts.58.w3", "model.layers.42.block_sparse_moe.experts.59.w3", "model.layers.42.block_sparse_moe.experts.60.w3", "model.layers.42.block_sparse_moe.experts.61.w3", "model.layers.42.block_sparse_moe.experts.62.w3", "model.layers.42.block_sparse_moe.experts.63.w3", "model.layers.42.block_sparse_moe.experts.64.w3", "model.layers.42.block_sparse_moe.experts.65.w3", "model.layers.42.block_sparse_moe.experts.66.w3", "model.layers.42.block_sparse_moe.experts.67.w3", "model.layers.42.block_sparse_moe.experts.68.w3", "model.layers.42.block_sparse_moe.experts.69.w3", "model.layers.42.block_sparse_moe.experts.70.w3", "model.layers.42.block_sparse_moe.experts.71.w3", "model.layers.42.block_sparse_moe.experts.72.w3", "model.layers.42.block_sparse_moe.experts.73.w3", "model.layers.42.block_sparse_moe.experts.74.w3", "model.layers.42.block_sparse_moe.experts.75.w3", "model.layers.42.block_sparse_moe.experts.76.w3", "model.layers.42.block_sparse_moe.experts.77.w3", "model.layers.42.block_sparse_moe.experts.78.w3", "model.layers.42.block_sparse_moe.experts.79.w3", "model.layers.42.block_sparse_moe.experts.80.w3", "model.layers.42.block_sparse_moe.experts.81.w3", "model.layers.42.block_sparse_moe.experts.82.w3", "model.layers.42.block_sparse_moe.experts.83.w3", "model.layers.42.block_sparse_moe.experts.84.w3", "model.layers.42.block_sparse_moe.experts.85.w3", "model.layers.42.block_sparse_moe.experts.86.w3", "model.layers.42.block_sparse_moe.experts.87.w3", "model.layers.42.block_sparse_moe.experts.88.w3", "model.layers.42.block_sparse_moe.experts.89.w3", "model.layers.42.block_sparse_moe.experts.90.w3", "model.layers.42.block_sparse_moe.experts.91.w3", "model.layers.42.block_sparse_moe.experts.92.w3", "model.layers.42.block_sparse_moe.experts.93.w3", "model.layers.42.block_sparse_moe.experts.94.w3", "model.layers.42.block_sparse_moe.experts.95.w3", "model.layers.42.block_sparse_moe.experts.96.w3", "model.layers.42.block_sparse_moe.experts.97.w3", "model.layers.42.block_sparse_moe.experts.98.w3", "model.layers.42.block_sparse_moe.experts.99.w3", "model.layers.42.block_sparse_moe.experts.100.w3", "model.layers.42.block_sparse_moe.experts.101.w3", "model.layers.42.block_sparse_moe.experts.102.w3", "model.layers.42.block_sparse_moe.experts.103.w3", "model.layers.42.block_sparse_moe.experts.104.w3", "model.layers.42.block_sparse_moe.experts.105.w3", "model.layers.42.block_sparse_moe.experts.106.w3", "model.layers.42.block_sparse_moe.experts.107.w3", "model.layers.42.block_sparse_moe.experts.108.w3", "model.layers.42.block_sparse_moe.experts.109.w3", "model.layers.42.block_sparse_moe.experts.110.w3", "model.layers.42.block_sparse_moe.experts.111.w3", "model.layers.42.block_sparse_moe.experts.112.w3", "model.layers.42.block_sparse_moe.experts.113.w3", "model.layers.42.block_sparse_moe.experts.114.w3", "model.layers.42.block_sparse_moe.experts.115.w3", "model.layers.42.block_sparse_moe.experts.116.w3", "model.layers.42.block_sparse_moe.experts.117.w3", "model.layers.42.block_sparse_moe.experts.118.w3", "model.layers.42.block_sparse_moe.experts.119.w3", "model.layers.42.block_sparse_moe.experts.120.w3", "model.layers.42.block_sparse_moe.experts.121.w3", "model.layers.42.block_sparse_moe.experts.122.w3", "model.layers.42.block_sparse_moe.experts.123.w3", "model.layers.42.block_sparse_moe.experts.124.w3", "model.layers.42.block_sparse_moe.experts.125.w3", "model.layers.42.block_sparse_moe.experts.126.w3", "model.layers.42.block_sparse_moe.experts.127.w3", "model.layers.42.block_sparse_moe.experts.128.w3", "model.layers.42.block_sparse_moe.experts.129.w3", "model.layers.42.block_sparse_moe.experts.130.w3", "model.layers.42.block_sparse_moe.experts.131.w3", "model.layers.42.block_sparse_moe.experts.132.w3", "model.layers.42.block_sparse_moe.experts.133.w3", "model.layers.42.block_sparse_moe.experts.134.w3", "model.layers.42.block_sparse_moe.experts.135.w3", "model.layers.42.block_sparse_moe.experts.136.w3", "model.layers.42.block_sparse_moe.experts.137.w3", "model.layers.42.block_sparse_moe.experts.138.w3", "model.layers.42.block_sparse_moe.experts.139.w3", "model.layers.42.block_sparse_moe.experts.140.w3", "model.layers.42.block_sparse_moe.experts.141.w3", "model.layers.42.block_sparse_moe.experts.142.w3", "model.layers.42.block_sparse_moe.experts.143.w3", "model.layers.42.block_sparse_moe.experts.144.w3", "model.layers.42.block_sparse_moe.experts.145.w3", "model.layers.42.block_sparse_moe.experts.146.w3", "model.layers.42.block_sparse_moe.experts.147.w3", "model.layers.42.block_sparse_moe.experts.148.w3", "model.layers.42.block_sparse_moe.experts.149.w3", "model.layers.42.block_sparse_moe.experts.150.w3", "model.layers.42.block_sparse_moe.experts.151.w3", "model.layers.42.block_sparse_moe.experts.152.w3", "model.layers.42.block_sparse_moe.experts.153.w3", "model.layers.42.block_sparse_moe.experts.154.w3", "model.layers.42.block_sparse_moe.experts.155.w3", "model.layers.42.block_sparse_moe.experts.156.w3", "model.layers.42.block_sparse_moe.experts.157.w3", "model.layers.42.block_sparse_moe.experts.158.w3", "model.layers.42.block_sparse_moe.experts.159.w3", "model.layers.42.block_sparse_moe.experts.160.w3", "model.layers.42.block_sparse_moe.experts.161.w3", "model.layers.42.block_sparse_moe.experts.162.w3", "model.layers.42.block_sparse_moe.experts.163.w3", "model.layers.42.block_sparse_moe.experts.164.w3", "model.layers.42.block_sparse_moe.experts.165.w3", "model.layers.42.block_sparse_moe.experts.166.w3", "model.layers.42.block_sparse_moe.experts.167.w3", "model.layers.42.block_sparse_moe.experts.168.w3", "model.layers.42.block_sparse_moe.experts.169.w3", "model.layers.42.block_sparse_moe.experts.170.w3", "model.layers.42.block_sparse_moe.experts.171.w3", "model.layers.42.block_sparse_moe.experts.172.w3", "model.layers.42.block_sparse_moe.experts.173.w3", "model.layers.42.block_sparse_moe.experts.174.w3", "model.layers.42.block_sparse_moe.experts.175.w3", "model.layers.42.block_sparse_moe.experts.176.w3", "model.layers.42.block_sparse_moe.experts.177.w3", "model.layers.42.block_sparse_moe.experts.178.w3", "model.layers.42.block_sparse_moe.experts.179.w3", "model.layers.42.block_sparse_moe.experts.180.w3", "model.layers.42.block_sparse_moe.experts.181.w3", "model.layers.42.block_sparse_moe.experts.182.w3", "model.layers.42.block_sparse_moe.experts.183.w3", "model.layers.42.block_sparse_moe.experts.184.w3", "model.layers.42.block_sparse_moe.experts.185.w3", "model.layers.42.block_sparse_moe.experts.186.w3", "model.layers.42.block_sparse_moe.experts.187.w3", "model.layers.42.block_sparse_moe.experts.188.w3", "model.layers.42.block_sparse_moe.experts.189.w3", "model.layers.42.block_sparse_moe.experts.190.w3", "model.layers.42.block_sparse_moe.experts.191.w3", "model.layers.42.block_sparse_moe.experts.192.w3", "model.layers.42.block_sparse_moe.experts.193.w3", "model.layers.42.block_sparse_moe.experts.194.w3", "model.layers.42.block_sparse_moe.experts.195.w3", "model.layers.42.block_sparse_moe.experts.196.w3", "model.layers.42.block_sparse_moe.experts.197.w3", "model.layers.42.block_sparse_moe.experts.198.w3", "model.layers.42.block_sparse_moe.experts.199.w3", "model.layers.42.block_sparse_moe.experts.200.w3", "model.layers.42.block_sparse_moe.experts.201.w3", "model.layers.42.block_sparse_moe.experts.202.w3", "model.layers.42.block_sparse_moe.experts.203.w3", "model.layers.42.block_sparse_moe.experts.204.w3", "model.layers.42.block_sparse_moe.experts.205.w3", "model.layers.42.block_sparse_moe.experts.206.w3", "model.layers.42.block_sparse_moe.experts.207.w3", "model.layers.42.block_sparse_moe.experts.208.w3", "model.layers.42.block_sparse_moe.experts.209.w3", "model.layers.42.block_sparse_moe.experts.210.w3", "model.layers.42.block_sparse_moe.experts.211.w3", "model.layers.42.block_sparse_moe.experts.212.w3", "model.layers.42.block_sparse_moe.experts.213.w3", "model.layers.42.block_sparse_moe.experts.214.w3", "model.layers.42.block_sparse_moe.experts.215.w3", "model.layers.42.block_sparse_moe.experts.216.w3", "model.layers.42.block_sparse_moe.experts.217.w3", "model.layers.42.block_sparse_moe.experts.218.w3", "model.layers.42.block_sparse_moe.experts.219.w3", "model.layers.42.block_sparse_moe.experts.220.w3", "model.layers.42.block_sparse_moe.experts.221.w3", "model.layers.42.block_sparse_moe.experts.222.w3", "model.layers.42.block_sparse_moe.experts.223.w3", "model.layers.42.block_sparse_moe.experts.224.w3", "model.layers.42.block_sparse_moe.experts.225.w3", "model.layers.42.block_sparse_moe.experts.226.w3", "model.layers.42.block_sparse_moe.experts.227.w3", "model.layers.42.block_sparse_moe.experts.228.w3", "model.layers.42.block_sparse_moe.experts.229.w3", "model.layers.42.block_sparse_moe.experts.230.w3", "model.layers.42.block_sparse_moe.experts.231.w3", "model.layers.42.block_sparse_moe.experts.232.w3", "model.layers.42.block_sparse_moe.experts.233.w3", "model.layers.42.block_sparse_moe.experts.234.w3", "model.layers.42.block_sparse_moe.experts.235.w3", "model.layers.42.block_sparse_moe.experts.236.w3", "model.layers.42.block_sparse_moe.experts.237.w3", "model.layers.42.block_sparse_moe.experts.238.w3", "model.layers.42.block_sparse_moe.experts.239.w3", "model.layers.42.block_sparse_moe.experts.240.w3", "model.layers.42.block_sparse_moe.experts.241.w3", "model.layers.42.block_sparse_moe.experts.242.w3", "model.layers.42.block_sparse_moe.experts.243.w3", "model.layers.42.block_sparse_moe.experts.244.w3", "model.layers.42.block_sparse_moe.experts.245.w3", "model.layers.42.block_sparse_moe.experts.246.w3", "model.layers.42.block_sparse_moe.experts.247.w3", "model.layers.42.block_sparse_moe.experts.248.w3", "model.layers.42.block_sparse_moe.experts.249.w3", "model.layers.42.block_sparse_moe.experts.250.w3", "model.layers.42.block_sparse_moe.experts.251.w3", "model.layers.42.block_sparse_moe.experts.252.w3", "model.layers.42.block_sparse_moe.experts.253.w3", "model.layers.42.block_sparse_moe.experts.254.w3", "model.layers.42.block_sparse_moe.experts.255.w3", "model.layers.42.block_sparse_moe.experts.0.w2", "model.layers.42.block_sparse_moe.experts.1.w2", "model.layers.42.block_sparse_moe.experts.2.w2", "model.layers.42.block_sparse_moe.experts.3.w2", "model.layers.42.block_sparse_moe.experts.4.w2", "model.layers.42.block_sparse_moe.experts.5.w2", "model.layers.42.block_sparse_moe.experts.6.w2", "model.layers.42.block_sparse_moe.experts.7.w2", "model.layers.42.block_sparse_moe.experts.8.w2", "model.layers.42.block_sparse_moe.experts.9.w2", "model.layers.42.block_sparse_moe.experts.10.w2", "model.layers.42.block_sparse_moe.experts.11.w2", "model.layers.42.block_sparse_moe.experts.12.w2", "model.layers.42.block_sparse_moe.experts.13.w2", "model.layers.42.block_sparse_moe.experts.14.w2", "model.layers.42.block_sparse_moe.experts.15.w2", "model.layers.42.block_sparse_moe.experts.16.w2", "model.layers.42.block_sparse_moe.experts.17.w2", "model.layers.42.block_sparse_moe.experts.18.w2", "model.layers.42.block_sparse_moe.experts.19.w2", "model.layers.42.block_sparse_moe.experts.20.w2", "model.layers.42.block_sparse_moe.experts.21.w2", "model.layers.42.block_sparse_moe.experts.22.w2", "model.layers.42.block_sparse_moe.experts.23.w2", "model.layers.42.block_sparse_moe.experts.24.w2", "model.layers.42.block_sparse_moe.experts.25.w2", "model.layers.42.block_sparse_moe.experts.26.w2", "model.layers.42.block_sparse_moe.experts.27.w2", "model.layers.42.block_sparse_moe.experts.28.w2", "model.layers.42.block_sparse_moe.experts.29.w2", "model.layers.42.block_sparse_moe.experts.30.w2", "model.layers.42.block_sparse_moe.experts.31.w2", "model.layers.42.block_sparse_moe.experts.32.w2", "model.layers.42.block_sparse_moe.experts.33.w2", "model.layers.42.block_sparse_moe.experts.34.w2", "model.layers.42.block_sparse_moe.experts.35.w2", "model.layers.42.block_sparse_moe.experts.36.w2", "model.layers.42.block_sparse_moe.experts.37.w2", "model.layers.42.block_sparse_moe.experts.38.w2", "model.layers.42.block_sparse_moe.experts.39.w2", "model.layers.42.block_sparse_moe.experts.40.w2", "model.layers.42.block_sparse_moe.experts.41.w2", "model.layers.42.block_sparse_moe.experts.42.w2", "model.layers.42.block_sparse_moe.experts.43.w2", "model.layers.42.block_sparse_moe.experts.44.w2", "model.layers.42.block_sparse_moe.experts.45.w2", "model.layers.42.block_sparse_moe.experts.46.w2", "model.layers.42.block_sparse_moe.experts.47.w2", "model.layers.42.block_sparse_moe.experts.48.w2", "model.layers.42.block_sparse_moe.experts.49.w2", "model.layers.42.block_sparse_moe.experts.50.w2", "model.layers.42.block_sparse_moe.experts.51.w2", "model.layers.42.block_sparse_moe.experts.52.w2", "model.layers.42.block_sparse_moe.experts.53.w2", "model.layers.42.block_sparse_moe.experts.54.w2", "model.layers.42.block_sparse_moe.experts.55.w2", "model.layers.42.block_sparse_moe.experts.56.w2", "model.layers.42.block_sparse_moe.experts.57.w2", "model.layers.42.block_sparse_moe.experts.58.w2", "model.layers.42.block_sparse_moe.experts.59.w2", "model.layers.42.block_sparse_moe.experts.60.w2", "model.layers.42.block_sparse_moe.experts.61.w2", "model.layers.42.block_sparse_moe.experts.62.w2", "model.layers.42.block_sparse_moe.experts.63.w2", "model.layers.42.block_sparse_moe.experts.64.w2", "model.layers.42.block_sparse_moe.experts.65.w2", "model.layers.42.block_sparse_moe.experts.66.w2", "model.layers.42.block_sparse_moe.experts.67.w2", "model.layers.42.block_sparse_moe.experts.68.w2", "model.layers.42.block_sparse_moe.experts.69.w2", "model.layers.42.block_sparse_moe.experts.70.w2", "model.layers.42.block_sparse_moe.experts.71.w2", "model.layers.42.block_sparse_moe.experts.72.w2", "model.layers.42.block_sparse_moe.experts.73.w2", "model.layers.42.block_sparse_moe.experts.74.w2", "model.layers.42.block_sparse_moe.experts.75.w2", "model.layers.42.block_sparse_moe.experts.76.w2", "model.layers.42.block_sparse_moe.experts.77.w2", "model.layers.42.block_sparse_moe.experts.78.w2", "model.layers.42.block_sparse_moe.experts.79.w2", "model.layers.42.block_sparse_moe.experts.80.w2", "model.layers.42.block_sparse_moe.experts.81.w2", "model.layers.42.block_sparse_moe.experts.82.w2", "model.layers.42.block_sparse_moe.experts.83.w2", "model.layers.42.block_sparse_moe.experts.84.w2", "model.layers.42.block_sparse_moe.experts.85.w2", "model.layers.42.block_sparse_moe.experts.86.w2", "model.layers.42.block_sparse_moe.experts.87.w2", "model.layers.42.block_sparse_moe.experts.88.w2", "model.layers.42.block_sparse_moe.experts.89.w2", "model.layers.42.block_sparse_moe.experts.90.w2", "model.layers.42.block_sparse_moe.experts.91.w2", "model.layers.42.block_sparse_moe.experts.92.w2", "model.layers.42.block_sparse_moe.experts.93.w2", "model.layers.42.block_sparse_moe.experts.94.w2", "model.layers.42.block_sparse_moe.experts.95.w2", "model.layers.42.block_sparse_moe.experts.96.w2", "model.layers.42.block_sparse_moe.experts.97.w2", "model.layers.42.block_sparse_moe.experts.98.w2", "model.layers.42.block_sparse_moe.experts.99.w2", "model.layers.42.block_sparse_moe.experts.100.w2", "model.layers.42.block_sparse_moe.experts.101.w2", "model.layers.42.block_sparse_moe.experts.102.w2", "model.layers.42.block_sparse_moe.experts.103.w2", "model.layers.42.block_sparse_moe.experts.104.w2", "model.layers.42.block_sparse_moe.experts.105.w2", "model.layers.42.block_sparse_moe.experts.106.w2", "model.layers.42.block_sparse_moe.experts.107.w2", "model.layers.42.block_sparse_moe.experts.108.w2", "model.layers.42.block_sparse_moe.experts.109.w2", "model.layers.42.block_sparse_moe.experts.110.w2", "model.layers.42.block_sparse_moe.experts.111.w2", "model.layers.42.block_sparse_moe.experts.112.w2", "model.layers.42.block_sparse_moe.experts.113.w2", "model.layers.42.block_sparse_moe.experts.114.w2", "model.layers.42.block_sparse_moe.experts.115.w2", "model.layers.42.block_sparse_moe.experts.116.w2", "model.layers.42.block_sparse_moe.experts.117.w2", "model.layers.42.block_sparse_moe.experts.118.w2", "model.layers.42.block_sparse_moe.experts.119.w2", "model.layers.42.block_sparse_moe.experts.120.w2", "model.layers.42.block_sparse_moe.experts.121.w2", "model.layers.42.block_sparse_moe.experts.122.w2", "model.layers.42.block_sparse_moe.experts.123.w2", "model.layers.42.block_sparse_moe.experts.124.w2", "model.layers.42.block_sparse_moe.experts.125.w2", "model.layers.42.block_sparse_moe.experts.126.w2", "model.layers.42.block_sparse_moe.experts.127.w2", "model.layers.42.block_sparse_moe.experts.128.w2", "model.layers.42.block_sparse_moe.experts.129.w2", "model.layers.42.block_sparse_moe.experts.130.w2", "model.layers.42.block_sparse_moe.experts.131.w2", "model.layers.42.block_sparse_moe.experts.132.w2", "model.layers.42.block_sparse_moe.experts.133.w2", "model.layers.42.block_sparse_moe.experts.134.w2", "model.layers.42.block_sparse_moe.experts.135.w2", "model.layers.42.block_sparse_moe.experts.136.w2", "model.layers.42.block_sparse_moe.experts.137.w2", "model.layers.42.block_sparse_moe.experts.138.w2", "model.layers.42.block_sparse_moe.experts.139.w2", "model.layers.42.block_sparse_moe.experts.140.w2", "model.layers.42.block_sparse_moe.experts.141.w2", "model.layers.42.block_sparse_moe.experts.142.w2", "model.layers.42.block_sparse_moe.experts.143.w2", "model.layers.42.block_sparse_moe.experts.144.w2", "model.layers.42.block_sparse_moe.experts.145.w2", "model.layers.42.block_sparse_moe.experts.146.w2", "model.layers.42.block_sparse_moe.experts.147.w2", "model.layers.42.block_sparse_moe.experts.148.w2", "model.layers.42.block_sparse_moe.experts.149.w2", "model.layers.42.block_sparse_moe.experts.150.w2", "model.layers.42.block_sparse_moe.experts.151.w2", "model.layers.42.block_sparse_moe.experts.152.w2", "model.layers.42.block_sparse_moe.experts.153.w2", "model.layers.42.block_sparse_moe.experts.154.w2", "model.layers.42.block_sparse_moe.experts.155.w2", "model.layers.42.block_sparse_moe.experts.156.w2", "model.layers.42.block_sparse_moe.experts.157.w2", "model.layers.42.block_sparse_moe.experts.158.w2", "model.layers.42.block_sparse_moe.experts.159.w2", "model.layers.42.block_sparse_moe.experts.160.w2", "model.layers.42.block_sparse_moe.experts.161.w2", "model.layers.42.block_sparse_moe.experts.162.w2", "model.layers.42.block_sparse_moe.experts.163.w2", "model.layers.42.block_sparse_moe.experts.164.w2", "model.layers.42.block_sparse_moe.experts.165.w2", "model.layers.42.block_sparse_moe.experts.166.w2", "model.layers.42.block_sparse_moe.experts.167.w2", "model.layers.42.block_sparse_moe.experts.168.w2", "model.layers.42.block_sparse_moe.experts.169.w2", "model.layers.42.block_sparse_moe.experts.170.w2", "model.layers.42.block_sparse_moe.experts.171.w2", "model.layers.42.block_sparse_moe.experts.172.w2", "model.layers.42.block_sparse_moe.experts.173.w2", "model.layers.42.block_sparse_moe.experts.174.w2", "model.layers.42.block_sparse_moe.experts.175.w2", "model.layers.42.block_sparse_moe.experts.176.w2", "model.layers.42.block_sparse_moe.experts.177.w2", "model.layers.42.block_sparse_moe.experts.178.w2", "model.layers.42.block_sparse_moe.experts.179.w2", "model.layers.42.block_sparse_moe.experts.180.w2", "model.layers.42.block_sparse_moe.experts.181.w2", "model.layers.42.block_sparse_moe.experts.182.w2", "model.layers.42.block_sparse_moe.experts.183.w2", "model.layers.42.block_sparse_moe.experts.184.w2", "model.layers.42.block_sparse_moe.experts.185.w2", "model.layers.42.block_sparse_moe.experts.186.w2", "model.layers.42.block_sparse_moe.experts.187.w2", "model.layers.42.block_sparse_moe.experts.188.w2", "model.layers.42.block_sparse_moe.experts.189.w2", "model.layers.42.block_sparse_moe.experts.190.w2", "model.layers.42.block_sparse_moe.experts.191.w2", "model.layers.42.block_sparse_moe.experts.192.w2", "model.layers.42.block_sparse_moe.experts.193.w2", "model.layers.42.block_sparse_moe.experts.194.w2", "model.layers.42.block_sparse_moe.experts.195.w2", "model.layers.42.block_sparse_moe.experts.196.w2", "model.layers.42.block_sparse_moe.experts.197.w2", "model.layers.42.block_sparse_moe.experts.198.w2", "model.layers.42.block_sparse_moe.experts.199.w2", "model.layers.42.block_sparse_moe.experts.200.w2", "model.layers.42.block_sparse_moe.experts.201.w2", "model.layers.42.block_sparse_moe.experts.202.w2", "model.layers.42.block_sparse_moe.experts.203.w2", "model.layers.42.block_sparse_moe.experts.204.w2", "model.layers.42.block_sparse_moe.experts.205.w2", "model.layers.42.block_sparse_moe.experts.206.w2", "model.layers.42.block_sparse_moe.experts.207.w2", "model.layers.42.block_sparse_moe.experts.208.w2", "model.layers.42.block_sparse_moe.experts.209.w2", "model.layers.42.block_sparse_moe.experts.210.w2", "model.layers.42.block_sparse_moe.experts.211.w2", "model.layers.42.block_sparse_moe.experts.212.w2", "model.layers.42.block_sparse_moe.experts.213.w2", "model.layers.42.block_sparse_moe.experts.214.w2", "model.layers.42.block_sparse_moe.experts.215.w2", "model.layers.42.block_sparse_moe.experts.216.w2", "model.layers.42.block_sparse_moe.experts.217.w2", "model.layers.42.block_sparse_moe.experts.218.w2", "model.layers.42.block_sparse_moe.experts.219.w2", "model.layers.42.block_sparse_moe.experts.220.w2", "model.layers.42.block_sparse_moe.experts.221.w2", "model.layers.42.block_sparse_moe.experts.222.w2", "model.layers.42.block_sparse_moe.experts.223.w2", "model.layers.42.block_sparse_moe.experts.224.w2", "model.layers.42.block_sparse_moe.experts.225.w2", "model.layers.42.block_sparse_moe.experts.226.w2", "model.layers.42.block_sparse_moe.experts.227.w2", "model.layers.42.block_sparse_moe.experts.228.w2", "model.layers.42.block_sparse_moe.experts.229.w2", "model.layers.42.block_sparse_moe.experts.230.w2", "model.layers.42.block_sparse_moe.experts.231.w2", "model.layers.42.block_sparse_moe.experts.232.w2", "model.layers.42.block_sparse_moe.experts.233.w2", "model.layers.42.block_sparse_moe.experts.234.w2", "model.layers.42.block_sparse_moe.experts.235.w2", "model.layers.42.block_sparse_moe.experts.236.w2", "model.layers.42.block_sparse_moe.experts.237.w2", "model.layers.42.block_sparse_moe.experts.238.w2", "model.layers.42.block_sparse_moe.experts.239.w2", "model.layers.42.block_sparse_moe.experts.240.w2", "model.layers.42.block_sparse_moe.experts.241.w2", "model.layers.42.block_sparse_moe.experts.242.w2", "model.layers.42.block_sparse_moe.experts.243.w2", "model.layers.42.block_sparse_moe.experts.244.w2", "model.layers.42.block_sparse_moe.experts.245.w2", "model.layers.42.block_sparse_moe.experts.246.w2", "model.layers.42.block_sparse_moe.experts.247.w2", "model.layers.42.block_sparse_moe.experts.248.w2", "model.layers.42.block_sparse_moe.experts.249.w2", "model.layers.42.block_sparse_moe.experts.250.w2", "model.layers.42.block_sparse_moe.experts.251.w2", "model.layers.42.block_sparse_moe.experts.252.w2", "model.layers.42.block_sparse_moe.experts.253.w2", "model.layers.42.block_sparse_moe.experts.254.w2", "model.layers.42.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0019942669197917096, "dbits": 3623878656 } ] }, { "idx": 86, "layers": [ "model.layers.43.self_attn.q_proj", "model.layers.43.self_attn.k_proj", "model.layers.43.self_attn.v_proj", "model.layers.43.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0025670597329735534, "dbits": 44040192 } ] }, { "idx": 87, "layers": [ "model.layers.43.block_sparse_moe.experts.0.w1", "model.layers.43.block_sparse_moe.experts.1.w1", "model.layers.43.block_sparse_moe.experts.2.w1", "model.layers.43.block_sparse_moe.experts.3.w1", "model.layers.43.block_sparse_moe.experts.4.w1", "model.layers.43.block_sparse_moe.experts.5.w1", "model.layers.43.block_sparse_moe.experts.6.w1", "model.layers.43.block_sparse_moe.experts.7.w1", "model.layers.43.block_sparse_moe.experts.8.w1", "model.layers.43.block_sparse_moe.experts.9.w1", "model.layers.43.block_sparse_moe.experts.10.w1", "model.layers.43.block_sparse_moe.experts.11.w1", "model.layers.43.block_sparse_moe.experts.12.w1", "model.layers.43.block_sparse_moe.experts.13.w1", "model.layers.43.block_sparse_moe.experts.14.w1", "model.layers.43.block_sparse_moe.experts.15.w1", "model.layers.43.block_sparse_moe.experts.16.w1", "model.layers.43.block_sparse_moe.experts.17.w1", "model.layers.43.block_sparse_moe.experts.18.w1", "model.layers.43.block_sparse_moe.experts.19.w1", "model.layers.43.block_sparse_moe.experts.20.w1", "model.layers.43.block_sparse_moe.experts.21.w1", "model.layers.43.block_sparse_moe.experts.22.w1", "model.layers.43.block_sparse_moe.experts.23.w1", "model.layers.43.block_sparse_moe.experts.24.w1", "model.layers.43.block_sparse_moe.experts.25.w1", "model.layers.43.block_sparse_moe.experts.26.w1", "model.layers.43.block_sparse_moe.experts.27.w1", "model.layers.43.block_sparse_moe.experts.28.w1", "model.layers.43.block_sparse_moe.experts.29.w1", "model.layers.43.block_sparse_moe.experts.30.w1", "model.layers.43.block_sparse_moe.experts.31.w1", "model.layers.43.block_sparse_moe.experts.32.w1", "model.layers.43.block_sparse_moe.experts.33.w1", "model.layers.43.block_sparse_moe.experts.34.w1", "model.layers.43.block_sparse_moe.experts.35.w1", "model.layers.43.block_sparse_moe.experts.36.w1", "model.layers.43.block_sparse_moe.experts.37.w1", "model.layers.43.block_sparse_moe.experts.38.w1", "model.layers.43.block_sparse_moe.experts.39.w1", "model.layers.43.block_sparse_moe.experts.40.w1", "model.layers.43.block_sparse_moe.experts.41.w1", "model.layers.43.block_sparse_moe.experts.42.w1", "model.layers.43.block_sparse_moe.experts.43.w1", "model.layers.43.block_sparse_moe.experts.44.w1", "model.layers.43.block_sparse_moe.experts.45.w1", "model.layers.43.block_sparse_moe.experts.46.w1", "model.layers.43.block_sparse_moe.experts.47.w1", "model.layers.43.block_sparse_moe.experts.48.w1", "model.layers.43.block_sparse_moe.experts.49.w1", "model.layers.43.block_sparse_moe.experts.50.w1", "model.layers.43.block_sparse_moe.experts.51.w1", "model.layers.43.block_sparse_moe.experts.52.w1", "model.layers.43.block_sparse_moe.experts.53.w1", "model.layers.43.block_sparse_moe.experts.54.w1", "model.layers.43.block_sparse_moe.experts.55.w1", "model.layers.43.block_sparse_moe.experts.56.w1", "model.layers.43.block_sparse_moe.experts.57.w1", "model.layers.43.block_sparse_moe.experts.58.w1", "model.layers.43.block_sparse_moe.experts.59.w1", "model.layers.43.block_sparse_moe.experts.60.w1", "model.layers.43.block_sparse_moe.experts.61.w1", "model.layers.43.block_sparse_moe.experts.62.w1", "model.layers.43.block_sparse_moe.experts.63.w1", "model.layers.43.block_sparse_moe.experts.64.w1", "model.layers.43.block_sparse_moe.experts.65.w1", "model.layers.43.block_sparse_moe.experts.66.w1", "model.layers.43.block_sparse_moe.experts.67.w1", "model.layers.43.block_sparse_moe.experts.68.w1", "model.layers.43.block_sparse_moe.experts.69.w1", "model.layers.43.block_sparse_moe.experts.70.w1", "model.layers.43.block_sparse_moe.experts.71.w1", "model.layers.43.block_sparse_moe.experts.72.w1", "model.layers.43.block_sparse_moe.experts.73.w1", "model.layers.43.block_sparse_moe.experts.74.w1", "model.layers.43.block_sparse_moe.experts.75.w1", "model.layers.43.block_sparse_moe.experts.76.w1", "model.layers.43.block_sparse_moe.experts.77.w1", "model.layers.43.block_sparse_moe.experts.78.w1", "model.layers.43.block_sparse_moe.experts.79.w1", "model.layers.43.block_sparse_moe.experts.80.w1", "model.layers.43.block_sparse_moe.experts.81.w1", "model.layers.43.block_sparse_moe.experts.82.w1", "model.layers.43.block_sparse_moe.experts.83.w1", "model.layers.43.block_sparse_moe.experts.84.w1", "model.layers.43.block_sparse_moe.experts.85.w1", "model.layers.43.block_sparse_moe.experts.86.w1", "model.layers.43.block_sparse_moe.experts.87.w1", "model.layers.43.block_sparse_moe.experts.88.w1", "model.layers.43.block_sparse_moe.experts.89.w1", "model.layers.43.block_sparse_moe.experts.90.w1", "model.layers.43.block_sparse_moe.experts.91.w1", "model.layers.43.block_sparse_moe.experts.92.w1", "model.layers.43.block_sparse_moe.experts.93.w1", "model.layers.43.block_sparse_moe.experts.94.w1", "model.layers.43.block_sparse_moe.experts.95.w1", "model.layers.43.block_sparse_moe.experts.96.w1", "model.layers.43.block_sparse_moe.experts.97.w1", "model.layers.43.block_sparse_moe.experts.98.w1", "model.layers.43.block_sparse_moe.experts.99.w1", "model.layers.43.block_sparse_moe.experts.100.w1", "model.layers.43.block_sparse_moe.experts.101.w1", "model.layers.43.block_sparse_moe.experts.102.w1", "model.layers.43.block_sparse_moe.experts.103.w1", "model.layers.43.block_sparse_moe.experts.104.w1", "model.layers.43.block_sparse_moe.experts.105.w1", "model.layers.43.block_sparse_moe.experts.106.w1", "model.layers.43.block_sparse_moe.experts.107.w1", "model.layers.43.block_sparse_moe.experts.108.w1", "model.layers.43.block_sparse_moe.experts.109.w1", "model.layers.43.block_sparse_moe.experts.110.w1", "model.layers.43.block_sparse_moe.experts.111.w1", "model.layers.43.block_sparse_moe.experts.112.w1", "model.layers.43.block_sparse_moe.experts.113.w1", "model.layers.43.block_sparse_moe.experts.114.w1", "model.layers.43.block_sparse_moe.experts.115.w1", "model.layers.43.block_sparse_moe.experts.116.w1", "model.layers.43.block_sparse_moe.experts.117.w1", "model.layers.43.block_sparse_moe.experts.118.w1", "model.layers.43.block_sparse_moe.experts.119.w1", "model.layers.43.block_sparse_moe.experts.120.w1", "model.layers.43.block_sparse_moe.experts.121.w1", "model.layers.43.block_sparse_moe.experts.122.w1", "model.layers.43.block_sparse_moe.experts.123.w1", "model.layers.43.block_sparse_moe.experts.124.w1", "model.layers.43.block_sparse_moe.experts.125.w1", "model.layers.43.block_sparse_moe.experts.126.w1", "model.layers.43.block_sparse_moe.experts.127.w1", "model.layers.43.block_sparse_moe.experts.128.w1", "model.layers.43.block_sparse_moe.experts.129.w1", "model.layers.43.block_sparse_moe.experts.130.w1", "model.layers.43.block_sparse_moe.experts.131.w1", "model.layers.43.block_sparse_moe.experts.132.w1", "model.layers.43.block_sparse_moe.experts.133.w1", "model.layers.43.block_sparse_moe.experts.134.w1", "model.layers.43.block_sparse_moe.experts.135.w1", "model.layers.43.block_sparse_moe.experts.136.w1", "model.layers.43.block_sparse_moe.experts.137.w1", "model.layers.43.block_sparse_moe.experts.138.w1", "model.layers.43.block_sparse_moe.experts.139.w1", "model.layers.43.block_sparse_moe.experts.140.w1", "model.layers.43.block_sparse_moe.experts.141.w1", "model.layers.43.block_sparse_moe.experts.142.w1", "model.layers.43.block_sparse_moe.experts.143.w1", "model.layers.43.block_sparse_moe.experts.144.w1", "model.layers.43.block_sparse_moe.experts.145.w1", "model.layers.43.block_sparse_moe.experts.146.w1", "model.layers.43.block_sparse_moe.experts.147.w1", "model.layers.43.block_sparse_moe.experts.148.w1", "model.layers.43.block_sparse_moe.experts.149.w1", "model.layers.43.block_sparse_moe.experts.150.w1", "model.layers.43.block_sparse_moe.experts.151.w1", "model.layers.43.block_sparse_moe.experts.152.w1", "model.layers.43.block_sparse_moe.experts.153.w1", "model.layers.43.block_sparse_moe.experts.154.w1", "model.layers.43.block_sparse_moe.experts.155.w1", "model.layers.43.block_sparse_moe.experts.156.w1", "model.layers.43.block_sparse_moe.experts.157.w1", "model.layers.43.block_sparse_moe.experts.158.w1", "model.layers.43.block_sparse_moe.experts.159.w1", "model.layers.43.block_sparse_moe.experts.160.w1", "model.layers.43.block_sparse_moe.experts.161.w1", "model.layers.43.block_sparse_moe.experts.162.w1", "model.layers.43.block_sparse_moe.experts.163.w1", "model.layers.43.block_sparse_moe.experts.164.w1", "model.layers.43.block_sparse_moe.experts.165.w1", "model.layers.43.block_sparse_moe.experts.166.w1", "model.layers.43.block_sparse_moe.experts.167.w1", "model.layers.43.block_sparse_moe.experts.168.w1", "model.layers.43.block_sparse_moe.experts.169.w1", "model.layers.43.block_sparse_moe.experts.170.w1", "model.layers.43.block_sparse_moe.experts.171.w1", "model.layers.43.block_sparse_moe.experts.172.w1", "model.layers.43.block_sparse_moe.experts.173.w1", "model.layers.43.block_sparse_moe.experts.174.w1", "model.layers.43.block_sparse_moe.experts.175.w1", "model.layers.43.block_sparse_moe.experts.176.w1", "model.layers.43.block_sparse_moe.experts.177.w1", "model.layers.43.block_sparse_moe.experts.178.w1", "model.layers.43.block_sparse_moe.experts.179.w1", "model.layers.43.block_sparse_moe.experts.180.w1", "model.layers.43.block_sparse_moe.experts.181.w1", "model.layers.43.block_sparse_moe.experts.182.w1", "model.layers.43.block_sparse_moe.experts.183.w1", "model.layers.43.block_sparse_moe.experts.184.w1", "model.layers.43.block_sparse_moe.experts.185.w1", "model.layers.43.block_sparse_moe.experts.186.w1", "model.layers.43.block_sparse_moe.experts.187.w1", "model.layers.43.block_sparse_moe.experts.188.w1", "model.layers.43.block_sparse_moe.experts.189.w1", "model.layers.43.block_sparse_moe.experts.190.w1", "model.layers.43.block_sparse_moe.experts.191.w1", "model.layers.43.block_sparse_moe.experts.192.w1", "model.layers.43.block_sparse_moe.experts.193.w1", "model.layers.43.block_sparse_moe.experts.194.w1", "model.layers.43.block_sparse_moe.experts.195.w1", "model.layers.43.block_sparse_moe.experts.196.w1", "model.layers.43.block_sparse_moe.experts.197.w1", "model.layers.43.block_sparse_moe.experts.198.w1", "model.layers.43.block_sparse_moe.experts.199.w1", "model.layers.43.block_sparse_moe.experts.200.w1", "model.layers.43.block_sparse_moe.experts.201.w1", "model.layers.43.block_sparse_moe.experts.202.w1", "model.layers.43.block_sparse_moe.experts.203.w1", "model.layers.43.block_sparse_moe.experts.204.w1", "model.layers.43.block_sparse_moe.experts.205.w1", "model.layers.43.block_sparse_moe.experts.206.w1", "model.layers.43.block_sparse_moe.experts.207.w1", "model.layers.43.block_sparse_moe.experts.208.w1", "model.layers.43.block_sparse_moe.experts.209.w1", "model.layers.43.block_sparse_moe.experts.210.w1", "model.layers.43.block_sparse_moe.experts.211.w1", "model.layers.43.block_sparse_moe.experts.212.w1", "model.layers.43.block_sparse_moe.experts.213.w1", "model.layers.43.block_sparse_moe.experts.214.w1", "model.layers.43.block_sparse_moe.experts.215.w1", "model.layers.43.block_sparse_moe.experts.216.w1", "model.layers.43.block_sparse_moe.experts.217.w1", "model.layers.43.block_sparse_moe.experts.218.w1", "model.layers.43.block_sparse_moe.experts.219.w1", "model.layers.43.block_sparse_moe.experts.220.w1", "model.layers.43.block_sparse_moe.experts.221.w1", "model.layers.43.block_sparse_moe.experts.222.w1", "model.layers.43.block_sparse_moe.experts.223.w1", "model.layers.43.block_sparse_moe.experts.224.w1", "model.layers.43.block_sparse_moe.experts.225.w1", "model.layers.43.block_sparse_moe.experts.226.w1", "model.layers.43.block_sparse_moe.experts.227.w1", "model.layers.43.block_sparse_moe.experts.228.w1", "model.layers.43.block_sparse_moe.experts.229.w1", "model.layers.43.block_sparse_moe.experts.230.w1", "model.layers.43.block_sparse_moe.experts.231.w1", "model.layers.43.block_sparse_moe.experts.232.w1", "model.layers.43.block_sparse_moe.experts.233.w1", "model.layers.43.block_sparse_moe.experts.234.w1", "model.layers.43.block_sparse_moe.experts.235.w1", "model.layers.43.block_sparse_moe.experts.236.w1", "model.layers.43.block_sparse_moe.experts.237.w1", "model.layers.43.block_sparse_moe.experts.238.w1", "model.layers.43.block_sparse_moe.experts.239.w1", "model.layers.43.block_sparse_moe.experts.240.w1", "model.layers.43.block_sparse_moe.experts.241.w1", "model.layers.43.block_sparse_moe.experts.242.w1", "model.layers.43.block_sparse_moe.experts.243.w1", "model.layers.43.block_sparse_moe.experts.244.w1", "model.layers.43.block_sparse_moe.experts.245.w1", "model.layers.43.block_sparse_moe.experts.246.w1", "model.layers.43.block_sparse_moe.experts.247.w1", "model.layers.43.block_sparse_moe.experts.248.w1", "model.layers.43.block_sparse_moe.experts.249.w1", "model.layers.43.block_sparse_moe.experts.250.w1", "model.layers.43.block_sparse_moe.experts.251.w1", "model.layers.43.block_sparse_moe.experts.252.w1", "model.layers.43.block_sparse_moe.experts.253.w1", "model.layers.43.block_sparse_moe.experts.254.w1", "model.layers.43.block_sparse_moe.experts.255.w1", "model.layers.43.block_sparse_moe.experts.0.w3", "model.layers.43.block_sparse_moe.experts.1.w3", "model.layers.43.block_sparse_moe.experts.2.w3", "model.layers.43.block_sparse_moe.experts.3.w3", "model.layers.43.block_sparse_moe.experts.4.w3", "model.layers.43.block_sparse_moe.experts.5.w3", "model.layers.43.block_sparse_moe.experts.6.w3", "model.layers.43.block_sparse_moe.experts.7.w3", "model.layers.43.block_sparse_moe.experts.8.w3", "model.layers.43.block_sparse_moe.experts.9.w3", "model.layers.43.block_sparse_moe.experts.10.w3", "model.layers.43.block_sparse_moe.experts.11.w3", "model.layers.43.block_sparse_moe.experts.12.w3", "model.layers.43.block_sparse_moe.experts.13.w3", "model.layers.43.block_sparse_moe.experts.14.w3", "model.layers.43.block_sparse_moe.experts.15.w3", "model.layers.43.block_sparse_moe.experts.16.w3", "model.layers.43.block_sparse_moe.experts.17.w3", "model.layers.43.block_sparse_moe.experts.18.w3", "model.layers.43.block_sparse_moe.experts.19.w3", "model.layers.43.block_sparse_moe.experts.20.w3", "model.layers.43.block_sparse_moe.experts.21.w3", "model.layers.43.block_sparse_moe.experts.22.w3", "model.layers.43.block_sparse_moe.experts.23.w3", "model.layers.43.block_sparse_moe.experts.24.w3", "model.layers.43.block_sparse_moe.experts.25.w3", "model.layers.43.block_sparse_moe.experts.26.w3", "model.layers.43.block_sparse_moe.experts.27.w3", "model.layers.43.block_sparse_moe.experts.28.w3", "model.layers.43.block_sparse_moe.experts.29.w3", "model.layers.43.block_sparse_moe.experts.30.w3", "model.layers.43.block_sparse_moe.experts.31.w3", "model.layers.43.block_sparse_moe.experts.32.w3", "model.layers.43.block_sparse_moe.experts.33.w3", "model.layers.43.block_sparse_moe.experts.34.w3", "model.layers.43.block_sparse_moe.experts.35.w3", "model.layers.43.block_sparse_moe.experts.36.w3", "model.layers.43.block_sparse_moe.experts.37.w3", "model.layers.43.block_sparse_moe.experts.38.w3", "model.layers.43.block_sparse_moe.experts.39.w3", "model.layers.43.block_sparse_moe.experts.40.w3", "model.layers.43.block_sparse_moe.experts.41.w3", "model.layers.43.block_sparse_moe.experts.42.w3", "model.layers.43.block_sparse_moe.experts.43.w3", "model.layers.43.block_sparse_moe.experts.44.w3", "model.layers.43.block_sparse_moe.experts.45.w3", "model.layers.43.block_sparse_moe.experts.46.w3", "model.layers.43.block_sparse_moe.experts.47.w3", "model.layers.43.block_sparse_moe.experts.48.w3", "model.layers.43.block_sparse_moe.experts.49.w3", "model.layers.43.block_sparse_moe.experts.50.w3", "model.layers.43.block_sparse_moe.experts.51.w3", "model.layers.43.block_sparse_moe.experts.52.w3", "model.layers.43.block_sparse_moe.experts.53.w3", "model.layers.43.block_sparse_moe.experts.54.w3", "model.layers.43.block_sparse_moe.experts.55.w3", "model.layers.43.block_sparse_moe.experts.56.w3", "model.layers.43.block_sparse_moe.experts.57.w3", "model.layers.43.block_sparse_moe.experts.58.w3", "model.layers.43.block_sparse_moe.experts.59.w3", "model.layers.43.block_sparse_moe.experts.60.w3", "model.layers.43.block_sparse_moe.experts.61.w3", "model.layers.43.block_sparse_moe.experts.62.w3", "model.layers.43.block_sparse_moe.experts.63.w3", "model.layers.43.block_sparse_moe.experts.64.w3", "model.layers.43.block_sparse_moe.experts.65.w3", "model.layers.43.block_sparse_moe.experts.66.w3", "model.layers.43.block_sparse_moe.experts.67.w3", "model.layers.43.block_sparse_moe.experts.68.w3", "model.layers.43.block_sparse_moe.experts.69.w3", "model.layers.43.block_sparse_moe.experts.70.w3", "model.layers.43.block_sparse_moe.experts.71.w3", "model.layers.43.block_sparse_moe.experts.72.w3", "model.layers.43.block_sparse_moe.experts.73.w3", "model.layers.43.block_sparse_moe.experts.74.w3", "model.layers.43.block_sparse_moe.experts.75.w3", "model.layers.43.block_sparse_moe.experts.76.w3", "model.layers.43.block_sparse_moe.experts.77.w3", "model.layers.43.block_sparse_moe.experts.78.w3", "model.layers.43.block_sparse_moe.experts.79.w3", "model.layers.43.block_sparse_moe.experts.80.w3", "model.layers.43.block_sparse_moe.experts.81.w3", "model.layers.43.block_sparse_moe.experts.82.w3", "model.layers.43.block_sparse_moe.experts.83.w3", "model.layers.43.block_sparse_moe.experts.84.w3", "model.layers.43.block_sparse_moe.experts.85.w3", "model.layers.43.block_sparse_moe.experts.86.w3", "model.layers.43.block_sparse_moe.experts.87.w3", "model.layers.43.block_sparse_moe.experts.88.w3", "model.layers.43.block_sparse_moe.experts.89.w3", "model.layers.43.block_sparse_moe.experts.90.w3", "model.layers.43.block_sparse_moe.experts.91.w3", "model.layers.43.block_sparse_moe.experts.92.w3", "model.layers.43.block_sparse_moe.experts.93.w3", "model.layers.43.block_sparse_moe.experts.94.w3", "model.layers.43.block_sparse_moe.experts.95.w3", "model.layers.43.block_sparse_moe.experts.96.w3", "model.layers.43.block_sparse_moe.experts.97.w3", "model.layers.43.block_sparse_moe.experts.98.w3", "model.layers.43.block_sparse_moe.experts.99.w3", "model.layers.43.block_sparse_moe.experts.100.w3", "model.layers.43.block_sparse_moe.experts.101.w3", "model.layers.43.block_sparse_moe.experts.102.w3", "model.layers.43.block_sparse_moe.experts.103.w3", "model.layers.43.block_sparse_moe.experts.104.w3", "model.layers.43.block_sparse_moe.experts.105.w3", "model.layers.43.block_sparse_moe.experts.106.w3", "model.layers.43.block_sparse_moe.experts.107.w3", "model.layers.43.block_sparse_moe.experts.108.w3", "model.layers.43.block_sparse_moe.experts.109.w3", "model.layers.43.block_sparse_moe.experts.110.w3", "model.layers.43.block_sparse_moe.experts.111.w3", "model.layers.43.block_sparse_moe.experts.112.w3", "model.layers.43.block_sparse_moe.experts.113.w3", "model.layers.43.block_sparse_moe.experts.114.w3", "model.layers.43.block_sparse_moe.experts.115.w3", "model.layers.43.block_sparse_moe.experts.116.w3", "model.layers.43.block_sparse_moe.experts.117.w3", "model.layers.43.block_sparse_moe.experts.118.w3", "model.layers.43.block_sparse_moe.experts.119.w3", "model.layers.43.block_sparse_moe.experts.120.w3", "model.layers.43.block_sparse_moe.experts.121.w3", "model.layers.43.block_sparse_moe.experts.122.w3", "model.layers.43.block_sparse_moe.experts.123.w3", "model.layers.43.block_sparse_moe.experts.124.w3", "model.layers.43.block_sparse_moe.experts.125.w3", "model.layers.43.block_sparse_moe.experts.126.w3", "model.layers.43.block_sparse_moe.experts.127.w3", "model.layers.43.block_sparse_moe.experts.128.w3", "model.layers.43.block_sparse_moe.experts.129.w3", "model.layers.43.block_sparse_moe.experts.130.w3", "model.layers.43.block_sparse_moe.experts.131.w3", "model.layers.43.block_sparse_moe.experts.132.w3", "model.layers.43.block_sparse_moe.experts.133.w3", "model.layers.43.block_sparse_moe.experts.134.w3", "model.layers.43.block_sparse_moe.experts.135.w3", "model.layers.43.block_sparse_moe.experts.136.w3", "model.layers.43.block_sparse_moe.experts.137.w3", "model.layers.43.block_sparse_moe.experts.138.w3", "model.layers.43.block_sparse_moe.experts.139.w3", "model.layers.43.block_sparse_moe.experts.140.w3", "model.layers.43.block_sparse_moe.experts.141.w3", "model.layers.43.block_sparse_moe.experts.142.w3", "model.layers.43.block_sparse_moe.experts.143.w3", "model.layers.43.block_sparse_moe.experts.144.w3", "model.layers.43.block_sparse_moe.experts.145.w3", "model.layers.43.block_sparse_moe.experts.146.w3", "model.layers.43.block_sparse_moe.experts.147.w3", "model.layers.43.block_sparse_moe.experts.148.w3", "model.layers.43.block_sparse_moe.experts.149.w3", "model.layers.43.block_sparse_moe.experts.150.w3", "model.layers.43.block_sparse_moe.experts.151.w3", "model.layers.43.block_sparse_moe.experts.152.w3", "model.layers.43.block_sparse_moe.experts.153.w3", "model.layers.43.block_sparse_moe.experts.154.w3", "model.layers.43.block_sparse_moe.experts.155.w3", "model.layers.43.block_sparse_moe.experts.156.w3", "model.layers.43.block_sparse_moe.experts.157.w3", "model.layers.43.block_sparse_moe.experts.158.w3", "model.layers.43.block_sparse_moe.experts.159.w3", "model.layers.43.block_sparse_moe.experts.160.w3", "model.layers.43.block_sparse_moe.experts.161.w3", "model.layers.43.block_sparse_moe.experts.162.w3", "model.layers.43.block_sparse_moe.experts.163.w3", "model.layers.43.block_sparse_moe.experts.164.w3", "model.layers.43.block_sparse_moe.experts.165.w3", "model.layers.43.block_sparse_moe.experts.166.w3", "model.layers.43.block_sparse_moe.experts.167.w3", "model.layers.43.block_sparse_moe.experts.168.w3", "model.layers.43.block_sparse_moe.experts.169.w3", "model.layers.43.block_sparse_moe.experts.170.w3", "model.layers.43.block_sparse_moe.experts.171.w3", "model.layers.43.block_sparse_moe.experts.172.w3", "model.layers.43.block_sparse_moe.experts.173.w3", "model.layers.43.block_sparse_moe.experts.174.w3", "model.layers.43.block_sparse_moe.experts.175.w3", "model.layers.43.block_sparse_moe.experts.176.w3", "model.layers.43.block_sparse_moe.experts.177.w3", "model.layers.43.block_sparse_moe.experts.178.w3", "model.layers.43.block_sparse_moe.experts.179.w3", "model.layers.43.block_sparse_moe.experts.180.w3", "model.layers.43.block_sparse_moe.experts.181.w3", "model.layers.43.block_sparse_moe.experts.182.w3", "model.layers.43.block_sparse_moe.experts.183.w3", "model.layers.43.block_sparse_moe.experts.184.w3", "model.layers.43.block_sparse_moe.experts.185.w3", "model.layers.43.block_sparse_moe.experts.186.w3", "model.layers.43.block_sparse_moe.experts.187.w3", "model.layers.43.block_sparse_moe.experts.188.w3", "model.layers.43.block_sparse_moe.experts.189.w3", "model.layers.43.block_sparse_moe.experts.190.w3", "model.layers.43.block_sparse_moe.experts.191.w3", "model.layers.43.block_sparse_moe.experts.192.w3", "model.layers.43.block_sparse_moe.experts.193.w3", "model.layers.43.block_sparse_moe.experts.194.w3", "model.layers.43.block_sparse_moe.experts.195.w3", "model.layers.43.block_sparse_moe.experts.196.w3", "model.layers.43.block_sparse_moe.experts.197.w3", "model.layers.43.block_sparse_moe.experts.198.w3", "model.layers.43.block_sparse_moe.experts.199.w3", "model.layers.43.block_sparse_moe.experts.200.w3", "model.layers.43.block_sparse_moe.experts.201.w3", "model.layers.43.block_sparse_moe.experts.202.w3", "model.layers.43.block_sparse_moe.experts.203.w3", "model.layers.43.block_sparse_moe.experts.204.w3", "model.layers.43.block_sparse_moe.experts.205.w3", "model.layers.43.block_sparse_moe.experts.206.w3", "model.layers.43.block_sparse_moe.experts.207.w3", "model.layers.43.block_sparse_moe.experts.208.w3", "model.layers.43.block_sparse_moe.experts.209.w3", "model.layers.43.block_sparse_moe.experts.210.w3", "model.layers.43.block_sparse_moe.experts.211.w3", "model.layers.43.block_sparse_moe.experts.212.w3", "model.layers.43.block_sparse_moe.experts.213.w3", "model.layers.43.block_sparse_moe.experts.214.w3", "model.layers.43.block_sparse_moe.experts.215.w3", "model.layers.43.block_sparse_moe.experts.216.w3", "model.layers.43.block_sparse_moe.experts.217.w3", "model.layers.43.block_sparse_moe.experts.218.w3", "model.layers.43.block_sparse_moe.experts.219.w3", "model.layers.43.block_sparse_moe.experts.220.w3", "model.layers.43.block_sparse_moe.experts.221.w3", "model.layers.43.block_sparse_moe.experts.222.w3", "model.layers.43.block_sparse_moe.experts.223.w3", "model.layers.43.block_sparse_moe.experts.224.w3", "model.layers.43.block_sparse_moe.experts.225.w3", "model.layers.43.block_sparse_moe.experts.226.w3", "model.layers.43.block_sparse_moe.experts.227.w3", "model.layers.43.block_sparse_moe.experts.228.w3", "model.layers.43.block_sparse_moe.experts.229.w3", "model.layers.43.block_sparse_moe.experts.230.w3", "model.layers.43.block_sparse_moe.experts.231.w3", "model.layers.43.block_sparse_moe.experts.232.w3", "model.layers.43.block_sparse_moe.experts.233.w3", "model.layers.43.block_sparse_moe.experts.234.w3", "model.layers.43.block_sparse_moe.experts.235.w3", "model.layers.43.block_sparse_moe.experts.236.w3", "model.layers.43.block_sparse_moe.experts.237.w3", "model.layers.43.block_sparse_moe.experts.238.w3", "model.layers.43.block_sparse_moe.experts.239.w3", "model.layers.43.block_sparse_moe.experts.240.w3", "model.layers.43.block_sparse_moe.experts.241.w3", "model.layers.43.block_sparse_moe.experts.242.w3", "model.layers.43.block_sparse_moe.experts.243.w3", "model.layers.43.block_sparse_moe.experts.244.w3", "model.layers.43.block_sparse_moe.experts.245.w3", "model.layers.43.block_sparse_moe.experts.246.w3", "model.layers.43.block_sparse_moe.experts.247.w3", "model.layers.43.block_sparse_moe.experts.248.w3", "model.layers.43.block_sparse_moe.experts.249.w3", "model.layers.43.block_sparse_moe.experts.250.w3", "model.layers.43.block_sparse_moe.experts.251.w3", "model.layers.43.block_sparse_moe.experts.252.w3", "model.layers.43.block_sparse_moe.experts.253.w3", "model.layers.43.block_sparse_moe.experts.254.w3", "model.layers.43.block_sparse_moe.experts.255.w3", "model.layers.43.block_sparse_moe.experts.0.w2", "model.layers.43.block_sparse_moe.experts.1.w2", "model.layers.43.block_sparse_moe.experts.2.w2", "model.layers.43.block_sparse_moe.experts.3.w2", "model.layers.43.block_sparse_moe.experts.4.w2", "model.layers.43.block_sparse_moe.experts.5.w2", "model.layers.43.block_sparse_moe.experts.6.w2", "model.layers.43.block_sparse_moe.experts.7.w2", "model.layers.43.block_sparse_moe.experts.8.w2", "model.layers.43.block_sparse_moe.experts.9.w2", "model.layers.43.block_sparse_moe.experts.10.w2", "model.layers.43.block_sparse_moe.experts.11.w2", "model.layers.43.block_sparse_moe.experts.12.w2", "model.layers.43.block_sparse_moe.experts.13.w2", "model.layers.43.block_sparse_moe.experts.14.w2", "model.layers.43.block_sparse_moe.experts.15.w2", "model.layers.43.block_sparse_moe.experts.16.w2", "model.layers.43.block_sparse_moe.experts.17.w2", "model.layers.43.block_sparse_moe.experts.18.w2", "model.layers.43.block_sparse_moe.experts.19.w2", "model.layers.43.block_sparse_moe.experts.20.w2", "model.layers.43.block_sparse_moe.experts.21.w2", "model.layers.43.block_sparse_moe.experts.22.w2", "model.layers.43.block_sparse_moe.experts.23.w2", "model.layers.43.block_sparse_moe.experts.24.w2", "model.layers.43.block_sparse_moe.experts.25.w2", "model.layers.43.block_sparse_moe.experts.26.w2", "model.layers.43.block_sparse_moe.experts.27.w2", "model.layers.43.block_sparse_moe.experts.28.w2", "model.layers.43.block_sparse_moe.experts.29.w2", "model.layers.43.block_sparse_moe.experts.30.w2", "model.layers.43.block_sparse_moe.experts.31.w2", "model.layers.43.block_sparse_moe.experts.32.w2", "model.layers.43.block_sparse_moe.experts.33.w2", "model.layers.43.block_sparse_moe.experts.34.w2", "model.layers.43.block_sparse_moe.experts.35.w2", "model.layers.43.block_sparse_moe.experts.36.w2", "model.layers.43.block_sparse_moe.experts.37.w2", "model.layers.43.block_sparse_moe.experts.38.w2", "model.layers.43.block_sparse_moe.experts.39.w2", "model.layers.43.block_sparse_moe.experts.40.w2", "model.layers.43.block_sparse_moe.experts.41.w2", "model.layers.43.block_sparse_moe.experts.42.w2", "model.layers.43.block_sparse_moe.experts.43.w2", "model.layers.43.block_sparse_moe.experts.44.w2", "model.layers.43.block_sparse_moe.experts.45.w2", "model.layers.43.block_sparse_moe.experts.46.w2", "model.layers.43.block_sparse_moe.experts.47.w2", "model.layers.43.block_sparse_moe.experts.48.w2", "model.layers.43.block_sparse_moe.experts.49.w2", "model.layers.43.block_sparse_moe.experts.50.w2", "model.layers.43.block_sparse_moe.experts.51.w2", "model.layers.43.block_sparse_moe.experts.52.w2", "model.layers.43.block_sparse_moe.experts.53.w2", "model.layers.43.block_sparse_moe.experts.54.w2", "model.layers.43.block_sparse_moe.experts.55.w2", "model.layers.43.block_sparse_moe.experts.56.w2", "model.layers.43.block_sparse_moe.experts.57.w2", "model.layers.43.block_sparse_moe.experts.58.w2", "model.layers.43.block_sparse_moe.experts.59.w2", "model.layers.43.block_sparse_moe.experts.60.w2", "model.layers.43.block_sparse_moe.experts.61.w2", "model.layers.43.block_sparse_moe.experts.62.w2", "model.layers.43.block_sparse_moe.experts.63.w2", "model.layers.43.block_sparse_moe.experts.64.w2", "model.layers.43.block_sparse_moe.experts.65.w2", "model.layers.43.block_sparse_moe.experts.66.w2", "model.layers.43.block_sparse_moe.experts.67.w2", "model.layers.43.block_sparse_moe.experts.68.w2", "model.layers.43.block_sparse_moe.experts.69.w2", "model.layers.43.block_sparse_moe.experts.70.w2", "model.layers.43.block_sparse_moe.experts.71.w2", "model.layers.43.block_sparse_moe.experts.72.w2", "model.layers.43.block_sparse_moe.experts.73.w2", "model.layers.43.block_sparse_moe.experts.74.w2", "model.layers.43.block_sparse_moe.experts.75.w2", "model.layers.43.block_sparse_moe.experts.76.w2", "model.layers.43.block_sparse_moe.experts.77.w2", "model.layers.43.block_sparse_moe.experts.78.w2", "model.layers.43.block_sparse_moe.experts.79.w2", "model.layers.43.block_sparse_moe.experts.80.w2", "model.layers.43.block_sparse_moe.experts.81.w2", "model.layers.43.block_sparse_moe.experts.82.w2", "model.layers.43.block_sparse_moe.experts.83.w2", "model.layers.43.block_sparse_moe.experts.84.w2", "model.layers.43.block_sparse_moe.experts.85.w2", "model.layers.43.block_sparse_moe.experts.86.w2", "model.layers.43.block_sparse_moe.experts.87.w2", "model.layers.43.block_sparse_moe.experts.88.w2", "model.layers.43.block_sparse_moe.experts.89.w2", "model.layers.43.block_sparse_moe.experts.90.w2", "model.layers.43.block_sparse_moe.experts.91.w2", "model.layers.43.block_sparse_moe.experts.92.w2", "model.layers.43.block_sparse_moe.experts.93.w2", "model.layers.43.block_sparse_moe.experts.94.w2", "model.layers.43.block_sparse_moe.experts.95.w2", "model.layers.43.block_sparse_moe.experts.96.w2", "model.layers.43.block_sparse_moe.experts.97.w2", "model.layers.43.block_sparse_moe.experts.98.w2", "model.layers.43.block_sparse_moe.experts.99.w2", "model.layers.43.block_sparse_moe.experts.100.w2", "model.layers.43.block_sparse_moe.experts.101.w2", "model.layers.43.block_sparse_moe.experts.102.w2", "model.layers.43.block_sparse_moe.experts.103.w2", "model.layers.43.block_sparse_moe.experts.104.w2", "model.layers.43.block_sparse_moe.experts.105.w2", "model.layers.43.block_sparse_moe.experts.106.w2", "model.layers.43.block_sparse_moe.experts.107.w2", "model.layers.43.block_sparse_moe.experts.108.w2", "model.layers.43.block_sparse_moe.experts.109.w2", "model.layers.43.block_sparse_moe.experts.110.w2", "model.layers.43.block_sparse_moe.experts.111.w2", "model.layers.43.block_sparse_moe.experts.112.w2", "model.layers.43.block_sparse_moe.experts.113.w2", "model.layers.43.block_sparse_moe.experts.114.w2", "model.layers.43.block_sparse_moe.experts.115.w2", "model.layers.43.block_sparse_moe.experts.116.w2", "model.layers.43.block_sparse_moe.experts.117.w2", "model.layers.43.block_sparse_moe.experts.118.w2", "model.layers.43.block_sparse_moe.experts.119.w2", "model.layers.43.block_sparse_moe.experts.120.w2", "model.layers.43.block_sparse_moe.experts.121.w2", "model.layers.43.block_sparse_moe.experts.122.w2", "model.layers.43.block_sparse_moe.experts.123.w2", "model.layers.43.block_sparse_moe.experts.124.w2", "model.layers.43.block_sparse_moe.experts.125.w2", "model.layers.43.block_sparse_moe.experts.126.w2", "model.layers.43.block_sparse_moe.experts.127.w2", "model.layers.43.block_sparse_moe.experts.128.w2", "model.layers.43.block_sparse_moe.experts.129.w2", "model.layers.43.block_sparse_moe.experts.130.w2", "model.layers.43.block_sparse_moe.experts.131.w2", "model.layers.43.block_sparse_moe.experts.132.w2", "model.layers.43.block_sparse_moe.experts.133.w2", "model.layers.43.block_sparse_moe.experts.134.w2", "model.layers.43.block_sparse_moe.experts.135.w2", "model.layers.43.block_sparse_moe.experts.136.w2", "model.layers.43.block_sparse_moe.experts.137.w2", "model.layers.43.block_sparse_moe.experts.138.w2", "model.layers.43.block_sparse_moe.experts.139.w2", "model.layers.43.block_sparse_moe.experts.140.w2", "model.layers.43.block_sparse_moe.experts.141.w2", "model.layers.43.block_sparse_moe.experts.142.w2", "model.layers.43.block_sparse_moe.experts.143.w2", "model.layers.43.block_sparse_moe.experts.144.w2", "model.layers.43.block_sparse_moe.experts.145.w2", "model.layers.43.block_sparse_moe.experts.146.w2", "model.layers.43.block_sparse_moe.experts.147.w2", "model.layers.43.block_sparse_moe.experts.148.w2", "model.layers.43.block_sparse_moe.experts.149.w2", "model.layers.43.block_sparse_moe.experts.150.w2", "model.layers.43.block_sparse_moe.experts.151.w2", "model.layers.43.block_sparse_moe.experts.152.w2", "model.layers.43.block_sparse_moe.experts.153.w2", "model.layers.43.block_sparse_moe.experts.154.w2", "model.layers.43.block_sparse_moe.experts.155.w2", "model.layers.43.block_sparse_moe.experts.156.w2", "model.layers.43.block_sparse_moe.experts.157.w2", "model.layers.43.block_sparse_moe.experts.158.w2", "model.layers.43.block_sparse_moe.experts.159.w2", "model.layers.43.block_sparse_moe.experts.160.w2", "model.layers.43.block_sparse_moe.experts.161.w2", "model.layers.43.block_sparse_moe.experts.162.w2", "model.layers.43.block_sparse_moe.experts.163.w2", "model.layers.43.block_sparse_moe.experts.164.w2", "model.layers.43.block_sparse_moe.experts.165.w2", "model.layers.43.block_sparse_moe.experts.166.w2", "model.layers.43.block_sparse_moe.experts.167.w2", "model.layers.43.block_sparse_moe.experts.168.w2", "model.layers.43.block_sparse_moe.experts.169.w2", "model.layers.43.block_sparse_moe.experts.170.w2", "model.layers.43.block_sparse_moe.experts.171.w2", "model.layers.43.block_sparse_moe.experts.172.w2", "model.layers.43.block_sparse_moe.experts.173.w2", "model.layers.43.block_sparse_moe.experts.174.w2", "model.layers.43.block_sparse_moe.experts.175.w2", "model.layers.43.block_sparse_moe.experts.176.w2", "model.layers.43.block_sparse_moe.experts.177.w2", "model.layers.43.block_sparse_moe.experts.178.w2", "model.layers.43.block_sparse_moe.experts.179.w2", "model.layers.43.block_sparse_moe.experts.180.w2", "model.layers.43.block_sparse_moe.experts.181.w2", "model.layers.43.block_sparse_moe.experts.182.w2", "model.layers.43.block_sparse_moe.experts.183.w2", "model.layers.43.block_sparse_moe.experts.184.w2", "model.layers.43.block_sparse_moe.experts.185.w2", "model.layers.43.block_sparse_moe.experts.186.w2", "model.layers.43.block_sparse_moe.experts.187.w2", "model.layers.43.block_sparse_moe.experts.188.w2", "model.layers.43.block_sparse_moe.experts.189.w2", "model.layers.43.block_sparse_moe.experts.190.w2", "model.layers.43.block_sparse_moe.experts.191.w2", "model.layers.43.block_sparse_moe.experts.192.w2", "model.layers.43.block_sparse_moe.experts.193.w2", "model.layers.43.block_sparse_moe.experts.194.w2", "model.layers.43.block_sparse_moe.experts.195.w2", "model.layers.43.block_sparse_moe.experts.196.w2", "model.layers.43.block_sparse_moe.experts.197.w2", "model.layers.43.block_sparse_moe.experts.198.w2", "model.layers.43.block_sparse_moe.experts.199.w2", "model.layers.43.block_sparse_moe.experts.200.w2", "model.layers.43.block_sparse_moe.experts.201.w2", "model.layers.43.block_sparse_moe.experts.202.w2", "model.layers.43.block_sparse_moe.experts.203.w2", "model.layers.43.block_sparse_moe.experts.204.w2", "model.layers.43.block_sparse_moe.experts.205.w2", "model.layers.43.block_sparse_moe.experts.206.w2", "model.layers.43.block_sparse_moe.experts.207.w2", "model.layers.43.block_sparse_moe.experts.208.w2", "model.layers.43.block_sparse_moe.experts.209.w2", "model.layers.43.block_sparse_moe.experts.210.w2", "model.layers.43.block_sparse_moe.experts.211.w2", "model.layers.43.block_sparse_moe.experts.212.w2", "model.layers.43.block_sparse_moe.experts.213.w2", "model.layers.43.block_sparse_moe.experts.214.w2", "model.layers.43.block_sparse_moe.experts.215.w2", "model.layers.43.block_sparse_moe.experts.216.w2", "model.layers.43.block_sparse_moe.experts.217.w2", "model.layers.43.block_sparse_moe.experts.218.w2", "model.layers.43.block_sparse_moe.experts.219.w2", "model.layers.43.block_sparse_moe.experts.220.w2", "model.layers.43.block_sparse_moe.experts.221.w2", "model.layers.43.block_sparse_moe.experts.222.w2", "model.layers.43.block_sparse_moe.experts.223.w2", "model.layers.43.block_sparse_moe.experts.224.w2", "model.layers.43.block_sparse_moe.experts.225.w2", "model.layers.43.block_sparse_moe.experts.226.w2", "model.layers.43.block_sparse_moe.experts.227.w2", "model.layers.43.block_sparse_moe.experts.228.w2", "model.layers.43.block_sparse_moe.experts.229.w2", "model.layers.43.block_sparse_moe.experts.230.w2", "model.layers.43.block_sparse_moe.experts.231.w2", "model.layers.43.block_sparse_moe.experts.232.w2", "model.layers.43.block_sparse_moe.experts.233.w2", "model.layers.43.block_sparse_moe.experts.234.w2", "model.layers.43.block_sparse_moe.experts.235.w2", "model.layers.43.block_sparse_moe.experts.236.w2", "model.layers.43.block_sparse_moe.experts.237.w2", "model.layers.43.block_sparse_moe.experts.238.w2", "model.layers.43.block_sparse_moe.experts.239.w2", "model.layers.43.block_sparse_moe.experts.240.w2", "model.layers.43.block_sparse_moe.experts.241.w2", "model.layers.43.block_sparse_moe.experts.242.w2", "model.layers.43.block_sparse_moe.experts.243.w2", "model.layers.43.block_sparse_moe.experts.244.w2", "model.layers.43.block_sparse_moe.experts.245.w2", "model.layers.43.block_sparse_moe.experts.246.w2", "model.layers.43.block_sparse_moe.experts.247.w2", "model.layers.43.block_sparse_moe.experts.248.w2", "model.layers.43.block_sparse_moe.experts.249.w2", "model.layers.43.block_sparse_moe.experts.250.w2", "model.layers.43.block_sparse_moe.experts.251.w2", "model.layers.43.block_sparse_moe.experts.252.w2", "model.layers.43.block_sparse_moe.experts.253.w2", "model.layers.43.block_sparse_moe.experts.254.w2", "model.layers.43.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0012615539133548626, "dbits": 3623878656 } ] }, { "idx": 88, "layers": [ "model.layers.44.self_attn.q_proj", "model.layers.44.self_attn.k_proj", "model.layers.44.self_attn.v_proj", "model.layers.44.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0030733199790120014, "dbits": 44040192 } ] }, { "idx": 89, "layers": [ "model.layers.44.block_sparse_moe.experts.0.w1", "model.layers.44.block_sparse_moe.experts.1.w1", "model.layers.44.block_sparse_moe.experts.2.w1", "model.layers.44.block_sparse_moe.experts.3.w1", "model.layers.44.block_sparse_moe.experts.4.w1", "model.layers.44.block_sparse_moe.experts.5.w1", "model.layers.44.block_sparse_moe.experts.6.w1", "model.layers.44.block_sparse_moe.experts.7.w1", "model.layers.44.block_sparse_moe.experts.8.w1", "model.layers.44.block_sparse_moe.experts.9.w1", "model.layers.44.block_sparse_moe.experts.10.w1", "model.layers.44.block_sparse_moe.experts.11.w1", "model.layers.44.block_sparse_moe.experts.12.w1", "model.layers.44.block_sparse_moe.experts.13.w1", "model.layers.44.block_sparse_moe.experts.14.w1", "model.layers.44.block_sparse_moe.experts.15.w1", "model.layers.44.block_sparse_moe.experts.16.w1", "model.layers.44.block_sparse_moe.experts.17.w1", "model.layers.44.block_sparse_moe.experts.18.w1", "model.layers.44.block_sparse_moe.experts.19.w1", "model.layers.44.block_sparse_moe.experts.20.w1", "model.layers.44.block_sparse_moe.experts.21.w1", "model.layers.44.block_sparse_moe.experts.22.w1", "model.layers.44.block_sparse_moe.experts.23.w1", "model.layers.44.block_sparse_moe.experts.24.w1", "model.layers.44.block_sparse_moe.experts.25.w1", "model.layers.44.block_sparse_moe.experts.26.w1", "model.layers.44.block_sparse_moe.experts.27.w1", "model.layers.44.block_sparse_moe.experts.28.w1", "model.layers.44.block_sparse_moe.experts.29.w1", "model.layers.44.block_sparse_moe.experts.30.w1", "model.layers.44.block_sparse_moe.experts.31.w1", "model.layers.44.block_sparse_moe.experts.32.w1", "model.layers.44.block_sparse_moe.experts.33.w1", "model.layers.44.block_sparse_moe.experts.34.w1", "model.layers.44.block_sparse_moe.experts.35.w1", "model.layers.44.block_sparse_moe.experts.36.w1", "model.layers.44.block_sparse_moe.experts.37.w1", "model.layers.44.block_sparse_moe.experts.38.w1", "model.layers.44.block_sparse_moe.experts.39.w1", "model.layers.44.block_sparse_moe.experts.40.w1", "model.layers.44.block_sparse_moe.experts.41.w1", "model.layers.44.block_sparse_moe.experts.42.w1", "model.layers.44.block_sparse_moe.experts.43.w1", "model.layers.44.block_sparse_moe.experts.44.w1", "model.layers.44.block_sparse_moe.experts.45.w1", "model.layers.44.block_sparse_moe.experts.46.w1", "model.layers.44.block_sparse_moe.experts.47.w1", "model.layers.44.block_sparse_moe.experts.48.w1", "model.layers.44.block_sparse_moe.experts.49.w1", "model.layers.44.block_sparse_moe.experts.50.w1", "model.layers.44.block_sparse_moe.experts.51.w1", "model.layers.44.block_sparse_moe.experts.52.w1", "model.layers.44.block_sparse_moe.experts.53.w1", "model.layers.44.block_sparse_moe.experts.54.w1", "model.layers.44.block_sparse_moe.experts.55.w1", "model.layers.44.block_sparse_moe.experts.56.w1", "model.layers.44.block_sparse_moe.experts.57.w1", "model.layers.44.block_sparse_moe.experts.58.w1", "model.layers.44.block_sparse_moe.experts.59.w1", "model.layers.44.block_sparse_moe.experts.60.w1", "model.layers.44.block_sparse_moe.experts.61.w1", "model.layers.44.block_sparse_moe.experts.62.w1", "model.layers.44.block_sparse_moe.experts.63.w1", "model.layers.44.block_sparse_moe.experts.64.w1", "model.layers.44.block_sparse_moe.experts.65.w1", "model.layers.44.block_sparse_moe.experts.66.w1", "model.layers.44.block_sparse_moe.experts.67.w1", "model.layers.44.block_sparse_moe.experts.68.w1", "model.layers.44.block_sparse_moe.experts.69.w1", "model.layers.44.block_sparse_moe.experts.70.w1", "model.layers.44.block_sparse_moe.experts.71.w1", "model.layers.44.block_sparse_moe.experts.72.w1", "model.layers.44.block_sparse_moe.experts.73.w1", "model.layers.44.block_sparse_moe.experts.74.w1", "model.layers.44.block_sparse_moe.experts.75.w1", "model.layers.44.block_sparse_moe.experts.76.w1", "model.layers.44.block_sparse_moe.experts.77.w1", "model.layers.44.block_sparse_moe.experts.78.w1", "model.layers.44.block_sparse_moe.experts.79.w1", "model.layers.44.block_sparse_moe.experts.80.w1", "model.layers.44.block_sparse_moe.experts.81.w1", "model.layers.44.block_sparse_moe.experts.82.w1", "model.layers.44.block_sparse_moe.experts.83.w1", "model.layers.44.block_sparse_moe.experts.84.w1", "model.layers.44.block_sparse_moe.experts.85.w1", "model.layers.44.block_sparse_moe.experts.86.w1", "model.layers.44.block_sparse_moe.experts.87.w1", "model.layers.44.block_sparse_moe.experts.88.w1", "model.layers.44.block_sparse_moe.experts.89.w1", "model.layers.44.block_sparse_moe.experts.90.w1", "model.layers.44.block_sparse_moe.experts.91.w1", "model.layers.44.block_sparse_moe.experts.92.w1", "model.layers.44.block_sparse_moe.experts.93.w1", "model.layers.44.block_sparse_moe.experts.94.w1", "model.layers.44.block_sparse_moe.experts.95.w1", "model.layers.44.block_sparse_moe.experts.96.w1", "model.layers.44.block_sparse_moe.experts.97.w1", "model.layers.44.block_sparse_moe.experts.98.w1", "model.layers.44.block_sparse_moe.experts.99.w1", "model.layers.44.block_sparse_moe.experts.100.w1", "model.layers.44.block_sparse_moe.experts.101.w1", "model.layers.44.block_sparse_moe.experts.102.w1", "model.layers.44.block_sparse_moe.experts.103.w1", "model.layers.44.block_sparse_moe.experts.104.w1", "model.layers.44.block_sparse_moe.experts.105.w1", "model.layers.44.block_sparse_moe.experts.106.w1", "model.layers.44.block_sparse_moe.experts.107.w1", "model.layers.44.block_sparse_moe.experts.108.w1", "model.layers.44.block_sparse_moe.experts.109.w1", "model.layers.44.block_sparse_moe.experts.110.w1", "model.layers.44.block_sparse_moe.experts.111.w1", "model.layers.44.block_sparse_moe.experts.112.w1", "model.layers.44.block_sparse_moe.experts.113.w1", "model.layers.44.block_sparse_moe.experts.114.w1", "model.layers.44.block_sparse_moe.experts.115.w1", "model.layers.44.block_sparse_moe.experts.116.w1", "model.layers.44.block_sparse_moe.experts.117.w1", "model.layers.44.block_sparse_moe.experts.118.w1", "model.layers.44.block_sparse_moe.experts.119.w1", "model.layers.44.block_sparse_moe.experts.120.w1", "model.layers.44.block_sparse_moe.experts.121.w1", "model.layers.44.block_sparse_moe.experts.122.w1", "model.layers.44.block_sparse_moe.experts.123.w1", "model.layers.44.block_sparse_moe.experts.124.w1", "model.layers.44.block_sparse_moe.experts.125.w1", "model.layers.44.block_sparse_moe.experts.126.w1", "model.layers.44.block_sparse_moe.experts.127.w1", "model.layers.44.block_sparse_moe.experts.128.w1", "model.layers.44.block_sparse_moe.experts.129.w1", "model.layers.44.block_sparse_moe.experts.130.w1", "model.layers.44.block_sparse_moe.experts.131.w1", "model.layers.44.block_sparse_moe.experts.132.w1", "model.layers.44.block_sparse_moe.experts.133.w1", "model.layers.44.block_sparse_moe.experts.134.w1", "model.layers.44.block_sparse_moe.experts.135.w1", "model.layers.44.block_sparse_moe.experts.136.w1", "model.layers.44.block_sparse_moe.experts.137.w1", "model.layers.44.block_sparse_moe.experts.138.w1", "model.layers.44.block_sparse_moe.experts.139.w1", "model.layers.44.block_sparse_moe.experts.140.w1", "model.layers.44.block_sparse_moe.experts.141.w1", "model.layers.44.block_sparse_moe.experts.142.w1", "model.layers.44.block_sparse_moe.experts.143.w1", "model.layers.44.block_sparse_moe.experts.144.w1", "model.layers.44.block_sparse_moe.experts.145.w1", "model.layers.44.block_sparse_moe.experts.146.w1", "model.layers.44.block_sparse_moe.experts.147.w1", "model.layers.44.block_sparse_moe.experts.148.w1", "model.layers.44.block_sparse_moe.experts.149.w1", "model.layers.44.block_sparse_moe.experts.150.w1", "model.layers.44.block_sparse_moe.experts.151.w1", "model.layers.44.block_sparse_moe.experts.152.w1", "model.layers.44.block_sparse_moe.experts.153.w1", "model.layers.44.block_sparse_moe.experts.154.w1", "model.layers.44.block_sparse_moe.experts.155.w1", "model.layers.44.block_sparse_moe.experts.156.w1", "model.layers.44.block_sparse_moe.experts.157.w1", "model.layers.44.block_sparse_moe.experts.158.w1", "model.layers.44.block_sparse_moe.experts.159.w1", "model.layers.44.block_sparse_moe.experts.160.w1", "model.layers.44.block_sparse_moe.experts.161.w1", "model.layers.44.block_sparse_moe.experts.162.w1", "model.layers.44.block_sparse_moe.experts.163.w1", "model.layers.44.block_sparse_moe.experts.164.w1", "model.layers.44.block_sparse_moe.experts.165.w1", "model.layers.44.block_sparse_moe.experts.166.w1", "model.layers.44.block_sparse_moe.experts.167.w1", "model.layers.44.block_sparse_moe.experts.168.w1", "model.layers.44.block_sparse_moe.experts.169.w1", "model.layers.44.block_sparse_moe.experts.170.w1", "model.layers.44.block_sparse_moe.experts.171.w1", "model.layers.44.block_sparse_moe.experts.172.w1", "model.layers.44.block_sparse_moe.experts.173.w1", "model.layers.44.block_sparse_moe.experts.174.w1", "model.layers.44.block_sparse_moe.experts.175.w1", "model.layers.44.block_sparse_moe.experts.176.w1", "model.layers.44.block_sparse_moe.experts.177.w1", "model.layers.44.block_sparse_moe.experts.178.w1", "model.layers.44.block_sparse_moe.experts.179.w1", "model.layers.44.block_sparse_moe.experts.180.w1", "model.layers.44.block_sparse_moe.experts.181.w1", "model.layers.44.block_sparse_moe.experts.182.w1", "model.layers.44.block_sparse_moe.experts.183.w1", "model.layers.44.block_sparse_moe.experts.184.w1", "model.layers.44.block_sparse_moe.experts.185.w1", "model.layers.44.block_sparse_moe.experts.186.w1", "model.layers.44.block_sparse_moe.experts.187.w1", "model.layers.44.block_sparse_moe.experts.188.w1", "model.layers.44.block_sparse_moe.experts.189.w1", "model.layers.44.block_sparse_moe.experts.190.w1", "model.layers.44.block_sparse_moe.experts.191.w1", "model.layers.44.block_sparse_moe.experts.192.w1", "model.layers.44.block_sparse_moe.experts.193.w1", "model.layers.44.block_sparse_moe.experts.194.w1", "model.layers.44.block_sparse_moe.experts.195.w1", "model.layers.44.block_sparse_moe.experts.196.w1", "model.layers.44.block_sparse_moe.experts.197.w1", "model.layers.44.block_sparse_moe.experts.198.w1", "model.layers.44.block_sparse_moe.experts.199.w1", "model.layers.44.block_sparse_moe.experts.200.w1", "model.layers.44.block_sparse_moe.experts.201.w1", "model.layers.44.block_sparse_moe.experts.202.w1", "model.layers.44.block_sparse_moe.experts.203.w1", "model.layers.44.block_sparse_moe.experts.204.w1", "model.layers.44.block_sparse_moe.experts.205.w1", "model.layers.44.block_sparse_moe.experts.206.w1", "model.layers.44.block_sparse_moe.experts.207.w1", "model.layers.44.block_sparse_moe.experts.208.w1", "model.layers.44.block_sparse_moe.experts.209.w1", "model.layers.44.block_sparse_moe.experts.210.w1", "model.layers.44.block_sparse_moe.experts.211.w1", "model.layers.44.block_sparse_moe.experts.212.w1", "model.layers.44.block_sparse_moe.experts.213.w1", "model.layers.44.block_sparse_moe.experts.214.w1", "model.layers.44.block_sparse_moe.experts.215.w1", "model.layers.44.block_sparse_moe.experts.216.w1", "model.layers.44.block_sparse_moe.experts.217.w1", "model.layers.44.block_sparse_moe.experts.218.w1", "model.layers.44.block_sparse_moe.experts.219.w1", "model.layers.44.block_sparse_moe.experts.220.w1", "model.layers.44.block_sparse_moe.experts.221.w1", "model.layers.44.block_sparse_moe.experts.222.w1", "model.layers.44.block_sparse_moe.experts.223.w1", "model.layers.44.block_sparse_moe.experts.224.w1", "model.layers.44.block_sparse_moe.experts.225.w1", "model.layers.44.block_sparse_moe.experts.226.w1", "model.layers.44.block_sparse_moe.experts.227.w1", "model.layers.44.block_sparse_moe.experts.228.w1", "model.layers.44.block_sparse_moe.experts.229.w1", "model.layers.44.block_sparse_moe.experts.230.w1", "model.layers.44.block_sparse_moe.experts.231.w1", "model.layers.44.block_sparse_moe.experts.232.w1", "model.layers.44.block_sparse_moe.experts.233.w1", "model.layers.44.block_sparse_moe.experts.234.w1", "model.layers.44.block_sparse_moe.experts.235.w1", "model.layers.44.block_sparse_moe.experts.236.w1", "model.layers.44.block_sparse_moe.experts.237.w1", "model.layers.44.block_sparse_moe.experts.238.w1", "model.layers.44.block_sparse_moe.experts.239.w1", "model.layers.44.block_sparse_moe.experts.240.w1", "model.layers.44.block_sparse_moe.experts.241.w1", "model.layers.44.block_sparse_moe.experts.242.w1", "model.layers.44.block_sparse_moe.experts.243.w1", "model.layers.44.block_sparse_moe.experts.244.w1", "model.layers.44.block_sparse_moe.experts.245.w1", "model.layers.44.block_sparse_moe.experts.246.w1", "model.layers.44.block_sparse_moe.experts.247.w1", "model.layers.44.block_sparse_moe.experts.248.w1", "model.layers.44.block_sparse_moe.experts.249.w1", "model.layers.44.block_sparse_moe.experts.250.w1", "model.layers.44.block_sparse_moe.experts.251.w1", "model.layers.44.block_sparse_moe.experts.252.w1", "model.layers.44.block_sparse_moe.experts.253.w1", "model.layers.44.block_sparse_moe.experts.254.w1", "model.layers.44.block_sparse_moe.experts.255.w1", "model.layers.44.block_sparse_moe.experts.0.w3", "model.layers.44.block_sparse_moe.experts.1.w3", "model.layers.44.block_sparse_moe.experts.2.w3", "model.layers.44.block_sparse_moe.experts.3.w3", "model.layers.44.block_sparse_moe.experts.4.w3", "model.layers.44.block_sparse_moe.experts.5.w3", "model.layers.44.block_sparse_moe.experts.6.w3", "model.layers.44.block_sparse_moe.experts.7.w3", "model.layers.44.block_sparse_moe.experts.8.w3", "model.layers.44.block_sparse_moe.experts.9.w3", "model.layers.44.block_sparse_moe.experts.10.w3", "model.layers.44.block_sparse_moe.experts.11.w3", "model.layers.44.block_sparse_moe.experts.12.w3", "model.layers.44.block_sparse_moe.experts.13.w3", "model.layers.44.block_sparse_moe.experts.14.w3", "model.layers.44.block_sparse_moe.experts.15.w3", "model.layers.44.block_sparse_moe.experts.16.w3", "model.layers.44.block_sparse_moe.experts.17.w3", "model.layers.44.block_sparse_moe.experts.18.w3", "model.layers.44.block_sparse_moe.experts.19.w3", "model.layers.44.block_sparse_moe.experts.20.w3", "model.layers.44.block_sparse_moe.experts.21.w3", "model.layers.44.block_sparse_moe.experts.22.w3", "model.layers.44.block_sparse_moe.experts.23.w3", "model.layers.44.block_sparse_moe.experts.24.w3", "model.layers.44.block_sparse_moe.experts.25.w3", "model.layers.44.block_sparse_moe.experts.26.w3", "model.layers.44.block_sparse_moe.experts.27.w3", "model.layers.44.block_sparse_moe.experts.28.w3", "model.layers.44.block_sparse_moe.experts.29.w3", "model.layers.44.block_sparse_moe.experts.30.w3", "model.layers.44.block_sparse_moe.experts.31.w3", "model.layers.44.block_sparse_moe.experts.32.w3", "model.layers.44.block_sparse_moe.experts.33.w3", "model.layers.44.block_sparse_moe.experts.34.w3", "model.layers.44.block_sparse_moe.experts.35.w3", "model.layers.44.block_sparse_moe.experts.36.w3", "model.layers.44.block_sparse_moe.experts.37.w3", "model.layers.44.block_sparse_moe.experts.38.w3", "model.layers.44.block_sparse_moe.experts.39.w3", "model.layers.44.block_sparse_moe.experts.40.w3", "model.layers.44.block_sparse_moe.experts.41.w3", "model.layers.44.block_sparse_moe.experts.42.w3", "model.layers.44.block_sparse_moe.experts.43.w3", "model.layers.44.block_sparse_moe.experts.44.w3", "model.layers.44.block_sparse_moe.experts.45.w3", "model.layers.44.block_sparse_moe.experts.46.w3", "model.layers.44.block_sparse_moe.experts.47.w3", "model.layers.44.block_sparse_moe.experts.48.w3", "model.layers.44.block_sparse_moe.experts.49.w3", "model.layers.44.block_sparse_moe.experts.50.w3", "model.layers.44.block_sparse_moe.experts.51.w3", "model.layers.44.block_sparse_moe.experts.52.w3", "model.layers.44.block_sparse_moe.experts.53.w3", "model.layers.44.block_sparse_moe.experts.54.w3", "model.layers.44.block_sparse_moe.experts.55.w3", "model.layers.44.block_sparse_moe.experts.56.w3", "model.layers.44.block_sparse_moe.experts.57.w3", "model.layers.44.block_sparse_moe.experts.58.w3", "model.layers.44.block_sparse_moe.experts.59.w3", "model.layers.44.block_sparse_moe.experts.60.w3", "model.layers.44.block_sparse_moe.experts.61.w3", "model.layers.44.block_sparse_moe.experts.62.w3", "model.layers.44.block_sparse_moe.experts.63.w3", "model.layers.44.block_sparse_moe.experts.64.w3", "model.layers.44.block_sparse_moe.experts.65.w3", "model.layers.44.block_sparse_moe.experts.66.w3", "model.layers.44.block_sparse_moe.experts.67.w3", "model.layers.44.block_sparse_moe.experts.68.w3", "model.layers.44.block_sparse_moe.experts.69.w3", "model.layers.44.block_sparse_moe.experts.70.w3", "model.layers.44.block_sparse_moe.experts.71.w3", "model.layers.44.block_sparse_moe.experts.72.w3", "model.layers.44.block_sparse_moe.experts.73.w3", "model.layers.44.block_sparse_moe.experts.74.w3", "model.layers.44.block_sparse_moe.experts.75.w3", "model.layers.44.block_sparse_moe.experts.76.w3", "model.layers.44.block_sparse_moe.experts.77.w3", "model.layers.44.block_sparse_moe.experts.78.w3", "model.layers.44.block_sparse_moe.experts.79.w3", "model.layers.44.block_sparse_moe.experts.80.w3", "model.layers.44.block_sparse_moe.experts.81.w3", "model.layers.44.block_sparse_moe.experts.82.w3", "model.layers.44.block_sparse_moe.experts.83.w3", "model.layers.44.block_sparse_moe.experts.84.w3", "model.layers.44.block_sparse_moe.experts.85.w3", "model.layers.44.block_sparse_moe.experts.86.w3", "model.layers.44.block_sparse_moe.experts.87.w3", "model.layers.44.block_sparse_moe.experts.88.w3", "model.layers.44.block_sparse_moe.experts.89.w3", "model.layers.44.block_sparse_moe.experts.90.w3", "model.layers.44.block_sparse_moe.experts.91.w3", "model.layers.44.block_sparse_moe.experts.92.w3", "model.layers.44.block_sparse_moe.experts.93.w3", "model.layers.44.block_sparse_moe.experts.94.w3", "model.layers.44.block_sparse_moe.experts.95.w3", "model.layers.44.block_sparse_moe.experts.96.w3", "model.layers.44.block_sparse_moe.experts.97.w3", "model.layers.44.block_sparse_moe.experts.98.w3", "model.layers.44.block_sparse_moe.experts.99.w3", "model.layers.44.block_sparse_moe.experts.100.w3", "model.layers.44.block_sparse_moe.experts.101.w3", "model.layers.44.block_sparse_moe.experts.102.w3", "model.layers.44.block_sparse_moe.experts.103.w3", "model.layers.44.block_sparse_moe.experts.104.w3", "model.layers.44.block_sparse_moe.experts.105.w3", "model.layers.44.block_sparse_moe.experts.106.w3", "model.layers.44.block_sparse_moe.experts.107.w3", "model.layers.44.block_sparse_moe.experts.108.w3", "model.layers.44.block_sparse_moe.experts.109.w3", "model.layers.44.block_sparse_moe.experts.110.w3", "model.layers.44.block_sparse_moe.experts.111.w3", "model.layers.44.block_sparse_moe.experts.112.w3", "model.layers.44.block_sparse_moe.experts.113.w3", "model.layers.44.block_sparse_moe.experts.114.w3", "model.layers.44.block_sparse_moe.experts.115.w3", "model.layers.44.block_sparse_moe.experts.116.w3", "model.layers.44.block_sparse_moe.experts.117.w3", "model.layers.44.block_sparse_moe.experts.118.w3", "model.layers.44.block_sparse_moe.experts.119.w3", "model.layers.44.block_sparse_moe.experts.120.w3", "model.layers.44.block_sparse_moe.experts.121.w3", "model.layers.44.block_sparse_moe.experts.122.w3", "model.layers.44.block_sparse_moe.experts.123.w3", "model.layers.44.block_sparse_moe.experts.124.w3", "model.layers.44.block_sparse_moe.experts.125.w3", "model.layers.44.block_sparse_moe.experts.126.w3", "model.layers.44.block_sparse_moe.experts.127.w3", "model.layers.44.block_sparse_moe.experts.128.w3", "model.layers.44.block_sparse_moe.experts.129.w3", "model.layers.44.block_sparse_moe.experts.130.w3", "model.layers.44.block_sparse_moe.experts.131.w3", "model.layers.44.block_sparse_moe.experts.132.w3", "model.layers.44.block_sparse_moe.experts.133.w3", "model.layers.44.block_sparse_moe.experts.134.w3", "model.layers.44.block_sparse_moe.experts.135.w3", "model.layers.44.block_sparse_moe.experts.136.w3", "model.layers.44.block_sparse_moe.experts.137.w3", "model.layers.44.block_sparse_moe.experts.138.w3", "model.layers.44.block_sparse_moe.experts.139.w3", "model.layers.44.block_sparse_moe.experts.140.w3", "model.layers.44.block_sparse_moe.experts.141.w3", "model.layers.44.block_sparse_moe.experts.142.w3", "model.layers.44.block_sparse_moe.experts.143.w3", "model.layers.44.block_sparse_moe.experts.144.w3", "model.layers.44.block_sparse_moe.experts.145.w3", "model.layers.44.block_sparse_moe.experts.146.w3", "model.layers.44.block_sparse_moe.experts.147.w3", "model.layers.44.block_sparse_moe.experts.148.w3", "model.layers.44.block_sparse_moe.experts.149.w3", "model.layers.44.block_sparse_moe.experts.150.w3", "model.layers.44.block_sparse_moe.experts.151.w3", "model.layers.44.block_sparse_moe.experts.152.w3", "model.layers.44.block_sparse_moe.experts.153.w3", "model.layers.44.block_sparse_moe.experts.154.w3", "model.layers.44.block_sparse_moe.experts.155.w3", "model.layers.44.block_sparse_moe.experts.156.w3", "model.layers.44.block_sparse_moe.experts.157.w3", "model.layers.44.block_sparse_moe.experts.158.w3", "model.layers.44.block_sparse_moe.experts.159.w3", "model.layers.44.block_sparse_moe.experts.160.w3", "model.layers.44.block_sparse_moe.experts.161.w3", "model.layers.44.block_sparse_moe.experts.162.w3", "model.layers.44.block_sparse_moe.experts.163.w3", "model.layers.44.block_sparse_moe.experts.164.w3", "model.layers.44.block_sparse_moe.experts.165.w3", "model.layers.44.block_sparse_moe.experts.166.w3", "model.layers.44.block_sparse_moe.experts.167.w3", "model.layers.44.block_sparse_moe.experts.168.w3", "model.layers.44.block_sparse_moe.experts.169.w3", "model.layers.44.block_sparse_moe.experts.170.w3", "model.layers.44.block_sparse_moe.experts.171.w3", "model.layers.44.block_sparse_moe.experts.172.w3", "model.layers.44.block_sparse_moe.experts.173.w3", "model.layers.44.block_sparse_moe.experts.174.w3", "model.layers.44.block_sparse_moe.experts.175.w3", "model.layers.44.block_sparse_moe.experts.176.w3", "model.layers.44.block_sparse_moe.experts.177.w3", "model.layers.44.block_sparse_moe.experts.178.w3", "model.layers.44.block_sparse_moe.experts.179.w3", "model.layers.44.block_sparse_moe.experts.180.w3", "model.layers.44.block_sparse_moe.experts.181.w3", "model.layers.44.block_sparse_moe.experts.182.w3", "model.layers.44.block_sparse_moe.experts.183.w3", "model.layers.44.block_sparse_moe.experts.184.w3", "model.layers.44.block_sparse_moe.experts.185.w3", "model.layers.44.block_sparse_moe.experts.186.w3", "model.layers.44.block_sparse_moe.experts.187.w3", "model.layers.44.block_sparse_moe.experts.188.w3", "model.layers.44.block_sparse_moe.experts.189.w3", "model.layers.44.block_sparse_moe.experts.190.w3", "model.layers.44.block_sparse_moe.experts.191.w3", "model.layers.44.block_sparse_moe.experts.192.w3", "model.layers.44.block_sparse_moe.experts.193.w3", "model.layers.44.block_sparse_moe.experts.194.w3", "model.layers.44.block_sparse_moe.experts.195.w3", "model.layers.44.block_sparse_moe.experts.196.w3", "model.layers.44.block_sparse_moe.experts.197.w3", "model.layers.44.block_sparse_moe.experts.198.w3", "model.layers.44.block_sparse_moe.experts.199.w3", "model.layers.44.block_sparse_moe.experts.200.w3", "model.layers.44.block_sparse_moe.experts.201.w3", "model.layers.44.block_sparse_moe.experts.202.w3", "model.layers.44.block_sparse_moe.experts.203.w3", "model.layers.44.block_sparse_moe.experts.204.w3", "model.layers.44.block_sparse_moe.experts.205.w3", "model.layers.44.block_sparse_moe.experts.206.w3", "model.layers.44.block_sparse_moe.experts.207.w3", "model.layers.44.block_sparse_moe.experts.208.w3", "model.layers.44.block_sparse_moe.experts.209.w3", "model.layers.44.block_sparse_moe.experts.210.w3", "model.layers.44.block_sparse_moe.experts.211.w3", "model.layers.44.block_sparse_moe.experts.212.w3", "model.layers.44.block_sparse_moe.experts.213.w3", "model.layers.44.block_sparse_moe.experts.214.w3", "model.layers.44.block_sparse_moe.experts.215.w3", "model.layers.44.block_sparse_moe.experts.216.w3", "model.layers.44.block_sparse_moe.experts.217.w3", "model.layers.44.block_sparse_moe.experts.218.w3", "model.layers.44.block_sparse_moe.experts.219.w3", "model.layers.44.block_sparse_moe.experts.220.w3", "model.layers.44.block_sparse_moe.experts.221.w3", "model.layers.44.block_sparse_moe.experts.222.w3", "model.layers.44.block_sparse_moe.experts.223.w3", "model.layers.44.block_sparse_moe.experts.224.w3", "model.layers.44.block_sparse_moe.experts.225.w3", "model.layers.44.block_sparse_moe.experts.226.w3", "model.layers.44.block_sparse_moe.experts.227.w3", "model.layers.44.block_sparse_moe.experts.228.w3", "model.layers.44.block_sparse_moe.experts.229.w3", "model.layers.44.block_sparse_moe.experts.230.w3", "model.layers.44.block_sparse_moe.experts.231.w3", "model.layers.44.block_sparse_moe.experts.232.w3", "model.layers.44.block_sparse_moe.experts.233.w3", "model.layers.44.block_sparse_moe.experts.234.w3", "model.layers.44.block_sparse_moe.experts.235.w3", "model.layers.44.block_sparse_moe.experts.236.w3", "model.layers.44.block_sparse_moe.experts.237.w3", "model.layers.44.block_sparse_moe.experts.238.w3", "model.layers.44.block_sparse_moe.experts.239.w3", "model.layers.44.block_sparse_moe.experts.240.w3", "model.layers.44.block_sparse_moe.experts.241.w3", "model.layers.44.block_sparse_moe.experts.242.w3", "model.layers.44.block_sparse_moe.experts.243.w3", "model.layers.44.block_sparse_moe.experts.244.w3", "model.layers.44.block_sparse_moe.experts.245.w3", "model.layers.44.block_sparse_moe.experts.246.w3", "model.layers.44.block_sparse_moe.experts.247.w3", "model.layers.44.block_sparse_moe.experts.248.w3", "model.layers.44.block_sparse_moe.experts.249.w3", "model.layers.44.block_sparse_moe.experts.250.w3", "model.layers.44.block_sparse_moe.experts.251.w3", "model.layers.44.block_sparse_moe.experts.252.w3", "model.layers.44.block_sparse_moe.experts.253.w3", "model.layers.44.block_sparse_moe.experts.254.w3", "model.layers.44.block_sparse_moe.experts.255.w3", "model.layers.44.block_sparse_moe.experts.0.w2", "model.layers.44.block_sparse_moe.experts.1.w2", "model.layers.44.block_sparse_moe.experts.2.w2", "model.layers.44.block_sparse_moe.experts.3.w2", "model.layers.44.block_sparse_moe.experts.4.w2", "model.layers.44.block_sparse_moe.experts.5.w2", "model.layers.44.block_sparse_moe.experts.6.w2", "model.layers.44.block_sparse_moe.experts.7.w2", "model.layers.44.block_sparse_moe.experts.8.w2", "model.layers.44.block_sparse_moe.experts.9.w2", "model.layers.44.block_sparse_moe.experts.10.w2", "model.layers.44.block_sparse_moe.experts.11.w2", "model.layers.44.block_sparse_moe.experts.12.w2", "model.layers.44.block_sparse_moe.experts.13.w2", "model.layers.44.block_sparse_moe.experts.14.w2", "model.layers.44.block_sparse_moe.experts.15.w2", "model.layers.44.block_sparse_moe.experts.16.w2", "model.layers.44.block_sparse_moe.experts.17.w2", "model.layers.44.block_sparse_moe.experts.18.w2", "model.layers.44.block_sparse_moe.experts.19.w2", "model.layers.44.block_sparse_moe.experts.20.w2", "model.layers.44.block_sparse_moe.experts.21.w2", "model.layers.44.block_sparse_moe.experts.22.w2", "model.layers.44.block_sparse_moe.experts.23.w2", "model.layers.44.block_sparse_moe.experts.24.w2", "model.layers.44.block_sparse_moe.experts.25.w2", "model.layers.44.block_sparse_moe.experts.26.w2", "model.layers.44.block_sparse_moe.experts.27.w2", "model.layers.44.block_sparse_moe.experts.28.w2", "model.layers.44.block_sparse_moe.experts.29.w2", "model.layers.44.block_sparse_moe.experts.30.w2", "model.layers.44.block_sparse_moe.experts.31.w2", "model.layers.44.block_sparse_moe.experts.32.w2", "model.layers.44.block_sparse_moe.experts.33.w2", "model.layers.44.block_sparse_moe.experts.34.w2", "model.layers.44.block_sparse_moe.experts.35.w2", "model.layers.44.block_sparse_moe.experts.36.w2", "model.layers.44.block_sparse_moe.experts.37.w2", "model.layers.44.block_sparse_moe.experts.38.w2", "model.layers.44.block_sparse_moe.experts.39.w2", "model.layers.44.block_sparse_moe.experts.40.w2", "model.layers.44.block_sparse_moe.experts.41.w2", "model.layers.44.block_sparse_moe.experts.42.w2", "model.layers.44.block_sparse_moe.experts.43.w2", "model.layers.44.block_sparse_moe.experts.44.w2", "model.layers.44.block_sparse_moe.experts.45.w2", "model.layers.44.block_sparse_moe.experts.46.w2", "model.layers.44.block_sparse_moe.experts.47.w2", "model.layers.44.block_sparse_moe.experts.48.w2", "model.layers.44.block_sparse_moe.experts.49.w2", "model.layers.44.block_sparse_moe.experts.50.w2", "model.layers.44.block_sparse_moe.experts.51.w2", "model.layers.44.block_sparse_moe.experts.52.w2", "model.layers.44.block_sparse_moe.experts.53.w2", "model.layers.44.block_sparse_moe.experts.54.w2", "model.layers.44.block_sparse_moe.experts.55.w2", "model.layers.44.block_sparse_moe.experts.56.w2", "model.layers.44.block_sparse_moe.experts.57.w2", "model.layers.44.block_sparse_moe.experts.58.w2", "model.layers.44.block_sparse_moe.experts.59.w2", "model.layers.44.block_sparse_moe.experts.60.w2", "model.layers.44.block_sparse_moe.experts.61.w2", "model.layers.44.block_sparse_moe.experts.62.w2", "model.layers.44.block_sparse_moe.experts.63.w2", "model.layers.44.block_sparse_moe.experts.64.w2", "model.layers.44.block_sparse_moe.experts.65.w2", "model.layers.44.block_sparse_moe.experts.66.w2", "model.layers.44.block_sparse_moe.experts.67.w2", "model.layers.44.block_sparse_moe.experts.68.w2", "model.layers.44.block_sparse_moe.experts.69.w2", "model.layers.44.block_sparse_moe.experts.70.w2", "model.layers.44.block_sparse_moe.experts.71.w2", "model.layers.44.block_sparse_moe.experts.72.w2", "model.layers.44.block_sparse_moe.experts.73.w2", "model.layers.44.block_sparse_moe.experts.74.w2", "model.layers.44.block_sparse_moe.experts.75.w2", "model.layers.44.block_sparse_moe.experts.76.w2", "model.layers.44.block_sparse_moe.experts.77.w2", "model.layers.44.block_sparse_moe.experts.78.w2", "model.layers.44.block_sparse_moe.experts.79.w2", "model.layers.44.block_sparse_moe.experts.80.w2", "model.layers.44.block_sparse_moe.experts.81.w2", "model.layers.44.block_sparse_moe.experts.82.w2", "model.layers.44.block_sparse_moe.experts.83.w2", "model.layers.44.block_sparse_moe.experts.84.w2", "model.layers.44.block_sparse_moe.experts.85.w2", "model.layers.44.block_sparse_moe.experts.86.w2", "model.layers.44.block_sparse_moe.experts.87.w2", "model.layers.44.block_sparse_moe.experts.88.w2", "model.layers.44.block_sparse_moe.experts.89.w2", "model.layers.44.block_sparse_moe.experts.90.w2", "model.layers.44.block_sparse_moe.experts.91.w2", "model.layers.44.block_sparse_moe.experts.92.w2", "model.layers.44.block_sparse_moe.experts.93.w2", "model.layers.44.block_sparse_moe.experts.94.w2", "model.layers.44.block_sparse_moe.experts.95.w2", "model.layers.44.block_sparse_moe.experts.96.w2", "model.layers.44.block_sparse_moe.experts.97.w2", "model.layers.44.block_sparse_moe.experts.98.w2", "model.layers.44.block_sparse_moe.experts.99.w2", "model.layers.44.block_sparse_moe.experts.100.w2", "model.layers.44.block_sparse_moe.experts.101.w2", "model.layers.44.block_sparse_moe.experts.102.w2", "model.layers.44.block_sparse_moe.experts.103.w2", "model.layers.44.block_sparse_moe.experts.104.w2", "model.layers.44.block_sparse_moe.experts.105.w2", "model.layers.44.block_sparse_moe.experts.106.w2", "model.layers.44.block_sparse_moe.experts.107.w2", "model.layers.44.block_sparse_moe.experts.108.w2", "model.layers.44.block_sparse_moe.experts.109.w2", "model.layers.44.block_sparse_moe.experts.110.w2", "model.layers.44.block_sparse_moe.experts.111.w2", "model.layers.44.block_sparse_moe.experts.112.w2", "model.layers.44.block_sparse_moe.experts.113.w2", "model.layers.44.block_sparse_moe.experts.114.w2", "model.layers.44.block_sparse_moe.experts.115.w2", "model.layers.44.block_sparse_moe.experts.116.w2", "model.layers.44.block_sparse_moe.experts.117.w2", "model.layers.44.block_sparse_moe.experts.118.w2", "model.layers.44.block_sparse_moe.experts.119.w2", "model.layers.44.block_sparse_moe.experts.120.w2", "model.layers.44.block_sparse_moe.experts.121.w2", "model.layers.44.block_sparse_moe.experts.122.w2", "model.layers.44.block_sparse_moe.experts.123.w2", "model.layers.44.block_sparse_moe.experts.124.w2", "model.layers.44.block_sparse_moe.experts.125.w2", "model.layers.44.block_sparse_moe.experts.126.w2", "model.layers.44.block_sparse_moe.experts.127.w2", "model.layers.44.block_sparse_moe.experts.128.w2", "model.layers.44.block_sparse_moe.experts.129.w2", "model.layers.44.block_sparse_moe.experts.130.w2", "model.layers.44.block_sparse_moe.experts.131.w2", "model.layers.44.block_sparse_moe.experts.132.w2", "model.layers.44.block_sparse_moe.experts.133.w2", "model.layers.44.block_sparse_moe.experts.134.w2", "model.layers.44.block_sparse_moe.experts.135.w2", "model.layers.44.block_sparse_moe.experts.136.w2", "model.layers.44.block_sparse_moe.experts.137.w2", "model.layers.44.block_sparse_moe.experts.138.w2", "model.layers.44.block_sparse_moe.experts.139.w2", "model.layers.44.block_sparse_moe.experts.140.w2", "model.layers.44.block_sparse_moe.experts.141.w2", "model.layers.44.block_sparse_moe.experts.142.w2", "model.layers.44.block_sparse_moe.experts.143.w2", "model.layers.44.block_sparse_moe.experts.144.w2", "model.layers.44.block_sparse_moe.experts.145.w2", "model.layers.44.block_sparse_moe.experts.146.w2", "model.layers.44.block_sparse_moe.experts.147.w2", "model.layers.44.block_sparse_moe.experts.148.w2", "model.layers.44.block_sparse_moe.experts.149.w2", "model.layers.44.block_sparse_moe.experts.150.w2", "model.layers.44.block_sparse_moe.experts.151.w2", "model.layers.44.block_sparse_moe.experts.152.w2", "model.layers.44.block_sparse_moe.experts.153.w2", "model.layers.44.block_sparse_moe.experts.154.w2", "model.layers.44.block_sparse_moe.experts.155.w2", "model.layers.44.block_sparse_moe.experts.156.w2", "model.layers.44.block_sparse_moe.experts.157.w2", "model.layers.44.block_sparse_moe.experts.158.w2", "model.layers.44.block_sparse_moe.experts.159.w2", "model.layers.44.block_sparse_moe.experts.160.w2", "model.layers.44.block_sparse_moe.experts.161.w2", "model.layers.44.block_sparse_moe.experts.162.w2", "model.layers.44.block_sparse_moe.experts.163.w2", "model.layers.44.block_sparse_moe.experts.164.w2", "model.layers.44.block_sparse_moe.experts.165.w2", "model.layers.44.block_sparse_moe.experts.166.w2", "model.layers.44.block_sparse_moe.experts.167.w2", "model.layers.44.block_sparse_moe.experts.168.w2", "model.layers.44.block_sparse_moe.experts.169.w2", "model.layers.44.block_sparse_moe.experts.170.w2", "model.layers.44.block_sparse_moe.experts.171.w2", "model.layers.44.block_sparse_moe.experts.172.w2", "model.layers.44.block_sparse_moe.experts.173.w2", "model.layers.44.block_sparse_moe.experts.174.w2", "model.layers.44.block_sparse_moe.experts.175.w2", "model.layers.44.block_sparse_moe.experts.176.w2", "model.layers.44.block_sparse_moe.experts.177.w2", "model.layers.44.block_sparse_moe.experts.178.w2", "model.layers.44.block_sparse_moe.experts.179.w2", "model.layers.44.block_sparse_moe.experts.180.w2", "model.layers.44.block_sparse_moe.experts.181.w2", "model.layers.44.block_sparse_moe.experts.182.w2", "model.layers.44.block_sparse_moe.experts.183.w2", "model.layers.44.block_sparse_moe.experts.184.w2", "model.layers.44.block_sparse_moe.experts.185.w2", "model.layers.44.block_sparse_moe.experts.186.w2", "model.layers.44.block_sparse_moe.experts.187.w2", "model.layers.44.block_sparse_moe.experts.188.w2", "model.layers.44.block_sparse_moe.experts.189.w2", "model.layers.44.block_sparse_moe.experts.190.w2", "model.layers.44.block_sparse_moe.experts.191.w2", "model.layers.44.block_sparse_moe.experts.192.w2", "model.layers.44.block_sparse_moe.experts.193.w2", "model.layers.44.block_sparse_moe.experts.194.w2", "model.layers.44.block_sparse_moe.experts.195.w2", "model.layers.44.block_sparse_moe.experts.196.w2", "model.layers.44.block_sparse_moe.experts.197.w2", "model.layers.44.block_sparse_moe.experts.198.w2", "model.layers.44.block_sparse_moe.experts.199.w2", "model.layers.44.block_sparse_moe.experts.200.w2", "model.layers.44.block_sparse_moe.experts.201.w2", "model.layers.44.block_sparse_moe.experts.202.w2", "model.layers.44.block_sparse_moe.experts.203.w2", "model.layers.44.block_sparse_moe.experts.204.w2", "model.layers.44.block_sparse_moe.experts.205.w2", "model.layers.44.block_sparse_moe.experts.206.w2", "model.layers.44.block_sparse_moe.experts.207.w2", "model.layers.44.block_sparse_moe.experts.208.w2", "model.layers.44.block_sparse_moe.experts.209.w2", "model.layers.44.block_sparse_moe.experts.210.w2", "model.layers.44.block_sparse_moe.experts.211.w2", "model.layers.44.block_sparse_moe.experts.212.w2", "model.layers.44.block_sparse_moe.experts.213.w2", "model.layers.44.block_sparse_moe.experts.214.w2", "model.layers.44.block_sparse_moe.experts.215.w2", "model.layers.44.block_sparse_moe.experts.216.w2", "model.layers.44.block_sparse_moe.experts.217.w2", "model.layers.44.block_sparse_moe.experts.218.w2", "model.layers.44.block_sparse_moe.experts.219.w2", "model.layers.44.block_sparse_moe.experts.220.w2", "model.layers.44.block_sparse_moe.experts.221.w2", "model.layers.44.block_sparse_moe.experts.222.w2", "model.layers.44.block_sparse_moe.experts.223.w2", "model.layers.44.block_sparse_moe.experts.224.w2", "model.layers.44.block_sparse_moe.experts.225.w2", "model.layers.44.block_sparse_moe.experts.226.w2", "model.layers.44.block_sparse_moe.experts.227.w2", "model.layers.44.block_sparse_moe.experts.228.w2", "model.layers.44.block_sparse_moe.experts.229.w2", "model.layers.44.block_sparse_moe.experts.230.w2", "model.layers.44.block_sparse_moe.experts.231.w2", "model.layers.44.block_sparse_moe.experts.232.w2", "model.layers.44.block_sparse_moe.experts.233.w2", "model.layers.44.block_sparse_moe.experts.234.w2", "model.layers.44.block_sparse_moe.experts.235.w2", "model.layers.44.block_sparse_moe.experts.236.w2", "model.layers.44.block_sparse_moe.experts.237.w2", "model.layers.44.block_sparse_moe.experts.238.w2", "model.layers.44.block_sparse_moe.experts.239.w2", "model.layers.44.block_sparse_moe.experts.240.w2", "model.layers.44.block_sparse_moe.experts.241.w2", "model.layers.44.block_sparse_moe.experts.242.w2", "model.layers.44.block_sparse_moe.experts.243.w2", "model.layers.44.block_sparse_moe.experts.244.w2", "model.layers.44.block_sparse_moe.experts.245.w2", "model.layers.44.block_sparse_moe.experts.246.w2", "model.layers.44.block_sparse_moe.experts.247.w2", "model.layers.44.block_sparse_moe.experts.248.w2", "model.layers.44.block_sparse_moe.experts.249.w2", "model.layers.44.block_sparse_moe.experts.250.w2", "model.layers.44.block_sparse_moe.experts.251.w2", "model.layers.44.block_sparse_moe.experts.252.w2", "model.layers.44.block_sparse_moe.experts.253.w2", "model.layers.44.block_sparse_moe.experts.254.w2", "model.layers.44.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -6.0755759477615356e-05, "dbits": 3623878656 } ] }, { "idx": 90, "layers": [ "model.layers.45.self_attn.q_proj", "model.layers.45.self_attn.k_proj", "model.layers.45.self_attn.v_proj", "model.layers.45.self_attn.o_proj" ], "candidates": [ { "dkld": -0.004110577329993259, "dbits": 44040192 } ] }, { "idx": 91, "layers": [ "model.layers.45.block_sparse_moe.experts.0.w1", "model.layers.45.block_sparse_moe.experts.1.w1", "model.layers.45.block_sparse_moe.experts.2.w1", "model.layers.45.block_sparse_moe.experts.3.w1", "model.layers.45.block_sparse_moe.experts.4.w1", "model.layers.45.block_sparse_moe.experts.5.w1", "model.layers.45.block_sparse_moe.experts.6.w1", "model.layers.45.block_sparse_moe.experts.7.w1", "model.layers.45.block_sparse_moe.experts.8.w1", "model.layers.45.block_sparse_moe.experts.9.w1", "model.layers.45.block_sparse_moe.experts.10.w1", "model.layers.45.block_sparse_moe.experts.11.w1", "model.layers.45.block_sparse_moe.experts.12.w1", "model.layers.45.block_sparse_moe.experts.13.w1", "model.layers.45.block_sparse_moe.experts.14.w1", "model.layers.45.block_sparse_moe.experts.15.w1", "model.layers.45.block_sparse_moe.experts.16.w1", "model.layers.45.block_sparse_moe.experts.17.w1", "model.layers.45.block_sparse_moe.experts.18.w1", "model.layers.45.block_sparse_moe.experts.19.w1", "model.layers.45.block_sparse_moe.experts.20.w1", "model.layers.45.block_sparse_moe.experts.21.w1", "model.layers.45.block_sparse_moe.experts.22.w1", "model.layers.45.block_sparse_moe.experts.23.w1", "model.layers.45.block_sparse_moe.experts.24.w1", "model.layers.45.block_sparse_moe.experts.25.w1", "model.layers.45.block_sparse_moe.experts.26.w1", "model.layers.45.block_sparse_moe.experts.27.w1", "model.layers.45.block_sparse_moe.experts.28.w1", "model.layers.45.block_sparse_moe.experts.29.w1", "model.layers.45.block_sparse_moe.experts.30.w1", "model.layers.45.block_sparse_moe.experts.31.w1", "model.layers.45.block_sparse_moe.experts.32.w1", "model.layers.45.block_sparse_moe.experts.33.w1", "model.layers.45.block_sparse_moe.experts.34.w1", "model.layers.45.block_sparse_moe.experts.35.w1", "model.layers.45.block_sparse_moe.experts.36.w1", "model.layers.45.block_sparse_moe.experts.37.w1", "model.layers.45.block_sparse_moe.experts.38.w1", "model.layers.45.block_sparse_moe.experts.39.w1", "model.layers.45.block_sparse_moe.experts.40.w1", "model.layers.45.block_sparse_moe.experts.41.w1", "model.layers.45.block_sparse_moe.experts.42.w1", "model.layers.45.block_sparse_moe.experts.43.w1", "model.layers.45.block_sparse_moe.experts.44.w1", "model.layers.45.block_sparse_moe.experts.45.w1", "model.layers.45.block_sparse_moe.experts.46.w1", "model.layers.45.block_sparse_moe.experts.47.w1", "model.layers.45.block_sparse_moe.experts.48.w1", "model.layers.45.block_sparse_moe.experts.49.w1", "model.layers.45.block_sparse_moe.experts.50.w1", "model.layers.45.block_sparse_moe.experts.51.w1", "model.layers.45.block_sparse_moe.experts.52.w1", "model.layers.45.block_sparse_moe.experts.53.w1", "model.layers.45.block_sparse_moe.experts.54.w1", "model.layers.45.block_sparse_moe.experts.55.w1", "model.layers.45.block_sparse_moe.experts.56.w1", "model.layers.45.block_sparse_moe.experts.57.w1", "model.layers.45.block_sparse_moe.experts.58.w1", "model.layers.45.block_sparse_moe.experts.59.w1", "model.layers.45.block_sparse_moe.experts.60.w1", "model.layers.45.block_sparse_moe.experts.61.w1", "model.layers.45.block_sparse_moe.experts.62.w1", "model.layers.45.block_sparse_moe.experts.63.w1", "model.layers.45.block_sparse_moe.experts.64.w1", "model.layers.45.block_sparse_moe.experts.65.w1", "model.layers.45.block_sparse_moe.experts.66.w1", "model.layers.45.block_sparse_moe.experts.67.w1", "model.layers.45.block_sparse_moe.experts.68.w1", "model.layers.45.block_sparse_moe.experts.69.w1", "model.layers.45.block_sparse_moe.experts.70.w1", "model.layers.45.block_sparse_moe.experts.71.w1", "model.layers.45.block_sparse_moe.experts.72.w1", "model.layers.45.block_sparse_moe.experts.73.w1", "model.layers.45.block_sparse_moe.experts.74.w1", "model.layers.45.block_sparse_moe.experts.75.w1", "model.layers.45.block_sparse_moe.experts.76.w1", "model.layers.45.block_sparse_moe.experts.77.w1", "model.layers.45.block_sparse_moe.experts.78.w1", "model.layers.45.block_sparse_moe.experts.79.w1", "model.layers.45.block_sparse_moe.experts.80.w1", "model.layers.45.block_sparse_moe.experts.81.w1", "model.layers.45.block_sparse_moe.experts.82.w1", "model.layers.45.block_sparse_moe.experts.83.w1", "model.layers.45.block_sparse_moe.experts.84.w1", "model.layers.45.block_sparse_moe.experts.85.w1", "model.layers.45.block_sparse_moe.experts.86.w1", "model.layers.45.block_sparse_moe.experts.87.w1", "model.layers.45.block_sparse_moe.experts.88.w1", "model.layers.45.block_sparse_moe.experts.89.w1", "model.layers.45.block_sparse_moe.experts.90.w1", "model.layers.45.block_sparse_moe.experts.91.w1", "model.layers.45.block_sparse_moe.experts.92.w1", "model.layers.45.block_sparse_moe.experts.93.w1", "model.layers.45.block_sparse_moe.experts.94.w1", "model.layers.45.block_sparse_moe.experts.95.w1", "model.layers.45.block_sparse_moe.experts.96.w1", "model.layers.45.block_sparse_moe.experts.97.w1", "model.layers.45.block_sparse_moe.experts.98.w1", "model.layers.45.block_sparse_moe.experts.99.w1", "model.layers.45.block_sparse_moe.experts.100.w1", "model.layers.45.block_sparse_moe.experts.101.w1", "model.layers.45.block_sparse_moe.experts.102.w1", "model.layers.45.block_sparse_moe.experts.103.w1", "model.layers.45.block_sparse_moe.experts.104.w1", "model.layers.45.block_sparse_moe.experts.105.w1", "model.layers.45.block_sparse_moe.experts.106.w1", "model.layers.45.block_sparse_moe.experts.107.w1", "model.layers.45.block_sparse_moe.experts.108.w1", "model.layers.45.block_sparse_moe.experts.109.w1", "model.layers.45.block_sparse_moe.experts.110.w1", "model.layers.45.block_sparse_moe.experts.111.w1", "model.layers.45.block_sparse_moe.experts.112.w1", "model.layers.45.block_sparse_moe.experts.113.w1", "model.layers.45.block_sparse_moe.experts.114.w1", "model.layers.45.block_sparse_moe.experts.115.w1", "model.layers.45.block_sparse_moe.experts.116.w1", "model.layers.45.block_sparse_moe.experts.117.w1", "model.layers.45.block_sparse_moe.experts.118.w1", "model.layers.45.block_sparse_moe.experts.119.w1", "model.layers.45.block_sparse_moe.experts.120.w1", "model.layers.45.block_sparse_moe.experts.121.w1", "model.layers.45.block_sparse_moe.experts.122.w1", "model.layers.45.block_sparse_moe.experts.123.w1", "model.layers.45.block_sparse_moe.experts.124.w1", "model.layers.45.block_sparse_moe.experts.125.w1", "model.layers.45.block_sparse_moe.experts.126.w1", "model.layers.45.block_sparse_moe.experts.127.w1", "model.layers.45.block_sparse_moe.experts.128.w1", "model.layers.45.block_sparse_moe.experts.129.w1", "model.layers.45.block_sparse_moe.experts.130.w1", "model.layers.45.block_sparse_moe.experts.131.w1", "model.layers.45.block_sparse_moe.experts.132.w1", "model.layers.45.block_sparse_moe.experts.133.w1", "model.layers.45.block_sparse_moe.experts.134.w1", "model.layers.45.block_sparse_moe.experts.135.w1", "model.layers.45.block_sparse_moe.experts.136.w1", "model.layers.45.block_sparse_moe.experts.137.w1", "model.layers.45.block_sparse_moe.experts.138.w1", "model.layers.45.block_sparse_moe.experts.139.w1", "model.layers.45.block_sparse_moe.experts.140.w1", "model.layers.45.block_sparse_moe.experts.141.w1", "model.layers.45.block_sparse_moe.experts.142.w1", "model.layers.45.block_sparse_moe.experts.143.w1", "model.layers.45.block_sparse_moe.experts.144.w1", "model.layers.45.block_sparse_moe.experts.145.w1", "model.layers.45.block_sparse_moe.experts.146.w1", "model.layers.45.block_sparse_moe.experts.147.w1", "model.layers.45.block_sparse_moe.experts.148.w1", "model.layers.45.block_sparse_moe.experts.149.w1", "model.layers.45.block_sparse_moe.experts.150.w1", "model.layers.45.block_sparse_moe.experts.151.w1", "model.layers.45.block_sparse_moe.experts.152.w1", "model.layers.45.block_sparse_moe.experts.153.w1", "model.layers.45.block_sparse_moe.experts.154.w1", "model.layers.45.block_sparse_moe.experts.155.w1", "model.layers.45.block_sparse_moe.experts.156.w1", "model.layers.45.block_sparse_moe.experts.157.w1", "model.layers.45.block_sparse_moe.experts.158.w1", "model.layers.45.block_sparse_moe.experts.159.w1", "model.layers.45.block_sparse_moe.experts.160.w1", "model.layers.45.block_sparse_moe.experts.161.w1", "model.layers.45.block_sparse_moe.experts.162.w1", "model.layers.45.block_sparse_moe.experts.163.w1", "model.layers.45.block_sparse_moe.experts.164.w1", "model.layers.45.block_sparse_moe.experts.165.w1", "model.layers.45.block_sparse_moe.experts.166.w1", "model.layers.45.block_sparse_moe.experts.167.w1", "model.layers.45.block_sparse_moe.experts.168.w1", "model.layers.45.block_sparse_moe.experts.169.w1", "model.layers.45.block_sparse_moe.experts.170.w1", "model.layers.45.block_sparse_moe.experts.171.w1", "model.layers.45.block_sparse_moe.experts.172.w1", "model.layers.45.block_sparse_moe.experts.173.w1", "model.layers.45.block_sparse_moe.experts.174.w1", "model.layers.45.block_sparse_moe.experts.175.w1", "model.layers.45.block_sparse_moe.experts.176.w1", "model.layers.45.block_sparse_moe.experts.177.w1", "model.layers.45.block_sparse_moe.experts.178.w1", "model.layers.45.block_sparse_moe.experts.179.w1", "model.layers.45.block_sparse_moe.experts.180.w1", "model.layers.45.block_sparse_moe.experts.181.w1", "model.layers.45.block_sparse_moe.experts.182.w1", "model.layers.45.block_sparse_moe.experts.183.w1", "model.layers.45.block_sparse_moe.experts.184.w1", "model.layers.45.block_sparse_moe.experts.185.w1", "model.layers.45.block_sparse_moe.experts.186.w1", "model.layers.45.block_sparse_moe.experts.187.w1", "model.layers.45.block_sparse_moe.experts.188.w1", "model.layers.45.block_sparse_moe.experts.189.w1", "model.layers.45.block_sparse_moe.experts.190.w1", "model.layers.45.block_sparse_moe.experts.191.w1", "model.layers.45.block_sparse_moe.experts.192.w1", "model.layers.45.block_sparse_moe.experts.193.w1", "model.layers.45.block_sparse_moe.experts.194.w1", "model.layers.45.block_sparse_moe.experts.195.w1", "model.layers.45.block_sparse_moe.experts.196.w1", "model.layers.45.block_sparse_moe.experts.197.w1", "model.layers.45.block_sparse_moe.experts.198.w1", "model.layers.45.block_sparse_moe.experts.199.w1", "model.layers.45.block_sparse_moe.experts.200.w1", "model.layers.45.block_sparse_moe.experts.201.w1", "model.layers.45.block_sparse_moe.experts.202.w1", "model.layers.45.block_sparse_moe.experts.203.w1", "model.layers.45.block_sparse_moe.experts.204.w1", "model.layers.45.block_sparse_moe.experts.205.w1", "model.layers.45.block_sparse_moe.experts.206.w1", "model.layers.45.block_sparse_moe.experts.207.w1", "model.layers.45.block_sparse_moe.experts.208.w1", "model.layers.45.block_sparse_moe.experts.209.w1", "model.layers.45.block_sparse_moe.experts.210.w1", "model.layers.45.block_sparse_moe.experts.211.w1", "model.layers.45.block_sparse_moe.experts.212.w1", "model.layers.45.block_sparse_moe.experts.213.w1", "model.layers.45.block_sparse_moe.experts.214.w1", "model.layers.45.block_sparse_moe.experts.215.w1", "model.layers.45.block_sparse_moe.experts.216.w1", "model.layers.45.block_sparse_moe.experts.217.w1", "model.layers.45.block_sparse_moe.experts.218.w1", "model.layers.45.block_sparse_moe.experts.219.w1", "model.layers.45.block_sparse_moe.experts.220.w1", "model.layers.45.block_sparse_moe.experts.221.w1", "model.layers.45.block_sparse_moe.experts.222.w1", "model.layers.45.block_sparse_moe.experts.223.w1", "model.layers.45.block_sparse_moe.experts.224.w1", "model.layers.45.block_sparse_moe.experts.225.w1", "model.layers.45.block_sparse_moe.experts.226.w1", "model.layers.45.block_sparse_moe.experts.227.w1", "model.layers.45.block_sparse_moe.experts.228.w1", "model.layers.45.block_sparse_moe.experts.229.w1", "model.layers.45.block_sparse_moe.experts.230.w1", "model.layers.45.block_sparse_moe.experts.231.w1", "model.layers.45.block_sparse_moe.experts.232.w1", "model.layers.45.block_sparse_moe.experts.233.w1", "model.layers.45.block_sparse_moe.experts.234.w1", "model.layers.45.block_sparse_moe.experts.235.w1", "model.layers.45.block_sparse_moe.experts.236.w1", "model.layers.45.block_sparse_moe.experts.237.w1", "model.layers.45.block_sparse_moe.experts.238.w1", "model.layers.45.block_sparse_moe.experts.239.w1", "model.layers.45.block_sparse_moe.experts.240.w1", "model.layers.45.block_sparse_moe.experts.241.w1", "model.layers.45.block_sparse_moe.experts.242.w1", "model.layers.45.block_sparse_moe.experts.243.w1", "model.layers.45.block_sparse_moe.experts.244.w1", "model.layers.45.block_sparse_moe.experts.245.w1", "model.layers.45.block_sparse_moe.experts.246.w1", "model.layers.45.block_sparse_moe.experts.247.w1", "model.layers.45.block_sparse_moe.experts.248.w1", "model.layers.45.block_sparse_moe.experts.249.w1", "model.layers.45.block_sparse_moe.experts.250.w1", "model.layers.45.block_sparse_moe.experts.251.w1", "model.layers.45.block_sparse_moe.experts.252.w1", "model.layers.45.block_sparse_moe.experts.253.w1", "model.layers.45.block_sparse_moe.experts.254.w1", "model.layers.45.block_sparse_moe.experts.255.w1", "model.layers.45.block_sparse_moe.experts.0.w3", "model.layers.45.block_sparse_moe.experts.1.w3", "model.layers.45.block_sparse_moe.experts.2.w3", "model.layers.45.block_sparse_moe.experts.3.w3", "model.layers.45.block_sparse_moe.experts.4.w3", "model.layers.45.block_sparse_moe.experts.5.w3", "model.layers.45.block_sparse_moe.experts.6.w3", "model.layers.45.block_sparse_moe.experts.7.w3", "model.layers.45.block_sparse_moe.experts.8.w3", "model.layers.45.block_sparse_moe.experts.9.w3", "model.layers.45.block_sparse_moe.experts.10.w3", "model.layers.45.block_sparse_moe.experts.11.w3", "model.layers.45.block_sparse_moe.experts.12.w3", "model.layers.45.block_sparse_moe.experts.13.w3", "model.layers.45.block_sparse_moe.experts.14.w3", "model.layers.45.block_sparse_moe.experts.15.w3", "model.layers.45.block_sparse_moe.experts.16.w3", "model.layers.45.block_sparse_moe.experts.17.w3", "model.layers.45.block_sparse_moe.experts.18.w3", "model.layers.45.block_sparse_moe.experts.19.w3", "model.layers.45.block_sparse_moe.experts.20.w3", "model.layers.45.block_sparse_moe.experts.21.w3", "model.layers.45.block_sparse_moe.experts.22.w3", "model.layers.45.block_sparse_moe.experts.23.w3", "model.layers.45.block_sparse_moe.experts.24.w3", "model.layers.45.block_sparse_moe.experts.25.w3", "model.layers.45.block_sparse_moe.experts.26.w3", "model.layers.45.block_sparse_moe.experts.27.w3", "model.layers.45.block_sparse_moe.experts.28.w3", "model.layers.45.block_sparse_moe.experts.29.w3", "model.layers.45.block_sparse_moe.experts.30.w3", "model.layers.45.block_sparse_moe.experts.31.w3", "model.layers.45.block_sparse_moe.experts.32.w3", "model.layers.45.block_sparse_moe.experts.33.w3", "model.layers.45.block_sparse_moe.experts.34.w3", "model.layers.45.block_sparse_moe.experts.35.w3", "model.layers.45.block_sparse_moe.experts.36.w3", "model.layers.45.block_sparse_moe.experts.37.w3", "model.layers.45.block_sparse_moe.experts.38.w3", "model.layers.45.block_sparse_moe.experts.39.w3", "model.layers.45.block_sparse_moe.experts.40.w3", "model.layers.45.block_sparse_moe.experts.41.w3", "model.layers.45.block_sparse_moe.experts.42.w3", "model.layers.45.block_sparse_moe.experts.43.w3", "model.layers.45.block_sparse_moe.experts.44.w3", "model.layers.45.block_sparse_moe.experts.45.w3", "model.layers.45.block_sparse_moe.experts.46.w3", "model.layers.45.block_sparse_moe.experts.47.w3", "model.layers.45.block_sparse_moe.experts.48.w3", "model.layers.45.block_sparse_moe.experts.49.w3", "model.layers.45.block_sparse_moe.experts.50.w3", "model.layers.45.block_sparse_moe.experts.51.w3", "model.layers.45.block_sparse_moe.experts.52.w3", "model.layers.45.block_sparse_moe.experts.53.w3", "model.layers.45.block_sparse_moe.experts.54.w3", "model.layers.45.block_sparse_moe.experts.55.w3", "model.layers.45.block_sparse_moe.experts.56.w3", "model.layers.45.block_sparse_moe.experts.57.w3", "model.layers.45.block_sparse_moe.experts.58.w3", "model.layers.45.block_sparse_moe.experts.59.w3", "model.layers.45.block_sparse_moe.experts.60.w3", "model.layers.45.block_sparse_moe.experts.61.w3", "model.layers.45.block_sparse_moe.experts.62.w3", "model.layers.45.block_sparse_moe.experts.63.w3", "model.layers.45.block_sparse_moe.experts.64.w3", "model.layers.45.block_sparse_moe.experts.65.w3", "model.layers.45.block_sparse_moe.experts.66.w3", "model.layers.45.block_sparse_moe.experts.67.w3", "model.layers.45.block_sparse_moe.experts.68.w3", "model.layers.45.block_sparse_moe.experts.69.w3", "model.layers.45.block_sparse_moe.experts.70.w3", "model.layers.45.block_sparse_moe.experts.71.w3", "model.layers.45.block_sparse_moe.experts.72.w3", "model.layers.45.block_sparse_moe.experts.73.w3", "model.layers.45.block_sparse_moe.experts.74.w3", "model.layers.45.block_sparse_moe.experts.75.w3", "model.layers.45.block_sparse_moe.experts.76.w3", "model.layers.45.block_sparse_moe.experts.77.w3", "model.layers.45.block_sparse_moe.experts.78.w3", "model.layers.45.block_sparse_moe.experts.79.w3", "model.layers.45.block_sparse_moe.experts.80.w3", "model.layers.45.block_sparse_moe.experts.81.w3", "model.layers.45.block_sparse_moe.experts.82.w3", "model.layers.45.block_sparse_moe.experts.83.w3", "model.layers.45.block_sparse_moe.experts.84.w3", "model.layers.45.block_sparse_moe.experts.85.w3", "model.layers.45.block_sparse_moe.experts.86.w3", "model.layers.45.block_sparse_moe.experts.87.w3", "model.layers.45.block_sparse_moe.experts.88.w3", "model.layers.45.block_sparse_moe.experts.89.w3", "model.layers.45.block_sparse_moe.experts.90.w3", "model.layers.45.block_sparse_moe.experts.91.w3", "model.layers.45.block_sparse_moe.experts.92.w3", "model.layers.45.block_sparse_moe.experts.93.w3", "model.layers.45.block_sparse_moe.experts.94.w3", "model.layers.45.block_sparse_moe.experts.95.w3", "model.layers.45.block_sparse_moe.experts.96.w3", "model.layers.45.block_sparse_moe.experts.97.w3", "model.layers.45.block_sparse_moe.experts.98.w3", "model.layers.45.block_sparse_moe.experts.99.w3", "model.layers.45.block_sparse_moe.experts.100.w3", "model.layers.45.block_sparse_moe.experts.101.w3", "model.layers.45.block_sparse_moe.experts.102.w3", "model.layers.45.block_sparse_moe.experts.103.w3", "model.layers.45.block_sparse_moe.experts.104.w3", "model.layers.45.block_sparse_moe.experts.105.w3", "model.layers.45.block_sparse_moe.experts.106.w3", "model.layers.45.block_sparse_moe.experts.107.w3", "model.layers.45.block_sparse_moe.experts.108.w3", "model.layers.45.block_sparse_moe.experts.109.w3", "model.layers.45.block_sparse_moe.experts.110.w3", "model.layers.45.block_sparse_moe.experts.111.w3", "model.layers.45.block_sparse_moe.experts.112.w3", "model.layers.45.block_sparse_moe.experts.113.w3", "model.layers.45.block_sparse_moe.experts.114.w3", "model.layers.45.block_sparse_moe.experts.115.w3", "model.layers.45.block_sparse_moe.experts.116.w3", "model.layers.45.block_sparse_moe.experts.117.w3", "model.layers.45.block_sparse_moe.experts.118.w3", "model.layers.45.block_sparse_moe.experts.119.w3", "model.layers.45.block_sparse_moe.experts.120.w3", "model.layers.45.block_sparse_moe.experts.121.w3", "model.layers.45.block_sparse_moe.experts.122.w3", "model.layers.45.block_sparse_moe.experts.123.w3", "model.layers.45.block_sparse_moe.experts.124.w3", "model.layers.45.block_sparse_moe.experts.125.w3", "model.layers.45.block_sparse_moe.experts.126.w3", "model.layers.45.block_sparse_moe.experts.127.w3", "model.layers.45.block_sparse_moe.experts.128.w3", "model.layers.45.block_sparse_moe.experts.129.w3", "model.layers.45.block_sparse_moe.experts.130.w3", "model.layers.45.block_sparse_moe.experts.131.w3", "model.layers.45.block_sparse_moe.experts.132.w3", "model.layers.45.block_sparse_moe.experts.133.w3", "model.layers.45.block_sparse_moe.experts.134.w3", "model.layers.45.block_sparse_moe.experts.135.w3", "model.layers.45.block_sparse_moe.experts.136.w3", "model.layers.45.block_sparse_moe.experts.137.w3", "model.layers.45.block_sparse_moe.experts.138.w3", "model.layers.45.block_sparse_moe.experts.139.w3", "model.layers.45.block_sparse_moe.experts.140.w3", "model.layers.45.block_sparse_moe.experts.141.w3", "model.layers.45.block_sparse_moe.experts.142.w3", "model.layers.45.block_sparse_moe.experts.143.w3", "model.layers.45.block_sparse_moe.experts.144.w3", "model.layers.45.block_sparse_moe.experts.145.w3", "model.layers.45.block_sparse_moe.experts.146.w3", "model.layers.45.block_sparse_moe.experts.147.w3", "model.layers.45.block_sparse_moe.experts.148.w3", "model.layers.45.block_sparse_moe.experts.149.w3", "model.layers.45.block_sparse_moe.experts.150.w3", "model.layers.45.block_sparse_moe.experts.151.w3", "model.layers.45.block_sparse_moe.experts.152.w3", "model.layers.45.block_sparse_moe.experts.153.w3", "model.layers.45.block_sparse_moe.experts.154.w3", "model.layers.45.block_sparse_moe.experts.155.w3", "model.layers.45.block_sparse_moe.experts.156.w3", "model.layers.45.block_sparse_moe.experts.157.w3", "model.layers.45.block_sparse_moe.experts.158.w3", "model.layers.45.block_sparse_moe.experts.159.w3", "model.layers.45.block_sparse_moe.experts.160.w3", "model.layers.45.block_sparse_moe.experts.161.w3", "model.layers.45.block_sparse_moe.experts.162.w3", "model.layers.45.block_sparse_moe.experts.163.w3", "model.layers.45.block_sparse_moe.experts.164.w3", "model.layers.45.block_sparse_moe.experts.165.w3", "model.layers.45.block_sparse_moe.experts.166.w3", "model.layers.45.block_sparse_moe.experts.167.w3", "model.layers.45.block_sparse_moe.experts.168.w3", "model.layers.45.block_sparse_moe.experts.169.w3", "model.layers.45.block_sparse_moe.experts.170.w3", "model.layers.45.block_sparse_moe.experts.171.w3", "model.layers.45.block_sparse_moe.experts.172.w3", "model.layers.45.block_sparse_moe.experts.173.w3", "model.layers.45.block_sparse_moe.experts.174.w3", "model.layers.45.block_sparse_moe.experts.175.w3", "model.layers.45.block_sparse_moe.experts.176.w3", "model.layers.45.block_sparse_moe.experts.177.w3", "model.layers.45.block_sparse_moe.experts.178.w3", "model.layers.45.block_sparse_moe.experts.179.w3", "model.layers.45.block_sparse_moe.experts.180.w3", "model.layers.45.block_sparse_moe.experts.181.w3", "model.layers.45.block_sparse_moe.experts.182.w3", "model.layers.45.block_sparse_moe.experts.183.w3", "model.layers.45.block_sparse_moe.experts.184.w3", "model.layers.45.block_sparse_moe.experts.185.w3", "model.layers.45.block_sparse_moe.experts.186.w3", "model.layers.45.block_sparse_moe.experts.187.w3", "model.layers.45.block_sparse_moe.experts.188.w3", "model.layers.45.block_sparse_moe.experts.189.w3", "model.layers.45.block_sparse_moe.experts.190.w3", "model.layers.45.block_sparse_moe.experts.191.w3", "model.layers.45.block_sparse_moe.experts.192.w3", "model.layers.45.block_sparse_moe.experts.193.w3", "model.layers.45.block_sparse_moe.experts.194.w3", "model.layers.45.block_sparse_moe.experts.195.w3", "model.layers.45.block_sparse_moe.experts.196.w3", "model.layers.45.block_sparse_moe.experts.197.w3", "model.layers.45.block_sparse_moe.experts.198.w3", "model.layers.45.block_sparse_moe.experts.199.w3", "model.layers.45.block_sparse_moe.experts.200.w3", "model.layers.45.block_sparse_moe.experts.201.w3", "model.layers.45.block_sparse_moe.experts.202.w3", "model.layers.45.block_sparse_moe.experts.203.w3", "model.layers.45.block_sparse_moe.experts.204.w3", "model.layers.45.block_sparse_moe.experts.205.w3", "model.layers.45.block_sparse_moe.experts.206.w3", "model.layers.45.block_sparse_moe.experts.207.w3", "model.layers.45.block_sparse_moe.experts.208.w3", "model.layers.45.block_sparse_moe.experts.209.w3", "model.layers.45.block_sparse_moe.experts.210.w3", "model.layers.45.block_sparse_moe.experts.211.w3", "model.layers.45.block_sparse_moe.experts.212.w3", "model.layers.45.block_sparse_moe.experts.213.w3", "model.layers.45.block_sparse_moe.experts.214.w3", "model.layers.45.block_sparse_moe.experts.215.w3", "model.layers.45.block_sparse_moe.experts.216.w3", "model.layers.45.block_sparse_moe.experts.217.w3", "model.layers.45.block_sparse_moe.experts.218.w3", "model.layers.45.block_sparse_moe.experts.219.w3", "model.layers.45.block_sparse_moe.experts.220.w3", "model.layers.45.block_sparse_moe.experts.221.w3", "model.layers.45.block_sparse_moe.experts.222.w3", "model.layers.45.block_sparse_moe.experts.223.w3", "model.layers.45.block_sparse_moe.experts.224.w3", "model.layers.45.block_sparse_moe.experts.225.w3", "model.layers.45.block_sparse_moe.experts.226.w3", "model.layers.45.block_sparse_moe.experts.227.w3", "model.layers.45.block_sparse_moe.experts.228.w3", "model.layers.45.block_sparse_moe.experts.229.w3", "model.layers.45.block_sparse_moe.experts.230.w3", "model.layers.45.block_sparse_moe.experts.231.w3", "model.layers.45.block_sparse_moe.experts.232.w3", "model.layers.45.block_sparse_moe.experts.233.w3", "model.layers.45.block_sparse_moe.experts.234.w3", "model.layers.45.block_sparse_moe.experts.235.w3", "model.layers.45.block_sparse_moe.experts.236.w3", "model.layers.45.block_sparse_moe.experts.237.w3", "model.layers.45.block_sparse_moe.experts.238.w3", "model.layers.45.block_sparse_moe.experts.239.w3", "model.layers.45.block_sparse_moe.experts.240.w3", "model.layers.45.block_sparse_moe.experts.241.w3", "model.layers.45.block_sparse_moe.experts.242.w3", "model.layers.45.block_sparse_moe.experts.243.w3", "model.layers.45.block_sparse_moe.experts.244.w3", "model.layers.45.block_sparse_moe.experts.245.w3", "model.layers.45.block_sparse_moe.experts.246.w3", "model.layers.45.block_sparse_moe.experts.247.w3", "model.layers.45.block_sparse_moe.experts.248.w3", "model.layers.45.block_sparse_moe.experts.249.w3", "model.layers.45.block_sparse_moe.experts.250.w3", "model.layers.45.block_sparse_moe.experts.251.w3", "model.layers.45.block_sparse_moe.experts.252.w3", "model.layers.45.block_sparse_moe.experts.253.w3", "model.layers.45.block_sparse_moe.experts.254.w3", "model.layers.45.block_sparse_moe.experts.255.w3", "model.layers.45.block_sparse_moe.experts.0.w2", "model.layers.45.block_sparse_moe.experts.1.w2", "model.layers.45.block_sparse_moe.experts.2.w2", "model.layers.45.block_sparse_moe.experts.3.w2", "model.layers.45.block_sparse_moe.experts.4.w2", "model.layers.45.block_sparse_moe.experts.5.w2", "model.layers.45.block_sparse_moe.experts.6.w2", "model.layers.45.block_sparse_moe.experts.7.w2", "model.layers.45.block_sparse_moe.experts.8.w2", "model.layers.45.block_sparse_moe.experts.9.w2", "model.layers.45.block_sparse_moe.experts.10.w2", "model.layers.45.block_sparse_moe.experts.11.w2", "model.layers.45.block_sparse_moe.experts.12.w2", "model.layers.45.block_sparse_moe.experts.13.w2", "model.layers.45.block_sparse_moe.experts.14.w2", "model.layers.45.block_sparse_moe.experts.15.w2", "model.layers.45.block_sparse_moe.experts.16.w2", "model.layers.45.block_sparse_moe.experts.17.w2", "model.layers.45.block_sparse_moe.experts.18.w2", "model.layers.45.block_sparse_moe.experts.19.w2", "model.layers.45.block_sparse_moe.experts.20.w2", "model.layers.45.block_sparse_moe.experts.21.w2", "model.layers.45.block_sparse_moe.experts.22.w2", "model.layers.45.block_sparse_moe.experts.23.w2", "model.layers.45.block_sparse_moe.experts.24.w2", "model.layers.45.block_sparse_moe.experts.25.w2", "model.layers.45.block_sparse_moe.experts.26.w2", "model.layers.45.block_sparse_moe.experts.27.w2", "model.layers.45.block_sparse_moe.experts.28.w2", "model.layers.45.block_sparse_moe.experts.29.w2", "model.layers.45.block_sparse_moe.experts.30.w2", "model.layers.45.block_sparse_moe.experts.31.w2", "model.layers.45.block_sparse_moe.experts.32.w2", "model.layers.45.block_sparse_moe.experts.33.w2", "model.layers.45.block_sparse_moe.experts.34.w2", "model.layers.45.block_sparse_moe.experts.35.w2", "model.layers.45.block_sparse_moe.experts.36.w2", "model.layers.45.block_sparse_moe.experts.37.w2", "model.layers.45.block_sparse_moe.experts.38.w2", "model.layers.45.block_sparse_moe.experts.39.w2", "model.layers.45.block_sparse_moe.experts.40.w2", "model.layers.45.block_sparse_moe.experts.41.w2", "model.layers.45.block_sparse_moe.experts.42.w2", "model.layers.45.block_sparse_moe.experts.43.w2", "model.layers.45.block_sparse_moe.experts.44.w2", "model.layers.45.block_sparse_moe.experts.45.w2", "model.layers.45.block_sparse_moe.experts.46.w2", "model.layers.45.block_sparse_moe.experts.47.w2", "model.layers.45.block_sparse_moe.experts.48.w2", "model.layers.45.block_sparse_moe.experts.49.w2", "model.layers.45.block_sparse_moe.experts.50.w2", "model.layers.45.block_sparse_moe.experts.51.w2", "model.layers.45.block_sparse_moe.experts.52.w2", "model.layers.45.block_sparse_moe.experts.53.w2", "model.layers.45.block_sparse_moe.experts.54.w2", "model.layers.45.block_sparse_moe.experts.55.w2", "model.layers.45.block_sparse_moe.experts.56.w2", "model.layers.45.block_sparse_moe.experts.57.w2", "model.layers.45.block_sparse_moe.experts.58.w2", "model.layers.45.block_sparse_moe.experts.59.w2", "model.layers.45.block_sparse_moe.experts.60.w2", "model.layers.45.block_sparse_moe.experts.61.w2", "model.layers.45.block_sparse_moe.experts.62.w2", "model.layers.45.block_sparse_moe.experts.63.w2", "model.layers.45.block_sparse_moe.experts.64.w2", "model.layers.45.block_sparse_moe.experts.65.w2", "model.layers.45.block_sparse_moe.experts.66.w2", "model.layers.45.block_sparse_moe.experts.67.w2", "model.layers.45.block_sparse_moe.experts.68.w2", "model.layers.45.block_sparse_moe.experts.69.w2", "model.layers.45.block_sparse_moe.experts.70.w2", "model.layers.45.block_sparse_moe.experts.71.w2", "model.layers.45.block_sparse_moe.experts.72.w2", "model.layers.45.block_sparse_moe.experts.73.w2", "model.layers.45.block_sparse_moe.experts.74.w2", "model.layers.45.block_sparse_moe.experts.75.w2", "model.layers.45.block_sparse_moe.experts.76.w2", "model.layers.45.block_sparse_moe.experts.77.w2", "model.layers.45.block_sparse_moe.experts.78.w2", "model.layers.45.block_sparse_moe.experts.79.w2", "model.layers.45.block_sparse_moe.experts.80.w2", "model.layers.45.block_sparse_moe.experts.81.w2", "model.layers.45.block_sparse_moe.experts.82.w2", "model.layers.45.block_sparse_moe.experts.83.w2", "model.layers.45.block_sparse_moe.experts.84.w2", "model.layers.45.block_sparse_moe.experts.85.w2", "model.layers.45.block_sparse_moe.experts.86.w2", "model.layers.45.block_sparse_moe.experts.87.w2", "model.layers.45.block_sparse_moe.experts.88.w2", "model.layers.45.block_sparse_moe.experts.89.w2", "model.layers.45.block_sparse_moe.experts.90.w2", "model.layers.45.block_sparse_moe.experts.91.w2", "model.layers.45.block_sparse_moe.experts.92.w2", "model.layers.45.block_sparse_moe.experts.93.w2", "model.layers.45.block_sparse_moe.experts.94.w2", "model.layers.45.block_sparse_moe.experts.95.w2", "model.layers.45.block_sparse_moe.experts.96.w2", "model.layers.45.block_sparse_moe.experts.97.w2", "model.layers.45.block_sparse_moe.experts.98.w2", "model.layers.45.block_sparse_moe.experts.99.w2", "model.layers.45.block_sparse_moe.experts.100.w2", "model.layers.45.block_sparse_moe.experts.101.w2", "model.layers.45.block_sparse_moe.experts.102.w2", "model.layers.45.block_sparse_moe.experts.103.w2", "model.layers.45.block_sparse_moe.experts.104.w2", "model.layers.45.block_sparse_moe.experts.105.w2", "model.layers.45.block_sparse_moe.experts.106.w2", "model.layers.45.block_sparse_moe.experts.107.w2", "model.layers.45.block_sparse_moe.experts.108.w2", "model.layers.45.block_sparse_moe.experts.109.w2", "model.layers.45.block_sparse_moe.experts.110.w2", "model.layers.45.block_sparse_moe.experts.111.w2", "model.layers.45.block_sparse_moe.experts.112.w2", "model.layers.45.block_sparse_moe.experts.113.w2", "model.layers.45.block_sparse_moe.experts.114.w2", "model.layers.45.block_sparse_moe.experts.115.w2", "model.layers.45.block_sparse_moe.experts.116.w2", "model.layers.45.block_sparse_moe.experts.117.w2", "model.layers.45.block_sparse_moe.experts.118.w2", "model.layers.45.block_sparse_moe.experts.119.w2", "model.layers.45.block_sparse_moe.experts.120.w2", "model.layers.45.block_sparse_moe.experts.121.w2", "model.layers.45.block_sparse_moe.experts.122.w2", "model.layers.45.block_sparse_moe.experts.123.w2", "model.layers.45.block_sparse_moe.experts.124.w2", "model.layers.45.block_sparse_moe.experts.125.w2", "model.layers.45.block_sparse_moe.experts.126.w2", "model.layers.45.block_sparse_moe.experts.127.w2", "model.layers.45.block_sparse_moe.experts.128.w2", "model.layers.45.block_sparse_moe.experts.129.w2", "model.layers.45.block_sparse_moe.experts.130.w2", "model.layers.45.block_sparse_moe.experts.131.w2", "model.layers.45.block_sparse_moe.experts.132.w2", "model.layers.45.block_sparse_moe.experts.133.w2", "model.layers.45.block_sparse_moe.experts.134.w2", "model.layers.45.block_sparse_moe.experts.135.w2", "model.layers.45.block_sparse_moe.experts.136.w2", "model.layers.45.block_sparse_moe.experts.137.w2", "model.layers.45.block_sparse_moe.experts.138.w2", "model.layers.45.block_sparse_moe.experts.139.w2", "model.layers.45.block_sparse_moe.experts.140.w2", "model.layers.45.block_sparse_moe.experts.141.w2", "model.layers.45.block_sparse_moe.experts.142.w2", "model.layers.45.block_sparse_moe.experts.143.w2", "model.layers.45.block_sparse_moe.experts.144.w2", "model.layers.45.block_sparse_moe.experts.145.w2", "model.layers.45.block_sparse_moe.experts.146.w2", "model.layers.45.block_sparse_moe.experts.147.w2", "model.layers.45.block_sparse_moe.experts.148.w2", "model.layers.45.block_sparse_moe.experts.149.w2", "model.layers.45.block_sparse_moe.experts.150.w2", "model.layers.45.block_sparse_moe.experts.151.w2", "model.layers.45.block_sparse_moe.experts.152.w2", "model.layers.45.block_sparse_moe.experts.153.w2", "model.layers.45.block_sparse_moe.experts.154.w2", "model.layers.45.block_sparse_moe.experts.155.w2", "model.layers.45.block_sparse_moe.experts.156.w2", "model.layers.45.block_sparse_moe.experts.157.w2", "model.layers.45.block_sparse_moe.experts.158.w2", "model.layers.45.block_sparse_moe.experts.159.w2", "model.layers.45.block_sparse_moe.experts.160.w2", "model.layers.45.block_sparse_moe.experts.161.w2", "model.layers.45.block_sparse_moe.experts.162.w2", "model.layers.45.block_sparse_moe.experts.163.w2", "model.layers.45.block_sparse_moe.experts.164.w2", "model.layers.45.block_sparse_moe.experts.165.w2", "model.layers.45.block_sparse_moe.experts.166.w2", "model.layers.45.block_sparse_moe.experts.167.w2", "model.layers.45.block_sparse_moe.experts.168.w2", "model.layers.45.block_sparse_moe.experts.169.w2", "model.layers.45.block_sparse_moe.experts.170.w2", "model.layers.45.block_sparse_moe.experts.171.w2", "model.layers.45.block_sparse_moe.experts.172.w2", "model.layers.45.block_sparse_moe.experts.173.w2", "model.layers.45.block_sparse_moe.experts.174.w2", "model.layers.45.block_sparse_moe.experts.175.w2", "model.layers.45.block_sparse_moe.experts.176.w2", "model.layers.45.block_sparse_moe.experts.177.w2", "model.layers.45.block_sparse_moe.experts.178.w2", "model.layers.45.block_sparse_moe.experts.179.w2", "model.layers.45.block_sparse_moe.experts.180.w2", "model.layers.45.block_sparse_moe.experts.181.w2", "model.layers.45.block_sparse_moe.experts.182.w2", "model.layers.45.block_sparse_moe.experts.183.w2", "model.layers.45.block_sparse_moe.experts.184.w2", "model.layers.45.block_sparse_moe.experts.185.w2", "model.layers.45.block_sparse_moe.experts.186.w2", "model.layers.45.block_sparse_moe.experts.187.w2", "model.layers.45.block_sparse_moe.experts.188.w2", "model.layers.45.block_sparse_moe.experts.189.w2", "model.layers.45.block_sparse_moe.experts.190.w2", "model.layers.45.block_sparse_moe.experts.191.w2", "model.layers.45.block_sparse_moe.experts.192.w2", "model.layers.45.block_sparse_moe.experts.193.w2", "model.layers.45.block_sparse_moe.experts.194.w2", "model.layers.45.block_sparse_moe.experts.195.w2", "model.layers.45.block_sparse_moe.experts.196.w2", "model.layers.45.block_sparse_moe.experts.197.w2", "model.layers.45.block_sparse_moe.experts.198.w2", "model.layers.45.block_sparse_moe.experts.199.w2", "model.layers.45.block_sparse_moe.experts.200.w2", "model.layers.45.block_sparse_moe.experts.201.w2", "model.layers.45.block_sparse_moe.experts.202.w2", "model.layers.45.block_sparse_moe.experts.203.w2", "model.layers.45.block_sparse_moe.experts.204.w2", "model.layers.45.block_sparse_moe.experts.205.w2", "model.layers.45.block_sparse_moe.experts.206.w2", "model.layers.45.block_sparse_moe.experts.207.w2", "model.layers.45.block_sparse_moe.experts.208.w2", "model.layers.45.block_sparse_moe.experts.209.w2", "model.layers.45.block_sparse_moe.experts.210.w2", "model.layers.45.block_sparse_moe.experts.211.w2", "model.layers.45.block_sparse_moe.experts.212.w2", "model.layers.45.block_sparse_moe.experts.213.w2", "model.layers.45.block_sparse_moe.experts.214.w2", "model.layers.45.block_sparse_moe.experts.215.w2", "model.layers.45.block_sparse_moe.experts.216.w2", "model.layers.45.block_sparse_moe.experts.217.w2", "model.layers.45.block_sparse_moe.experts.218.w2", "model.layers.45.block_sparse_moe.experts.219.w2", "model.layers.45.block_sparse_moe.experts.220.w2", "model.layers.45.block_sparse_moe.experts.221.w2", "model.layers.45.block_sparse_moe.experts.222.w2", "model.layers.45.block_sparse_moe.experts.223.w2", "model.layers.45.block_sparse_moe.experts.224.w2", "model.layers.45.block_sparse_moe.experts.225.w2", "model.layers.45.block_sparse_moe.experts.226.w2", "model.layers.45.block_sparse_moe.experts.227.w2", "model.layers.45.block_sparse_moe.experts.228.w2", "model.layers.45.block_sparse_moe.experts.229.w2", "model.layers.45.block_sparse_moe.experts.230.w2", "model.layers.45.block_sparse_moe.experts.231.w2", "model.layers.45.block_sparse_moe.experts.232.w2", "model.layers.45.block_sparse_moe.experts.233.w2", "model.layers.45.block_sparse_moe.experts.234.w2", "model.layers.45.block_sparse_moe.experts.235.w2", "model.layers.45.block_sparse_moe.experts.236.w2", "model.layers.45.block_sparse_moe.experts.237.w2", "model.layers.45.block_sparse_moe.experts.238.w2", "model.layers.45.block_sparse_moe.experts.239.w2", "model.layers.45.block_sparse_moe.experts.240.w2", "model.layers.45.block_sparse_moe.experts.241.w2", "model.layers.45.block_sparse_moe.experts.242.w2", "model.layers.45.block_sparse_moe.experts.243.w2", "model.layers.45.block_sparse_moe.experts.244.w2", "model.layers.45.block_sparse_moe.experts.245.w2", "model.layers.45.block_sparse_moe.experts.246.w2", "model.layers.45.block_sparse_moe.experts.247.w2", "model.layers.45.block_sparse_moe.experts.248.w2", "model.layers.45.block_sparse_moe.experts.249.w2", "model.layers.45.block_sparse_moe.experts.250.w2", "model.layers.45.block_sparse_moe.experts.251.w2", "model.layers.45.block_sparse_moe.experts.252.w2", "model.layers.45.block_sparse_moe.experts.253.w2", "model.layers.45.block_sparse_moe.experts.254.w2", "model.layers.45.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0002056984230875969, "dbits": 3623878656 } ] }, { "idx": 92, "layers": [ "model.layers.46.self_attn.q_proj", "model.layers.46.self_attn.k_proj", "model.layers.46.self_attn.v_proj", "model.layers.46.self_attn.o_proj" ], "candidates": [ { "dkld": 0.001450568996369861, "dbits": 44040192 } ] }, { "idx": 93, "layers": [ "model.layers.46.block_sparse_moe.experts.0.w1", "model.layers.46.block_sparse_moe.experts.1.w1", "model.layers.46.block_sparse_moe.experts.2.w1", "model.layers.46.block_sparse_moe.experts.3.w1", "model.layers.46.block_sparse_moe.experts.4.w1", "model.layers.46.block_sparse_moe.experts.5.w1", "model.layers.46.block_sparse_moe.experts.6.w1", "model.layers.46.block_sparse_moe.experts.7.w1", "model.layers.46.block_sparse_moe.experts.8.w1", "model.layers.46.block_sparse_moe.experts.9.w1", "model.layers.46.block_sparse_moe.experts.10.w1", "model.layers.46.block_sparse_moe.experts.11.w1", "model.layers.46.block_sparse_moe.experts.12.w1", "model.layers.46.block_sparse_moe.experts.13.w1", "model.layers.46.block_sparse_moe.experts.14.w1", "model.layers.46.block_sparse_moe.experts.15.w1", "model.layers.46.block_sparse_moe.experts.16.w1", "model.layers.46.block_sparse_moe.experts.17.w1", "model.layers.46.block_sparse_moe.experts.18.w1", "model.layers.46.block_sparse_moe.experts.19.w1", "model.layers.46.block_sparse_moe.experts.20.w1", "model.layers.46.block_sparse_moe.experts.21.w1", "model.layers.46.block_sparse_moe.experts.22.w1", "model.layers.46.block_sparse_moe.experts.23.w1", "model.layers.46.block_sparse_moe.experts.24.w1", "model.layers.46.block_sparse_moe.experts.25.w1", "model.layers.46.block_sparse_moe.experts.26.w1", "model.layers.46.block_sparse_moe.experts.27.w1", "model.layers.46.block_sparse_moe.experts.28.w1", "model.layers.46.block_sparse_moe.experts.29.w1", "model.layers.46.block_sparse_moe.experts.30.w1", "model.layers.46.block_sparse_moe.experts.31.w1", "model.layers.46.block_sparse_moe.experts.32.w1", "model.layers.46.block_sparse_moe.experts.33.w1", "model.layers.46.block_sparse_moe.experts.34.w1", "model.layers.46.block_sparse_moe.experts.35.w1", "model.layers.46.block_sparse_moe.experts.36.w1", "model.layers.46.block_sparse_moe.experts.37.w1", "model.layers.46.block_sparse_moe.experts.38.w1", "model.layers.46.block_sparse_moe.experts.39.w1", "model.layers.46.block_sparse_moe.experts.40.w1", "model.layers.46.block_sparse_moe.experts.41.w1", "model.layers.46.block_sparse_moe.experts.42.w1", "model.layers.46.block_sparse_moe.experts.43.w1", "model.layers.46.block_sparse_moe.experts.44.w1", "model.layers.46.block_sparse_moe.experts.45.w1", "model.layers.46.block_sparse_moe.experts.46.w1", "model.layers.46.block_sparse_moe.experts.47.w1", "model.layers.46.block_sparse_moe.experts.48.w1", "model.layers.46.block_sparse_moe.experts.49.w1", "model.layers.46.block_sparse_moe.experts.50.w1", "model.layers.46.block_sparse_moe.experts.51.w1", "model.layers.46.block_sparse_moe.experts.52.w1", "model.layers.46.block_sparse_moe.experts.53.w1", "model.layers.46.block_sparse_moe.experts.54.w1", "model.layers.46.block_sparse_moe.experts.55.w1", "model.layers.46.block_sparse_moe.experts.56.w1", "model.layers.46.block_sparse_moe.experts.57.w1", "model.layers.46.block_sparse_moe.experts.58.w1", "model.layers.46.block_sparse_moe.experts.59.w1", "model.layers.46.block_sparse_moe.experts.60.w1", "model.layers.46.block_sparse_moe.experts.61.w1", "model.layers.46.block_sparse_moe.experts.62.w1", "model.layers.46.block_sparse_moe.experts.63.w1", "model.layers.46.block_sparse_moe.experts.64.w1", "model.layers.46.block_sparse_moe.experts.65.w1", "model.layers.46.block_sparse_moe.experts.66.w1", "model.layers.46.block_sparse_moe.experts.67.w1", "model.layers.46.block_sparse_moe.experts.68.w1", "model.layers.46.block_sparse_moe.experts.69.w1", "model.layers.46.block_sparse_moe.experts.70.w1", "model.layers.46.block_sparse_moe.experts.71.w1", "model.layers.46.block_sparse_moe.experts.72.w1", "model.layers.46.block_sparse_moe.experts.73.w1", "model.layers.46.block_sparse_moe.experts.74.w1", "model.layers.46.block_sparse_moe.experts.75.w1", "model.layers.46.block_sparse_moe.experts.76.w1", "model.layers.46.block_sparse_moe.experts.77.w1", "model.layers.46.block_sparse_moe.experts.78.w1", "model.layers.46.block_sparse_moe.experts.79.w1", "model.layers.46.block_sparse_moe.experts.80.w1", "model.layers.46.block_sparse_moe.experts.81.w1", "model.layers.46.block_sparse_moe.experts.82.w1", "model.layers.46.block_sparse_moe.experts.83.w1", "model.layers.46.block_sparse_moe.experts.84.w1", "model.layers.46.block_sparse_moe.experts.85.w1", "model.layers.46.block_sparse_moe.experts.86.w1", "model.layers.46.block_sparse_moe.experts.87.w1", "model.layers.46.block_sparse_moe.experts.88.w1", "model.layers.46.block_sparse_moe.experts.89.w1", "model.layers.46.block_sparse_moe.experts.90.w1", "model.layers.46.block_sparse_moe.experts.91.w1", "model.layers.46.block_sparse_moe.experts.92.w1", "model.layers.46.block_sparse_moe.experts.93.w1", "model.layers.46.block_sparse_moe.experts.94.w1", "model.layers.46.block_sparse_moe.experts.95.w1", "model.layers.46.block_sparse_moe.experts.96.w1", "model.layers.46.block_sparse_moe.experts.97.w1", "model.layers.46.block_sparse_moe.experts.98.w1", "model.layers.46.block_sparse_moe.experts.99.w1", "model.layers.46.block_sparse_moe.experts.100.w1", "model.layers.46.block_sparse_moe.experts.101.w1", "model.layers.46.block_sparse_moe.experts.102.w1", "model.layers.46.block_sparse_moe.experts.103.w1", "model.layers.46.block_sparse_moe.experts.104.w1", "model.layers.46.block_sparse_moe.experts.105.w1", "model.layers.46.block_sparse_moe.experts.106.w1", "model.layers.46.block_sparse_moe.experts.107.w1", "model.layers.46.block_sparse_moe.experts.108.w1", "model.layers.46.block_sparse_moe.experts.109.w1", "model.layers.46.block_sparse_moe.experts.110.w1", "model.layers.46.block_sparse_moe.experts.111.w1", "model.layers.46.block_sparse_moe.experts.112.w1", "model.layers.46.block_sparse_moe.experts.113.w1", "model.layers.46.block_sparse_moe.experts.114.w1", "model.layers.46.block_sparse_moe.experts.115.w1", "model.layers.46.block_sparse_moe.experts.116.w1", "model.layers.46.block_sparse_moe.experts.117.w1", "model.layers.46.block_sparse_moe.experts.118.w1", "model.layers.46.block_sparse_moe.experts.119.w1", "model.layers.46.block_sparse_moe.experts.120.w1", "model.layers.46.block_sparse_moe.experts.121.w1", "model.layers.46.block_sparse_moe.experts.122.w1", "model.layers.46.block_sparse_moe.experts.123.w1", "model.layers.46.block_sparse_moe.experts.124.w1", "model.layers.46.block_sparse_moe.experts.125.w1", "model.layers.46.block_sparse_moe.experts.126.w1", "model.layers.46.block_sparse_moe.experts.127.w1", "model.layers.46.block_sparse_moe.experts.128.w1", "model.layers.46.block_sparse_moe.experts.129.w1", "model.layers.46.block_sparse_moe.experts.130.w1", "model.layers.46.block_sparse_moe.experts.131.w1", "model.layers.46.block_sparse_moe.experts.132.w1", "model.layers.46.block_sparse_moe.experts.133.w1", "model.layers.46.block_sparse_moe.experts.134.w1", "model.layers.46.block_sparse_moe.experts.135.w1", "model.layers.46.block_sparse_moe.experts.136.w1", "model.layers.46.block_sparse_moe.experts.137.w1", "model.layers.46.block_sparse_moe.experts.138.w1", "model.layers.46.block_sparse_moe.experts.139.w1", "model.layers.46.block_sparse_moe.experts.140.w1", "model.layers.46.block_sparse_moe.experts.141.w1", "model.layers.46.block_sparse_moe.experts.142.w1", "model.layers.46.block_sparse_moe.experts.143.w1", "model.layers.46.block_sparse_moe.experts.144.w1", "model.layers.46.block_sparse_moe.experts.145.w1", "model.layers.46.block_sparse_moe.experts.146.w1", "model.layers.46.block_sparse_moe.experts.147.w1", "model.layers.46.block_sparse_moe.experts.148.w1", "model.layers.46.block_sparse_moe.experts.149.w1", "model.layers.46.block_sparse_moe.experts.150.w1", "model.layers.46.block_sparse_moe.experts.151.w1", "model.layers.46.block_sparse_moe.experts.152.w1", "model.layers.46.block_sparse_moe.experts.153.w1", "model.layers.46.block_sparse_moe.experts.154.w1", "model.layers.46.block_sparse_moe.experts.155.w1", "model.layers.46.block_sparse_moe.experts.156.w1", "model.layers.46.block_sparse_moe.experts.157.w1", "model.layers.46.block_sparse_moe.experts.158.w1", "model.layers.46.block_sparse_moe.experts.159.w1", "model.layers.46.block_sparse_moe.experts.160.w1", "model.layers.46.block_sparse_moe.experts.161.w1", "model.layers.46.block_sparse_moe.experts.162.w1", "model.layers.46.block_sparse_moe.experts.163.w1", "model.layers.46.block_sparse_moe.experts.164.w1", "model.layers.46.block_sparse_moe.experts.165.w1", "model.layers.46.block_sparse_moe.experts.166.w1", "model.layers.46.block_sparse_moe.experts.167.w1", "model.layers.46.block_sparse_moe.experts.168.w1", "model.layers.46.block_sparse_moe.experts.169.w1", "model.layers.46.block_sparse_moe.experts.170.w1", "model.layers.46.block_sparse_moe.experts.171.w1", "model.layers.46.block_sparse_moe.experts.172.w1", "model.layers.46.block_sparse_moe.experts.173.w1", "model.layers.46.block_sparse_moe.experts.174.w1", "model.layers.46.block_sparse_moe.experts.175.w1", "model.layers.46.block_sparse_moe.experts.176.w1", "model.layers.46.block_sparse_moe.experts.177.w1", "model.layers.46.block_sparse_moe.experts.178.w1", "model.layers.46.block_sparse_moe.experts.179.w1", "model.layers.46.block_sparse_moe.experts.180.w1", "model.layers.46.block_sparse_moe.experts.181.w1", "model.layers.46.block_sparse_moe.experts.182.w1", "model.layers.46.block_sparse_moe.experts.183.w1", "model.layers.46.block_sparse_moe.experts.184.w1", "model.layers.46.block_sparse_moe.experts.185.w1", "model.layers.46.block_sparse_moe.experts.186.w1", "model.layers.46.block_sparse_moe.experts.187.w1", "model.layers.46.block_sparse_moe.experts.188.w1", "model.layers.46.block_sparse_moe.experts.189.w1", "model.layers.46.block_sparse_moe.experts.190.w1", "model.layers.46.block_sparse_moe.experts.191.w1", "model.layers.46.block_sparse_moe.experts.192.w1", "model.layers.46.block_sparse_moe.experts.193.w1", "model.layers.46.block_sparse_moe.experts.194.w1", "model.layers.46.block_sparse_moe.experts.195.w1", "model.layers.46.block_sparse_moe.experts.196.w1", "model.layers.46.block_sparse_moe.experts.197.w1", "model.layers.46.block_sparse_moe.experts.198.w1", "model.layers.46.block_sparse_moe.experts.199.w1", "model.layers.46.block_sparse_moe.experts.200.w1", "model.layers.46.block_sparse_moe.experts.201.w1", "model.layers.46.block_sparse_moe.experts.202.w1", "model.layers.46.block_sparse_moe.experts.203.w1", "model.layers.46.block_sparse_moe.experts.204.w1", "model.layers.46.block_sparse_moe.experts.205.w1", "model.layers.46.block_sparse_moe.experts.206.w1", "model.layers.46.block_sparse_moe.experts.207.w1", "model.layers.46.block_sparse_moe.experts.208.w1", "model.layers.46.block_sparse_moe.experts.209.w1", "model.layers.46.block_sparse_moe.experts.210.w1", "model.layers.46.block_sparse_moe.experts.211.w1", "model.layers.46.block_sparse_moe.experts.212.w1", "model.layers.46.block_sparse_moe.experts.213.w1", "model.layers.46.block_sparse_moe.experts.214.w1", "model.layers.46.block_sparse_moe.experts.215.w1", "model.layers.46.block_sparse_moe.experts.216.w1", "model.layers.46.block_sparse_moe.experts.217.w1", "model.layers.46.block_sparse_moe.experts.218.w1", "model.layers.46.block_sparse_moe.experts.219.w1", "model.layers.46.block_sparse_moe.experts.220.w1", "model.layers.46.block_sparse_moe.experts.221.w1", "model.layers.46.block_sparse_moe.experts.222.w1", "model.layers.46.block_sparse_moe.experts.223.w1", "model.layers.46.block_sparse_moe.experts.224.w1", "model.layers.46.block_sparse_moe.experts.225.w1", "model.layers.46.block_sparse_moe.experts.226.w1", "model.layers.46.block_sparse_moe.experts.227.w1", "model.layers.46.block_sparse_moe.experts.228.w1", "model.layers.46.block_sparse_moe.experts.229.w1", "model.layers.46.block_sparse_moe.experts.230.w1", "model.layers.46.block_sparse_moe.experts.231.w1", "model.layers.46.block_sparse_moe.experts.232.w1", "model.layers.46.block_sparse_moe.experts.233.w1", "model.layers.46.block_sparse_moe.experts.234.w1", "model.layers.46.block_sparse_moe.experts.235.w1", "model.layers.46.block_sparse_moe.experts.236.w1", "model.layers.46.block_sparse_moe.experts.237.w1", "model.layers.46.block_sparse_moe.experts.238.w1", "model.layers.46.block_sparse_moe.experts.239.w1", "model.layers.46.block_sparse_moe.experts.240.w1", "model.layers.46.block_sparse_moe.experts.241.w1", "model.layers.46.block_sparse_moe.experts.242.w1", "model.layers.46.block_sparse_moe.experts.243.w1", "model.layers.46.block_sparse_moe.experts.244.w1", "model.layers.46.block_sparse_moe.experts.245.w1", "model.layers.46.block_sparse_moe.experts.246.w1", "model.layers.46.block_sparse_moe.experts.247.w1", "model.layers.46.block_sparse_moe.experts.248.w1", "model.layers.46.block_sparse_moe.experts.249.w1", "model.layers.46.block_sparse_moe.experts.250.w1", "model.layers.46.block_sparse_moe.experts.251.w1", "model.layers.46.block_sparse_moe.experts.252.w1", "model.layers.46.block_sparse_moe.experts.253.w1", "model.layers.46.block_sparse_moe.experts.254.w1", "model.layers.46.block_sparse_moe.experts.255.w1", "model.layers.46.block_sparse_moe.experts.0.w3", "model.layers.46.block_sparse_moe.experts.1.w3", "model.layers.46.block_sparse_moe.experts.2.w3", "model.layers.46.block_sparse_moe.experts.3.w3", "model.layers.46.block_sparse_moe.experts.4.w3", "model.layers.46.block_sparse_moe.experts.5.w3", "model.layers.46.block_sparse_moe.experts.6.w3", "model.layers.46.block_sparse_moe.experts.7.w3", "model.layers.46.block_sparse_moe.experts.8.w3", "model.layers.46.block_sparse_moe.experts.9.w3", "model.layers.46.block_sparse_moe.experts.10.w3", "model.layers.46.block_sparse_moe.experts.11.w3", "model.layers.46.block_sparse_moe.experts.12.w3", "model.layers.46.block_sparse_moe.experts.13.w3", "model.layers.46.block_sparse_moe.experts.14.w3", "model.layers.46.block_sparse_moe.experts.15.w3", "model.layers.46.block_sparse_moe.experts.16.w3", "model.layers.46.block_sparse_moe.experts.17.w3", "model.layers.46.block_sparse_moe.experts.18.w3", "model.layers.46.block_sparse_moe.experts.19.w3", "model.layers.46.block_sparse_moe.experts.20.w3", "model.layers.46.block_sparse_moe.experts.21.w3", "model.layers.46.block_sparse_moe.experts.22.w3", "model.layers.46.block_sparse_moe.experts.23.w3", "model.layers.46.block_sparse_moe.experts.24.w3", "model.layers.46.block_sparse_moe.experts.25.w3", "model.layers.46.block_sparse_moe.experts.26.w3", "model.layers.46.block_sparse_moe.experts.27.w3", "model.layers.46.block_sparse_moe.experts.28.w3", "model.layers.46.block_sparse_moe.experts.29.w3", "model.layers.46.block_sparse_moe.experts.30.w3", "model.layers.46.block_sparse_moe.experts.31.w3", "model.layers.46.block_sparse_moe.experts.32.w3", "model.layers.46.block_sparse_moe.experts.33.w3", "model.layers.46.block_sparse_moe.experts.34.w3", "model.layers.46.block_sparse_moe.experts.35.w3", "model.layers.46.block_sparse_moe.experts.36.w3", "model.layers.46.block_sparse_moe.experts.37.w3", "model.layers.46.block_sparse_moe.experts.38.w3", "model.layers.46.block_sparse_moe.experts.39.w3", "model.layers.46.block_sparse_moe.experts.40.w3", "model.layers.46.block_sparse_moe.experts.41.w3", "model.layers.46.block_sparse_moe.experts.42.w3", "model.layers.46.block_sparse_moe.experts.43.w3", "model.layers.46.block_sparse_moe.experts.44.w3", "model.layers.46.block_sparse_moe.experts.45.w3", "model.layers.46.block_sparse_moe.experts.46.w3", "model.layers.46.block_sparse_moe.experts.47.w3", "model.layers.46.block_sparse_moe.experts.48.w3", "model.layers.46.block_sparse_moe.experts.49.w3", "model.layers.46.block_sparse_moe.experts.50.w3", "model.layers.46.block_sparse_moe.experts.51.w3", "model.layers.46.block_sparse_moe.experts.52.w3", "model.layers.46.block_sparse_moe.experts.53.w3", "model.layers.46.block_sparse_moe.experts.54.w3", "model.layers.46.block_sparse_moe.experts.55.w3", "model.layers.46.block_sparse_moe.experts.56.w3", "model.layers.46.block_sparse_moe.experts.57.w3", "model.layers.46.block_sparse_moe.experts.58.w3", "model.layers.46.block_sparse_moe.experts.59.w3", "model.layers.46.block_sparse_moe.experts.60.w3", "model.layers.46.block_sparse_moe.experts.61.w3", "model.layers.46.block_sparse_moe.experts.62.w3", "model.layers.46.block_sparse_moe.experts.63.w3", "model.layers.46.block_sparse_moe.experts.64.w3", "model.layers.46.block_sparse_moe.experts.65.w3", "model.layers.46.block_sparse_moe.experts.66.w3", "model.layers.46.block_sparse_moe.experts.67.w3", "model.layers.46.block_sparse_moe.experts.68.w3", "model.layers.46.block_sparse_moe.experts.69.w3", "model.layers.46.block_sparse_moe.experts.70.w3", "model.layers.46.block_sparse_moe.experts.71.w3", "model.layers.46.block_sparse_moe.experts.72.w3", "model.layers.46.block_sparse_moe.experts.73.w3", "model.layers.46.block_sparse_moe.experts.74.w3", "model.layers.46.block_sparse_moe.experts.75.w3", "model.layers.46.block_sparse_moe.experts.76.w3", "model.layers.46.block_sparse_moe.experts.77.w3", "model.layers.46.block_sparse_moe.experts.78.w3", "model.layers.46.block_sparse_moe.experts.79.w3", "model.layers.46.block_sparse_moe.experts.80.w3", "model.layers.46.block_sparse_moe.experts.81.w3", "model.layers.46.block_sparse_moe.experts.82.w3", "model.layers.46.block_sparse_moe.experts.83.w3", "model.layers.46.block_sparse_moe.experts.84.w3", "model.layers.46.block_sparse_moe.experts.85.w3", "model.layers.46.block_sparse_moe.experts.86.w3", "model.layers.46.block_sparse_moe.experts.87.w3", "model.layers.46.block_sparse_moe.experts.88.w3", "model.layers.46.block_sparse_moe.experts.89.w3", "model.layers.46.block_sparse_moe.experts.90.w3", "model.layers.46.block_sparse_moe.experts.91.w3", "model.layers.46.block_sparse_moe.experts.92.w3", "model.layers.46.block_sparse_moe.experts.93.w3", "model.layers.46.block_sparse_moe.experts.94.w3", "model.layers.46.block_sparse_moe.experts.95.w3", "model.layers.46.block_sparse_moe.experts.96.w3", "model.layers.46.block_sparse_moe.experts.97.w3", "model.layers.46.block_sparse_moe.experts.98.w3", "model.layers.46.block_sparse_moe.experts.99.w3", "model.layers.46.block_sparse_moe.experts.100.w3", "model.layers.46.block_sparse_moe.experts.101.w3", "model.layers.46.block_sparse_moe.experts.102.w3", "model.layers.46.block_sparse_moe.experts.103.w3", "model.layers.46.block_sparse_moe.experts.104.w3", "model.layers.46.block_sparse_moe.experts.105.w3", "model.layers.46.block_sparse_moe.experts.106.w3", "model.layers.46.block_sparse_moe.experts.107.w3", "model.layers.46.block_sparse_moe.experts.108.w3", "model.layers.46.block_sparse_moe.experts.109.w3", "model.layers.46.block_sparse_moe.experts.110.w3", "model.layers.46.block_sparse_moe.experts.111.w3", "model.layers.46.block_sparse_moe.experts.112.w3", "model.layers.46.block_sparse_moe.experts.113.w3", "model.layers.46.block_sparse_moe.experts.114.w3", "model.layers.46.block_sparse_moe.experts.115.w3", "model.layers.46.block_sparse_moe.experts.116.w3", "model.layers.46.block_sparse_moe.experts.117.w3", "model.layers.46.block_sparse_moe.experts.118.w3", "model.layers.46.block_sparse_moe.experts.119.w3", "model.layers.46.block_sparse_moe.experts.120.w3", "model.layers.46.block_sparse_moe.experts.121.w3", "model.layers.46.block_sparse_moe.experts.122.w3", "model.layers.46.block_sparse_moe.experts.123.w3", "model.layers.46.block_sparse_moe.experts.124.w3", "model.layers.46.block_sparse_moe.experts.125.w3", "model.layers.46.block_sparse_moe.experts.126.w3", "model.layers.46.block_sparse_moe.experts.127.w3", "model.layers.46.block_sparse_moe.experts.128.w3", "model.layers.46.block_sparse_moe.experts.129.w3", "model.layers.46.block_sparse_moe.experts.130.w3", "model.layers.46.block_sparse_moe.experts.131.w3", "model.layers.46.block_sparse_moe.experts.132.w3", "model.layers.46.block_sparse_moe.experts.133.w3", "model.layers.46.block_sparse_moe.experts.134.w3", "model.layers.46.block_sparse_moe.experts.135.w3", "model.layers.46.block_sparse_moe.experts.136.w3", "model.layers.46.block_sparse_moe.experts.137.w3", "model.layers.46.block_sparse_moe.experts.138.w3", "model.layers.46.block_sparse_moe.experts.139.w3", "model.layers.46.block_sparse_moe.experts.140.w3", "model.layers.46.block_sparse_moe.experts.141.w3", "model.layers.46.block_sparse_moe.experts.142.w3", "model.layers.46.block_sparse_moe.experts.143.w3", "model.layers.46.block_sparse_moe.experts.144.w3", "model.layers.46.block_sparse_moe.experts.145.w3", "model.layers.46.block_sparse_moe.experts.146.w3", "model.layers.46.block_sparse_moe.experts.147.w3", "model.layers.46.block_sparse_moe.experts.148.w3", "model.layers.46.block_sparse_moe.experts.149.w3", "model.layers.46.block_sparse_moe.experts.150.w3", "model.layers.46.block_sparse_moe.experts.151.w3", "model.layers.46.block_sparse_moe.experts.152.w3", "model.layers.46.block_sparse_moe.experts.153.w3", "model.layers.46.block_sparse_moe.experts.154.w3", "model.layers.46.block_sparse_moe.experts.155.w3", "model.layers.46.block_sparse_moe.experts.156.w3", "model.layers.46.block_sparse_moe.experts.157.w3", "model.layers.46.block_sparse_moe.experts.158.w3", "model.layers.46.block_sparse_moe.experts.159.w3", "model.layers.46.block_sparse_moe.experts.160.w3", "model.layers.46.block_sparse_moe.experts.161.w3", "model.layers.46.block_sparse_moe.experts.162.w3", "model.layers.46.block_sparse_moe.experts.163.w3", "model.layers.46.block_sparse_moe.experts.164.w3", "model.layers.46.block_sparse_moe.experts.165.w3", "model.layers.46.block_sparse_moe.experts.166.w3", "model.layers.46.block_sparse_moe.experts.167.w3", "model.layers.46.block_sparse_moe.experts.168.w3", "model.layers.46.block_sparse_moe.experts.169.w3", "model.layers.46.block_sparse_moe.experts.170.w3", "model.layers.46.block_sparse_moe.experts.171.w3", "model.layers.46.block_sparse_moe.experts.172.w3", "model.layers.46.block_sparse_moe.experts.173.w3", "model.layers.46.block_sparse_moe.experts.174.w3", "model.layers.46.block_sparse_moe.experts.175.w3", "model.layers.46.block_sparse_moe.experts.176.w3", "model.layers.46.block_sparse_moe.experts.177.w3", "model.layers.46.block_sparse_moe.experts.178.w3", "model.layers.46.block_sparse_moe.experts.179.w3", "model.layers.46.block_sparse_moe.experts.180.w3", "model.layers.46.block_sparse_moe.experts.181.w3", "model.layers.46.block_sparse_moe.experts.182.w3", "model.layers.46.block_sparse_moe.experts.183.w3", "model.layers.46.block_sparse_moe.experts.184.w3", "model.layers.46.block_sparse_moe.experts.185.w3", "model.layers.46.block_sparse_moe.experts.186.w3", "model.layers.46.block_sparse_moe.experts.187.w3", "model.layers.46.block_sparse_moe.experts.188.w3", "model.layers.46.block_sparse_moe.experts.189.w3", "model.layers.46.block_sparse_moe.experts.190.w3", "model.layers.46.block_sparse_moe.experts.191.w3", "model.layers.46.block_sparse_moe.experts.192.w3", "model.layers.46.block_sparse_moe.experts.193.w3", "model.layers.46.block_sparse_moe.experts.194.w3", "model.layers.46.block_sparse_moe.experts.195.w3", "model.layers.46.block_sparse_moe.experts.196.w3", "model.layers.46.block_sparse_moe.experts.197.w3", "model.layers.46.block_sparse_moe.experts.198.w3", "model.layers.46.block_sparse_moe.experts.199.w3", "model.layers.46.block_sparse_moe.experts.200.w3", "model.layers.46.block_sparse_moe.experts.201.w3", "model.layers.46.block_sparse_moe.experts.202.w3", "model.layers.46.block_sparse_moe.experts.203.w3", "model.layers.46.block_sparse_moe.experts.204.w3", "model.layers.46.block_sparse_moe.experts.205.w3", "model.layers.46.block_sparse_moe.experts.206.w3", "model.layers.46.block_sparse_moe.experts.207.w3", "model.layers.46.block_sparse_moe.experts.208.w3", "model.layers.46.block_sparse_moe.experts.209.w3", "model.layers.46.block_sparse_moe.experts.210.w3", "model.layers.46.block_sparse_moe.experts.211.w3", "model.layers.46.block_sparse_moe.experts.212.w3", "model.layers.46.block_sparse_moe.experts.213.w3", "model.layers.46.block_sparse_moe.experts.214.w3", "model.layers.46.block_sparse_moe.experts.215.w3", "model.layers.46.block_sparse_moe.experts.216.w3", "model.layers.46.block_sparse_moe.experts.217.w3", "model.layers.46.block_sparse_moe.experts.218.w3", "model.layers.46.block_sparse_moe.experts.219.w3", "model.layers.46.block_sparse_moe.experts.220.w3", "model.layers.46.block_sparse_moe.experts.221.w3", "model.layers.46.block_sparse_moe.experts.222.w3", "model.layers.46.block_sparse_moe.experts.223.w3", "model.layers.46.block_sparse_moe.experts.224.w3", "model.layers.46.block_sparse_moe.experts.225.w3", "model.layers.46.block_sparse_moe.experts.226.w3", "model.layers.46.block_sparse_moe.experts.227.w3", "model.layers.46.block_sparse_moe.experts.228.w3", "model.layers.46.block_sparse_moe.experts.229.w3", "model.layers.46.block_sparse_moe.experts.230.w3", "model.layers.46.block_sparse_moe.experts.231.w3", "model.layers.46.block_sparse_moe.experts.232.w3", "model.layers.46.block_sparse_moe.experts.233.w3", "model.layers.46.block_sparse_moe.experts.234.w3", "model.layers.46.block_sparse_moe.experts.235.w3", "model.layers.46.block_sparse_moe.experts.236.w3", "model.layers.46.block_sparse_moe.experts.237.w3", "model.layers.46.block_sparse_moe.experts.238.w3", "model.layers.46.block_sparse_moe.experts.239.w3", "model.layers.46.block_sparse_moe.experts.240.w3", "model.layers.46.block_sparse_moe.experts.241.w3", "model.layers.46.block_sparse_moe.experts.242.w3", "model.layers.46.block_sparse_moe.experts.243.w3", "model.layers.46.block_sparse_moe.experts.244.w3", "model.layers.46.block_sparse_moe.experts.245.w3", "model.layers.46.block_sparse_moe.experts.246.w3", "model.layers.46.block_sparse_moe.experts.247.w3", "model.layers.46.block_sparse_moe.experts.248.w3", "model.layers.46.block_sparse_moe.experts.249.w3", "model.layers.46.block_sparse_moe.experts.250.w3", "model.layers.46.block_sparse_moe.experts.251.w3", "model.layers.46.block_sparse_moe.experts.252.w3", "model.layers.46.block_sparse_moe.experts.253.w3", "model.layers.46.block_sparse_moe.experts.254.w3", "model.layers.46.block_sparse_moe.experts.255.w3", "model.layers.46.block_sparse_moe.experts.0.w2", "model.layers.46.block_sparse_moe.experts.1.w2", "model.layers.46.block_sparse_moe.experts.2.w2", "model.layers.46.block_sparse_moe.experts.3.w2", "model.layers.46.block_sparse_moe.experts.4.w2", "model.layers.46.block_sparse_moe.experts.5.w2", "model.layers.46.block_sparse_moe.experts.6.w2", "model.layers.46.block_sparse_moe.experts.7.w2", "model.layers.46.block_sparse_moe.experts.8.w2", "model.layers.46.block_sparse_moe.experts.9.w2", "model.layers.46.block_sparse_moe.experts.10.w2", "model.layers.46.block_sparse_moe.experts.11.w2", "model.layers.46.block_sparse_moe.experts.12.w2", "model.layers.46.block_sparse_moe.experts.13.w2", "model.layers.46.block_sparse_moe.experts.14.w2", "model.layers.46.block_sparse_moe.experts.15.w2", "model.layers.46.block_sparse_moe.experts.16.w2", "model.layers.46.block_sparse_moe.experts.17.w2", "model.layers.46.block_sparse_moe.experts.18.w2", "model.layers.46.block_sparse_moe.experts.19.w2", "model.layers.46.block_sparse_moe.experts.20.w2", "model.layers.46.block_sparse_moe.experts.21.w2", "model.layers.46.block_sparse_moe.experts.22.w2", "model.layers.46.block_sparse_moe.experts.23.w2", "model.layers.46.block_sparse_moe.experts.24.w2", "model.layers.46.block_sparse_moe.experts.25.w2", "model.layers.46.block_sparse_moe.experts.26.w2", "model.layers.46.block_sparse_moe.experts.27.w2", "model.layers.46.block_sparse_moe.experts.28.w2", "model.layers.46.block_sparse_moe.experts.29.w2", "model.layers.46.block_sparse_moe.experts.30.w2", "model.layers.46.block_sparse_moe.experts.31.w2", "model.layers.46.block_sparse_moe.experts.32.w2", "model.layers.46.block_sparse_moe.experts.33.w2", "model.layers.46.block_sparse_moe.experts.34.w2", "model.layers.46.block_sparse_moe.experts.35.w2", "model.layers.46.block_sparse_moe.experts.36.w2", "model.layers.46.block_sparse_moe.experts.37.w2", "model.layers.46.block_sparse_moe.experts.38.w2", "model.layers.46.block_sparse_moe.experts.39.w2", "model.layers.46.block_sparse_moe.experts.40.w2", "model.layers.46.block_sparse_moe.experts.41.w2", "model.layers.46.block_sparse_moe.experts.42.w2", "model.layers.46.block_sparse_moe.experts.43.w2", "model.layers.46.block_sparse_moe.experts.44.w2", "model.layers.46.block_sparse_moe.experts.45.w2", "model.layers.46.block_sparse_moe.experts.46.w2", "model.layers.46.block_sparse_moe.experts.47.w2", "model.layers.46.block_sparse_moe.experts.48.w2", "model.layers.46.block_sparse_moe.experts.49.w2", "model.layers.46.block_sparse_moe.experts.50.w2", "model.layers.46.block_sparse_moe.experts.51.w2", "model.layers.46.block_sparse_moe.experts.52.w2", "model.layers.46.block_sparse_moe.experts.53.w2", "model.layers.46.block_sparse_moe.experts.54.w2", "model.layers.46.block_sparse_moe.experts.55.w2", "model.layers.46.block_sparse_moe.experts.56.w2", "model.layers.46.block_sparse_moe.experts.57.w2", "model.layers.46.block_sparse_moe.experts.58.w2", "model.layers.46.block_sparse_moe.experts.59.w2", "model.layers.46.block_sparse_moe.experts.60.w2", "model.layers.46.block_sparse_moe.experts.61.w2", "model.layers.46.block_sparse_moe.experts.62.w2", "model.layers.46.block_sparse_moe.experts.63.w2", "model.layers.46.block_sparse_moe.experts.64.w2", "model.layers.46.block_sparse_moe.experts.65.w2", "model.layers.46.block_sparse_moe.experts.66.w2", "model.layers.46.block_sparse_moe.experts.67.w2", "model.layers.46.block_sparse_moe.experts.68.w2", "model.layers.46.block_sparse_moe.experts.69.w2", "model.layers.46.block_sparse_moe.experts.70.w2", "model.layers.46.block_sparse_moe.experts.71.w2", "model.layers.46.block_sparse_moe.experts.72.w2", "model.layers.46.block_sparse_moe.experts.73.w2", "model.layers.46.block_sparse_moe.experts.74.w2", "model.layers.46.block_sparse_moe.experts.75.w2", "model.layers.46.block_sparse_moe.experts.76.w2", "model.layers.46.block_sparse_moe.experts.77.w2", "model.layers.46.block_sparse_moe.experts.78.w2", "model.layers.46.block_sparse_moe.experts.79.w2", "model.layers.46.block_sparse_moe.experts.80.w2", "model.layers.46.block_sparse_moe.experts.81.w2", "model.layers.46.block_sparse_moe.experts.82.w2", "model.layers.46.block_sparse_moe.experts.83.w2", "model.layers.46.block_sparse_moe.experts.84.w2", "model.layers.46.block_sparse_moe.experts.85.w2", "model.layers.46.block_sparse_moe.experts.86.w2", "model.layers.46.block_sparse_moe.experts.87.w2", "model.layers.46.block_sparse_moe.experts.88.w2", "model.layers.46.block_sparse_moe.experts.89.w2", "model.layers.46.block_sparse_moe.experts.90.w2", "model.layers.46.block_sparse_moe.experts.91.w2", "model.layers.46.block_sparse_moe.experts.92.w2", "model.layers.46.block_sparse_moe.experts.93.w2", "model.layers.46.block_sparse_moe.experts.94.w2", "model.layers.46.block_sparse_moe.experts.95.w2", "model.layers.46.block_sparse_moe.experts.96.w2", "model.layers.46.block_sparse_moe.experts.97.w2", "model.layers.46.block_sparse_moe.experts.98.w2", "model.layers.46.block_sparse_moe.experts.99.w2", "model.layers.46.block_sparse_moe.experts.100.w2", "model.layers.46.block_sparse_moe.experts.101.w2", "model.layers.46.block_sparse_moe.experts.102.w2", "model.layers.46.block_sparse_moe.experts.103.w2", "model.layers.46.block_sparse_moe.experts.104.w2", "model.layers.46.block_sparse_moe.experts.105.w2", "model.layers.46.block_sparse_moe.experts.106.w2", "model.layers.46.block_sparse_moe.experts.107.w2", "model.layers.46.block_sparse_moe.experts.108.w2", "model.layers.46.block_sparse_moe.experts.109.w2", "model.layers.46.block_sparse_moe.experts.110.w2", "model.layers.46.block_sparse_moe.experts.111.w2", "model.layers.46.block_sparse_moe.experts.112.w2", "model.layers.46.block_sparse_moe.experts.113.w2", "model.layers.46.block_sparse_moe.experts.114.w2", "model.layers.46.block_sparse_moe.experts.115.w2", "model.layers.46.block_sparse_moe.experts.116.w2", "model.layers.46.block_sparse_moe.experts.117.w2", "model.layers.46.block_sparse_moe.experts.118.w2", "model.layers.46.block_sparse_moe.experts.119.w2", "model.layers.46.block_sparse_moe.experts.120.w2", "model.layers.46.block_sparse_moe.experts.121.w2", "model.layers.46.block_sparse_moe.experts.122.w2", "model.layers.46.block_sparse_moe.experts.123.w2", "model.layers.46.block_sparse_moe.experts.124.w2", "model.layers.46.block_sparse_moe.experts.125.w2", "model.layers.46.block_sparse_moe.experts.126.w2", "model.layers.46.block_sparse_moe.experts.127.w2", "model.layers.46.block_sparse_moe.experts.128.w2", "model.layers.46.block_sparse_moe.experts.129.w2", "model.layers.46.block_sparse_moe.experts.130.w2", "model.layers.46.block_sparse_moe.experts.131.w2", "model.layers.46.block_sparse_moe.experts.132.w2", "model.layers.46.block_sparse_moe.experts.133.w2", "model.layers.46.block_sparse_moe.experts.134.w2", "model.layers.46.block_sparse_moe.experts.135.w2", "model.layers.46.block_sparse_moe.experts.136.w2", "model.layers.46.block_sparse_moe.experts.137.w2", "model.layers.46.block_sparse_moe.experts.138.w2", "model.layers.46.block_sparse_moe.experts.139.w2", "model.layers.46.block_sparse_moe.experts.140.w2", "model.layers.46.block_sparse_moe.experts.141.w2", "model.layers.46.block_sparse_moe.experts.142.w2", "model.layers.46.block_sparse_moe.experts.143.w2", "model.layers.46.block_sparse_moe.experts.144.w2", "model.layers.46.block_sparse_moe.experts.145.w2", "model.layers.46.block_sparse_moe.experts.146.w2", "model.layers.46.block_sparse_moe.experts.147.w2", "model.layers.46.block_sparse_moe.experts.148.w2", "model.layers.46.block_sparse_moe.experts.149.w2", "model.layers.46.block_sparse_moe.experts.150.w2", "model.layers.46.block_sparse_moe.experts.151.w2", "model.layers.46.block_sparse_moe.experts.152.w2", "model.layers.46.block_sparse_moe.experts.153.w2", "model.layers.46.block_sparse_moe.experts.154.w2", "model.layers.46.block_sparse_moe.experts.155.w2", "model.layers.46.block_sparse_moe.experts.156.w2", "model.layers.46.block_sparse_moe.experts.157.w2", "model.layers.46.block_sparse_moe.experts.158.w2", "model.layers.46.block_sparse_moe.experts.159.w2", "model.layers.46.block_sparse_moe.experts.160.w2", "model.layers.46.block_sparse_moe.experts.161.w2", "model.layers.46.block_sparse_moe.experts.162.w2", "model.layers.46.block_sparse_moe.experts.163.w2", "model.layers.46.block_sparse_moe.experts.164.w2", "model.layers.46.block_sparse_moe.experts.165.w2", "model.layers.46.block_sparse_moe.experts.166.w2", "model.layers.46.block_sparse_moe.experts.167.w2", "model.layers.46.block_sparse_moe.experts.168.w2", "model.layers.46.block_sparse_moe.experts.169.w2", "model.layers.46.block_sparse_moe.experts.170.w2", "model.layers.46.block_sparse_moe.experts.171.w2", "model.layers.46.block_sparse_moe.experts.172.w2", "model.layers.46.block_sparse_moe.experts.173.w2", "model.layers.46.block_sparse_moe.experts.174.w2", "model.layers.46.block_sparse_moe.experts.175.w2", "model.layers.46.block_sparse_moe.experts.176.w2", "model.layers.46.block_sparse_moe.experts.177.w2", "model.layers.46.block_sparse_moe.experts.178.w2", "model.layers.46.block_sparse_moe.experts.179.w2", "model.layers.46.block_sparse_moe.experts.180.w2", "model.layers.46.block_sparse_moe.experts.181.w2", "model.layers.46.block_sparse_moe.experts.182.w2", "model.layers.46.block_sparse_moe.experts.183.w2", "model.layers.46.block_sparse_moe.experts.184.w2", "model.layers.46.block_sparse_moe.experts.185.w2", "model.layers.46.block_sparse_moe.experts.186.w2", "model.layers.46.block_sparse_moe.experts.187.w2", "model.layers.46.block_sparse_moe.experts.188.w2", "model.layers.46.block_sparse_moe.experts.189.w2", "model.layers.46.block_sparse_moe.experts.190.w2", "model.layers.46.block_sparse_moe.experts.191.w2", "model.layers.46.block_sparse_moe.experts.192.w2", "model.layers.46.block_sparse_moe.experts.193.w2", "model.layers.46.block_sparse_moe.experts.194.w2", "model.layers.46.block_sparse_moe.experts.195.w2", "model.layers.46.block_sparse_moe.experts.196.w2", "model.layers.46.block_sparse_moe.experts.197.w2", "model.layers.46.block_sparse_moe.experts.198.w2", "model.layers.46.block_sparse_moe.experts.199.w2", "model.layers.46.block_sparse_moe.experts.200.w2", "model.layers.46.block_sparse_moe.experts.201.w2", "model.layers.46.block_sparse_moe.experts.202.w2", "model.layers.46.block_sparse_moe.experts.203.w2", "model.layers.46.block_sparse_moe.experts.204.w2", "model.layers.46.block_sparse_moe.experts.205.w2", "model.layers.46.block_sparse_moe.experts.206.w2", "model.layers.46.block_sparse_moe.experts.207.w2", "model.layers.46.block_sparse_moe.experts.208.w2", "model.layers.46.block_sparse_moe.experts.209.w2", "model.layers.46.block_sparse_moe.experts.210.w2", "model.layers.46.block_sparse_moe.experts.211.w2", "model.layers.46.block_sparse_moe.experts.212.w2", "model.layers.46.block_sparse_moe.experts.213.w2", "model.layers.46.block_sparse_moe.experts.214.w2", "model.layers.46.block_sparse_moe.experts.215.w2", "model.layers.46.block_sparse_moe.experts.216.w2", "model.layers.46.block_sparse_moe.experts.217.w2", "model.layers.46.block_sparse_moe.experts.218.w2", "model.layers.46.block_sparse_moe.experts.219.w2", "model.layers.46.block_sparse_moe.experts.220.w2", "model.layers.46.block_sparse_moe.experts.221.w2", "model.layers.46.block_sparse_moe.experts.222.w2", "model.layers.46.block_sparse_moe.experts.223.w2", "model.layers.46.block_sparse_moe.experts.224.w2", "model.layers.46.block_sparse_moe.experts.225.w2", "model.layers.46.block_sparse_moe.experts.226.w2", "model.layers.46.block_sparse_moe.experts.227.w2", "model.layers.46.block_sparse_moe.experts.228.w2", "model.layers.46.block_sparse_moe.experts.229.w2", "model.layers.46.block_sparse_moe.experts.230.w2", "model.layers.46.block_sparse_moe.experts.231.w2", "model.layers.46.block_sparse_moe.experts.232.w2", "model.layers.46.block_sparse_moe.experts.233.w2", "model.layers.46.block_sparse_moe.experts.234.w2", "model.layers.46.block_sparse_moe.experts.235.w2", "model.layers.46.block_sparse_moe.experts.236.w2", "model.layers.46.block_sparse_moe.experts.237.w2", "model.layers.46.block_sparse_moe.experts.238.w2", "model.layers.46.block_sparse_moe.experts.239.w2", "model.layers.46.block_sparse_moe.experts.240.w2", "model.layers.46.block_sparse_moe.experts.241.w2", "model.layers.46.block_sparse_moe.experts.242.w2", "model.layers.46.block_sparse_moe.experts.243.w2", "model.layers.46.block_sparse_moe.experts.244.w2", "model.layers.46.block_sparse_moe.experts.245.w2", "model.layers.46.block_sparse_moe.experts.246.w2", "model.layers.46.block_sparse_moe.experts.247.w2", "model.layers.46.block_sparse_moe.experts.248.w2", "model.layers.46.block_sparse_moe.experts.249.w2", "model.layers.46.block_sparse_moe.experts.250.w2", "model.layers.46.block_sparse_moe.experts.251.w2", "model.layers.46.block_sparse_moe.experts.252.w2", "model.layers.46.block_sparse_moe.experts.253.w2", "model.layers.46.block_sparse_moe.experts.254.w2", "model.layers.46.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 1.7035938799370154e-05, "dbits": 3623878656 } ] }, { "idx": 94, "layers": [ "model.layers.47.self_attn.q_proj", "model.layers.47.self_attn.k_proj", "model.layers.47.self_attn.v_proj", "model.layers.47.self_attn.o_proj" ], "candidates": [ { "dkld": -0.004019751958549034, "dbits": 44040192 } ] }, { "idx": 95, "layers": [ "model.layers.47.block_sparse_moe.experts.0.w1", "model.layers.47.block_sparse_moe.experts.1.w1", "model.layers.47.block_sparse_moe.experts.2.w1", "model.layers.47.block_sparse_moe.experts.3.w1", "model.layers.47.block_sparse_moe.experts.4.w1", "model.layers.47.block_sparse_moe.experts.5.w1", "model.layers.47.block_sparse_moe.experts.6.w1", "model.layers.47.block_sparse_moe.experts.7.w1", "model.layers.47.block_sparse_moe.experts.8.w1", "model.layers.47.block_sparse_moe.experts.9.w1", "model.layers.47.block_sparse_moe.experts.10.w1", "model.layers.47.block_sparse_moe.experts.11.w1", "model.layers.47.block_sparse_moe.experts.12.w1", "model.layers.47.block_sparse_moe.experts.13.w1", "model.layers.47.block_sparse_moe.experts.14.w1", "model.layers.47.block_sparse_moe.experts.15.w1", "model.layers.47.block_sparse_moe.experts.16.w1", "model.layers.47.block_sparse_moe.experts.17.w1", "model.layers.47.block_sparse_moe.experts.18.w1", "model.layers.47.block_sparse_moe.experts.19.w1", "model.layers.47.block_sparse_moe.experts.20.w1", "model.layers.47.block_sparse_moe.experts.21.w1", "model.layers.47.block_sparse_moe.experts.22.w1", "model.layers.47.block_sparse_moe.experts.23.w1", "model.layers.47.block_sparse_moe.experts.24.w1", "model.layers.47.block_sparse_moe.experts.25.w1", "model.layers.47.block_sparse_moe.experts.26.w1", "model.layers.47.block_sparse_moe.experts.27.w1", "model.layers.47.block_sparse_moe.experts.28.w1", "model.layers.47.block_sparse_moe.experts.29.w1", "model.layers.47.block_sparse_moe.experts.30.w1", "model.layers.47.block_sparse_moe.experts.31.w1", "model.layers.47.block_sparse_moe.experts.32.w1", "model.layers.47.block_sparse_moe.experts.33.w1", "model.layers.47.block_sparse_moe.experts.34.w1", "model.layers.47.block_sparse_moe.experts.35.w1", "model.layers.47.block_sparse_moe.experts.36.w1", "model.layers.47.block_sparse_moe.experts.37.w1", "model.layers.47.block_sparse_moe.experts.38.w1", "model.layers.47.block_sparse_moe.experts.39.w1", "model.layers.47.block_sparse_moe.experts.40.w1", "model.layers.47.block_sparse_moe.experts.41.w1", "model.layers.47.block_sparse_moe.experts.42.w1", "model.layers.47.block_sparse_moe.experts.43.w1", "model.layers.47.block_sparse_moe.experts.44.w1", "model.layers.47.block_sparse_moe.experts.45.w1", "model.layers.47.block_sparse_moe.experts.46.w1", "model.layers.47.block_sparse_moe.experts.47.w1", "model.layers.47.block_sparse_moe.experts.48.w1", "model.layers.47.block_sparse_moe.experts.49.w1", "model.layers.47.block_sparse_moe.experts.50.w1", "model.layers.47.block_sparse_moe.experts.51.w1", "model.layers.47.block_sparse_moe.experts.52.w1", "model.layers.47.block_sparse_moe.experts.53.w1", "model.layers.47.block_sparse_moe.experts.54.w1", "model.layers.47.block_sparse_moe.experts.55.w1", "model.layers.47.block_sparse_moe.experts.56.w1", "model.layers.47.block_sparse_moe.experts.57.w1", "model.layers.47.block_sparse_moe.experts.58.w1", "model.layers.47.block_sparse_moe.experts.59.w1", "model.layers.47.block_sparse_moe.experts.60.w1", "model.layers.47.block_sparse_moe.experts.61.w1", "model.layers.47.block_sparse_moe.experts.62.w1", "model.layers.47.block_sparse_moe.experts.63.w1", "model.layers.47.block_sparse_moe.experts.64.w1", "model.layers.47.block_sparse_moe.experts.65.w1", "model.layers.47.block_sparse_moe.experts.66.w1", "model.layers.47.block_sparse_moe.experts.67.w1", "model.layers.47.block_sparse_moe.experts.68.w1", "model.layers.47.block_sparse_moe.experts.69.w1", "model.layers.47.block_sparse_moe.experts.70.w1", "model.layers.47.block_sparse_moe.experts.71.w1", "model.layers.47.block_sparse_moe.experts.72.w1", "model.layers.47.block_sparse_moe.experts.73.w1", "model.layers.47.block_sparse_moe.experts.74.w1", "model.layers.47.block_sparse_moe.experts.75.w1", "model.layers.47.block_sparse_moe.experts.76.w1", "model.layers.47.block_sparse_moe.experts.77.w1", "model.layers.47.block_sparse_moe.experts.78.w1", "model.layers.47.block_sparse_moe.experts.79.w1", "model.layers.47.block_sparse_moe.experts.80.w1", "model.layers.47.block_sparse_moe.experts.81.w1", "model.layers.47.block_sparse_moe.experts.82.w1", "model.layers.47.block_sparse_moe.experts.83.w1", "model.layers.47.block_sparse_moe.experts.84.w1", "model.layers.47.block_sparse_moe.experts.85.w1", "model.layers.47.block_sparse_moe.experts.86.w1", "model.layers.47.block_sparse_moe.experts.87.w1", "model.layers.47.block_sparse_moe.experts.88.w1", "model.layers.47.block_sparse_moe.experts.89.w1", "model.layers.47.block_sparse_moe.experts.90.w1", "model.layers.47.block_sparse_moe.experts.91.w1", "model.layers.47.block_sparse_moe.experts.92.w1", "model.layers.47.block_sparse_moe.experts.93.w1", "model.layers.47.block_sparse_moe.experts.94.w1", "model.layers.47.block_sparse_moe.experts.95.w1", "model.layers.47.block_sparse_moe.experts.96.w1", "model.layers.47.block_sparse_moe.experts.97.w1", "model.layers.47.block_sparse_moe.experts.98.w1", "model.layers.47.block_sparse_moe.experts.99.w1", "model.layers.47.block_sparse_moe.experts.100.w1", "model.layers.47.block_sparse_moe.experts.101.w1", "model.layers.47.block_sparse_moe.experts.102.w1", "model.layers.47.block_sparse_moe.experts.103.w1", "model.layers.47.block_sparse_moe.experts.104.w1", "model.layers.47.block_sparse_moe.experts.105.w1", "model.layers.47.block_sparse_moe.experts.106.w1", "model.layers.47.block_sparse_moe.experts.107.w1", "model.layers.47.block_sparse_moe.experts.108.w1", "model.layers.47.block_sparse_moe.experts.109.w1", "model.layers.47.block_sparse_moe.experts.110.w1", "model.layers.47.block_sparse_moe.experts.111.w1", "model.layers.47.block_sparse_moe.experts.112.w1", "model.layers.47.block_sparse_moe.experts.113.w1", "model.layers.47.block_sparse_moe.experts.114.w1", "model.layers.47.block_sparse_moe.experts.115.w1", "model.layers.47.block_sparse_moe.experts.116.w1", "model.layers.47.block_sparse_moe.experts.117.w1", "model.layers.47.block_sparse_moe.experts.118.w1", "model.layers.47.block_sparse_moe.experts.119.w1", "model.layers.47.block_sparse_moe.experts.120.w1", "model.layers.47.block_sparse_moe.experts.121.w1", "model.layers.47.block_sparse_moe.experts.122.w1", "model.layers.47.block_sparse_moe.experts.123.w1", "model.layers.47.block_sparse_moe.experts.124.w1", "model.layers.47.block_sparse_moe.experts.125.w1", "model.layers.47.block_sparse_moe.experts.126.w1", "model.layers.47.block_sparse_moe.experts.127.w1", "model.layers.47.block_sparse_moe.experts.128.w1", "model.layers.47.block_sparse_moe.experts.129.w1", "model.layers.47.block_sparse_moe.experts.130.w1", "model.layers.47.block_sparse_moe.experts.131.w1", "model.layers.47.block_sparse_moe.experts.132.w1", "model.layers.47.block_sparse_moe.experts.133.w1", "model.layers.47.block_sparse_moe.experts.134.w1", "model.layers.47.block_sparse_moe.experts.135.w1", "model.layers.47.block_sparse_moe.experts.136.w1", "model.layers.47.block_sparse_moe.experts.137.w1", "model.layers.47.block_sparse_moe.experts.138.w1", "model.layers.47.block_sparse_moe.experts.139.w1", "model.layers.47.block_sparse_moe.experts.140.w1", "model.layers.47.block_sparse_moe.experts.141.w1", "model.layers.47.block_sparse_moe.experts.142.w1", "model.layers.47.block_sparse_moe.experts.143.w1", "model.layers.47.block_sparse_moe.experts.144.w1", "model.layers.47.block_sparse_moe.experts.145.w1", "model.layers.47.block_sparse_moe.experts.146.w1", "model.layers.47.block_sparse_moe.experts.147.w1", "model.layers.47.block_sparse_moe.experts.148.w1", "model.layers.47.block_sparse_moe.experts.149.w1", "model.layers.47.block_sparse_moe.experts.150.w1", "model.layers.47.block_sparse_moe.experts.151.w1", "model.layers.47.block_sparse_moe.experts.152.w1", "model.layers.47.block_sparse_moe.experts.153.w1", "model.layers.47.block_sparse_moe.experts.154.w1", "model.layers.47.block_sparse_moe.experts.155.w1", "model.layers.47.block_sparse_moe.experts.156.w1", "model.layers.47.block_sparse_moe.experts.157.w1", "model.layers.47.block_sparse_moe.experts.158.w1", "model.layers.47.block_sparse_moe.experts.159.w1", "model.layers.47.block_sparse_moe.experts.160.w1", "model.layers.47.block_sparse_moe.experts.161.w1", "model.layers.47.block_sparse_moe.experts.162.w1", "model.layers.47.block_sparse_moe.experts.163.w1", "model.layers.47.block_sparse_moe.experts.164.w1", "model.layers.47.block_sparse_moe.experts.165.w1", "model.layers.47.block_sparse_moe.experts.166.w1", "model.layers.47.block_sparse_moe.experts.167.w1", "model.layers.47.block_sparse_moe.experts.168.w1", "model.layers.47.block_sparse_moe.experts.169.w1", "model.layers.47.block_sparse_moe.experts.170.w1", "model.layers.47.block_sparse_moe.experts.171.w1", "model.layers.47.block_sparse_moe.experts.172.w1", "model.layers.47.block_sparse_moe.experts.173.w1", "model.layers.47.block_sparse_moe.experts.174.w1", "model.layers.47.block_sparse_moe.experts.175.w1", "model.layers.47.block_sparse_moe.experts.176.w1", "model.layers.47.block_sparse_moe.experts.177.w1", "model.layers.47.block_sparse_moe.experts.178.w1", "model.layers.47.block_sparse_moe.experts.179.w1", "model.layers.47.block_sparse_moe.experts.180.w1", "model.layers.47.block_sparse_moe.experts.181.w1", "model.layers.47.block_sparse_moe.experts.182.w1", "model.layers.47.block_sparse_moe.experts.183.w1", "model.layers.47.block_sparse_moe.experts.184.w1", "model.layers.47.block_sparse_moe.experts.185.w1", "model.layers.47.block_sparse_moe.experts.186.w1", "model.layers.47.block_sparse_moe.experts.187.w1", "model.layers.47.block_sparse_moe.experts.188.w1", "model.layers.47.block_sparse_moe.experts.189.w1", "model.layers.47.block_sparse_moe.experts.190.w1", "model.layers.47.block_sparse_moe.experts.191.w1", "model.layers.47.block_sparse_moe.experts.192.w1", "model.layers.47.block_sparse_moe.experts.193.w1", "model.layers.47.block_sparse_moe.experts.194.w1", "model.layers.47.block_sparse_moe.experts.195.w1", "model.layers.47.block_sparse_moe.experts.196.w1", "model.layers.47.block_sparse_moe.experts.197.w1", "model.layers.47.block_sparse_moe.experts.198.w1", "model.layers.47.block_sparse_moe.experts.199.w1", "model.layers.47.block_sparse_moe.experts.200.w1", "model.layers.47.block_sparse_moe.experts.201.w1", "model.layers.47.block_sparse_moe.experts.202.w1", "model.layers.47.block_sparse_moe.experts.203.w1", "model.layers.47.block_sparse_moe.experts.204.w1", "model.layers.47.block_sparse_moe.experts.205.w1", "model.layers.47.block_sparse_moe.experts.206.w1", "model.layers.47.block_sparse_moe.experts.207.w1", "model.layers.47.block_sparse_moe.experts.208.w1", "model.layers.47.block_sparse_moe.experts.209.w1", "model.layers.47.block_sparse_moe.experts.210.w1", "model.layers.47.block_sparse_moe.experts.211.w1", "model.layers.47.block_sparse_moe.experts.212.w1", "model.layers.47.block_sparse_moe.experts.213.w1", "model.layers.47.block_sparse_moe.experts.214.w1", "model.layers.47.block_sparse_moe.experts.215.w1", "model.layers.47.block_sparse_moe.experts.216.w1", "model.layers.47.block_sparse_moe.experts.217.w1", "model.layers.47.block_sparse_moe.experts.218.w1", "model.layers.47.block_sparse_moe.experts.219.w1", "model.layers.47.block_sparse_moe.experts.220.w1", "model.layers.47.block_sparse_moe.experts.221.w1", "model.layers.47.block_sparse_moe.experts.222.w1", "model.layers.47.block_sparse_moe.experts.223.w1", "model.layers.47.block_sparse_moe.experts.224.w1", "model.layers.47.block_sparse_moe.experts.225.w1", "model.layers.47.block_sparse_moe.experts.226.w1", "model.layers.47.block_sparse_moe.experts.227.w1", "model.layers.47.block_sparse_moe.experts.228.w1", "model.layers.47.block_sparse_moe.experts.229.w1", "model.layers.47.block_sparse_moe.experts.230.w1", "model.layers.47.block_sparse_moe.experts.231.w1", "model.layers.47.block_sparse_moe.experts.232.w1", "model.layers.47.block_sparse_moe.experts.233.w1", "model.layers.47.block_sparse_moe.experts.234.w1", "model.layers.47.block_sparse_moe.experts.235.w1", "model.layers.47.block_sparse_moe.experts.236.w1", "model.layers.47.block_sparse_moe.experts.237.w1", "model.layers.47.block_sparse_moe.experts.238.w1", "model.layers.47.block_sparse_moe.experts.239.w1", "model.layers.47.block_sparse_moe.experts.240.w1", "model.layers.47.block_sparse_moe.experts.241.w1", "model.layers.47.block_sparse_moe.experts.242.w1", "model.layers.47.block_sparse_moe.experts.243.w1", "model.layers.47.block_sparse_moe.experts.244.w1", "model.layers.47.block_sparse_moe.experts.245.w1", "model.layers.47.block_sparse_moe.experts.246.w1", "model.layers.47.block_sparse_moe.experts.247.w1", "model.layers.47.block_sparse_moe.experts.248.w1", "model.layers.47.block_sparse_moe.experts.249.w1", "model.layers.47.block_sparse_moe.experts.250.w1", "model.layers.47.block_sparse_moe.experts.251.w1", "model.layers.47.block_sparse_moe.experts.252.w1", "model.layers.47.block_sparse_moe.experts.253.w1", "model.layers.47.block_sparse_moe.experts.254.w1", "model.layers.47.block_sparse_moe.experts.255.w1", "model.layers.47.block_sparse_moe.experts.0.w3", "model.layers.47.block_sparse_moe.experts.1.w3", "model.layers.47.block_sparse_moe.experts.2.w3", "model.layers.47.block_sparse_moe.experts.3.w3", "model.layers.47.block_sparse_moe.experts.4.w3", "model.layers.47.block_sparse_moe.experts.5.w3", "model.layers.47.block_sparse_moe.experts.6.w3", "model.layers.47.block_sparse_moe.experts.7.w3", "model.layers.47.block_sparse_moe.experts.8.w3", "model.layers.47.block_sparse_moe.experts.9.w3", "model.layers.47.block_sparse_moe.experts.10.w3", "model.layers.47.block_sparse_moe.experts.11.w3", "model.layers.47.block_sparse_moe.experts.12.w3", "model.layers.47.block_sparse_moe.experts.13.w3", "model.layers.47.block_sparse_moe.experts.14.w3", "model.layers.47.block_sparse_moe.experts.15.w3", "model.layers.47.block_sparse_moe.experts.16.w3", "model.layers.47.block_sparse_moe.experts.17.w3", "model.layers.47.block_sparse_moe.experts.18.w3", "model.layers.47.block_sparse_moe.experts.19.w3", "model.layers.47.block_sparse_moe.experts.20.w3", "model.layers.47.block_sparse_moe.experts.21.w3", "model.layers.47.block_sparse_moe.experts.22.w3", "model.layers.47.block_sparse_moe.experts.23.w3", "model.layers.47.block_sparse_moe.experts.24.w3", "model.layers.47.block_sparse_moe.experts.25.w3", "model.layers.47.block_sparse_moe.experts.26.w3", "model.layers.47.block_sparse_moe.experts.27.w3", "model.layers.47.block_sparse_moe.experts.28.w3", "model.layers.47.block_sparse_moe.experts.29.w3", "model.layers.47.block_sparse_moe.experts.30.w3", "model.layers.47.block_sparse_moe.experts.31.w3", "model.layers.47.block_sparse_moe.experts.32.w3", "model.layers.47.block_sparse_moe.experts.33.w3", "model.layers.47.block_sparse_moe.experts.34.w3", "model.layers.47.block_sparse_moe.experts.35.w3", "model.layers.47.block_sparse_moe.experts.36.w3", "model.layers.47.block_sparse_moe.experts.37.w3", "model.layers.47.block_sparse_moe.experts.38.w3", "model.layers.47.block_sparse_moe.experts.39.w3", "model.layers.47.block_sparse_moe.experts.40.w3", "model.layers.47.block_sparse_moe.experts.41.w3", "model.layers.47.block_sparse_moe.experts.42.w3", "model.layers.47.block_sparse_moe.experts.43.w3", "model.layers.47.block_sparse_moe.experts.44.w3", "model.layers.47.block_sparse_moe.experts.45.w3", "model.layers.47.block_sparse_moe.experts.46.w3", "model.layers.47.block_sparse_moe.experts.47.w3", "model.layers.47.block_sparse_moe.experts.48.w3", "model.layers.47.block_sparse_moe.experts.49.w3", "model.layers.47.block_sparse_moe.experts.50.w3", "model.layers.47.block_sparse_moe.experts.51.w3", "model.layers.47.block_sparse_moe.experts.52.w3", "model.layers.47.block_sparse_moe.experts.53.w3", "model.layers.47.block_sparse_moe.experts.54.w3", "model.layers.47.block_sparse_moe.experts.55.w3", "model.layers.47.block_sparse_moe.experts.56.w3", "model.layers.47.block_sparse_moe.experts.57.w3", "model.layers.47.block_sparse_moe.experts.58.w3", "model.layers.47.block_sparse_moe.experts.59.w3", "model.layers.47.block_sparse_moe.experts.60.w3", "model.layers.47.block_sparse_moe.experts.61.w3", "model.layers.47.block_sparse_moe.experts.62.w3", "model.layers.47.block_sparse_moe.experts.63.w3", "model.layers.47.block_sparse_moe.experts.64.w3", "model.layers.47.block_sparse_moe.experts.65.w3", "model.layers.47.block_sparse_moe.experts.66.w3", "model.layers.47.block_sparse_moe.experts.67.w3", "model.layers.47.block_sparse_moe.experts.68.w3", "model.layers.47.block_sparse_moe.experts.69.w3", "model.layers.47.block_sparse_moe.experts.70.w3", "model.layers.47.block_sparse_moe.experts.71.w3", "model.layers.47.block_sparse_moe.experts.72.w3", "model.layers.47.block_sparse_moe.experts.73.w3", "model.layers.47.block_sparse_moe.experts.74.w3", "model.layers.47.block_sparse_moe.experts.75.w3", "model.layers.47.block_sparse_moe.experts.76.w3", "model.layers.47.block_sparse_moe.experts.77.w3", "model.layers.47.block_sparse_moe.experts.78.w3", "model.layers.47.block_sparse_moe.experts.79.w3", "model.layers.47.block_sparse_moe.experts.80.w3", "model.layers.47.block_sparse_moe.experts.81.w3", "model.layers.47.block_sparse_moe.experts.82.w3", "model.layers.47.block_sparse_moe.experts.83.w3", "model.layers.47.block_sparse_moe.experts.84.w3", "model.layers.47.block_sparse_moe.experts.85.w3", "model.layers.47.block_sparse_moe.experts.86.w3", "model.layers.47.block_sparse_moe.experts.87.w3", "model.layers.47.block_sparse_moe.experts.88.w3", "model.layers.47.block_sparse_moe.experts.89.w3", "model.layers.47.block_sparse_moe.experts.90.w3", "model.layers.47.block_sparse_moe.experts.91.w3", "model.layers.47.block_sparse_moe.experts.92.w3", "model.layers.47.block_sparse_moe.experts.93.w3", "model.layers.47.block_sparse_moe.experts.94.w3", "model.layers.47.block_sparse_moe.experts.95.w3", "model.layers.47.block_sparse_moe.experts.96.w3", "model.layers.47.block_sparse_moe.experts.97.w3", "model.layers.47.block_sparse_moe.experts.98.w3", "model.layers.47.block_sparse_moe.experts.99.w3", "model.layers.47.block_sparse_moe.experts.100.w3", "model.layers.47.block_sparse_moe.experts.101.w3", "model.layers.47.block_sparse_moe.experts.102.w3", "model.layers.47.block_sparse_moe.experts.103.w3", "model.layers.47.block_sparse_moe.experts.104.w3", "model.layers.47.block_sparse_moe.experts.105.w3", "model.layers.47.block_sparse_moe.experts.106.w3", "model.layers.47.block_sparse_moe.experts.107.w3", "model.layers.47.block_sparse_moe.experts.108.w3", "model.layers.47.block_sparse_moe.experts.109.w3", "model.layers.47.block_sparse_moe.experts.110.w3", "model.layers.47.block_sparse_moe.experts.111.w3", "model.layers.47.block_sparse_moe.experts.112.w3", "model.layers.47.block_sparse_moe.experts.113.w3", "model.layers.47.block_sparse_moe.experts.114.w3", "model.layers.47.block_sparse_moe.experts.115.w3", "model.layers.47.block_sparse_moe.experts.116.w3", "model.layers.47.block_sparse_moe.experts.117.w3", "model.layers.47.block_sparse_moe.experts.118.w3", "model.layers.47.block_sparse_moe.experts.119.w3", "model.layers.47.block_sparse_moe.experts.120.w3", "model.layers.47.block_sparse_moe.experts.121.w3", "model.layers.47.block_sparse_moe.experts.122.w3", "model.layers.47.block_sparse_moe.experts.123.w3", "model.layers.47.block_sparse_moe.experts.124.w3", "model.layers.47.block_sparse_moe.experts.125.w3", "model.layers.47.block_sparse_moe.experts.126.w3", "model.layers.47.block_sparse_moe.experts.127.w3", "model.layers.47.block_sparse_moe.experts.128.w3", "model.layers.47.block_sparse_moe.experts.129.w3", "model.layers.47.block_sparse_moe.experts.130.w3", "model.layers.47.block_sparse_moe.experts.131.w3", "model.layers.47.block_sparse_moe.experts.132.w3", "model.layers.47.block_sparse_moe.experts.133.w3", "model.layers.47.block_sparse_moe.experts.134.w3", "model.layers.47.block_sparse_moe.experts.135.w3", "model.layers.47.block_sparse_moe.experts.136.w3", "model.layers.47.block_sparse_moe.experts.137.w3", "model.layers.47.block_sparse_moe.experts.138.w3", "model.layers.47.block_sparse_moe.experts.139.w3", "model.layers.47.block_sparse_moe.experts.140.w3", "model.layers.47.block_sparse_moe.experts.141.w3", "model.layers.47.block_sparse_moe.experts.142.w3", "model.layers.47.block_sparse_moe.experts.143.w3", "model.layers.47.block_sparse_moe.experts.144.w3", "model.layers.47.block_sparse_moe.experts.145.w3", "model.layers.47.block_sparse_moe.experts.146.w3", "model.layers.47.block_sparse_moe.experts.147.w3", "model.layers.47.block_sparse_moe.experts.148.w3", "model.layers.47.block_sparse_moe.experts.149.w3", "model.layers.47.block_sparse_moe.experts.150.w3", "model.layers.47.block_sparse_moe.experts.151.w3", "model.layers.47.block_sparse_moe.experts.152.w3", "model.layers.47.block_sparse_moe.experts.153.w3", "model.layers.47.block_sparse_moe.experts.154.w3", "model.layers.47.block_sparse_moe.experts.155.w3", "model.layers.47.block_sparse_moe.experts.156.w3", "model.layers.47.block_sparse_moe.experts.157.w3", "model.layers.47.block_sparse_moe.experts.158.w3", "model.layers.47.block_sparse_moe.experts.159.w3", "model.layers.47.block_sparse_moe.experts.160.w3", "model.layers.47.block_sparse_moe.experts.161.w3", "model.layers.47.block_sparse_moe.experts.162.w3", "model.layers.47.block_sparse_moe.experts.163.w3", "model.layers.47.block_sparse_moe.experts.164.w3", "model.layers.47.block_sparse_moe.experts.165.w3", "model.layers.47.block_sparse_moe.experts.166.w3", "model.layers.47.block_sparse_moe.experts.167.w3", "model.layers.47.block_sparse_moe.experts.168.w3", "model.layers.47.block_sparse_moe.experts.169.w3", "model.layers.47.block_sparse_moe.experts.170.w3", "model.layers.47.block_sparse_moe.experts.171.w3", "model.layers.47.block_sparse_moe.experts.172.w3", "model.layers.47.block_sparse_moe.experts.173.w3", "model.layers.47.block_sparse_moe.experts.174.w3", "model.layers.47.block_sparse_moe.experts.175.w3", "model.layers.47.block_sparse_moe.experts.176.w3", "model.layers.47.block_sparse_moe.experts.177.w3", "model.layers.47.block_sparse_moe.experts.178.w3", "model.layers.47.block_sparse_moe.experts.179.w3", "model.layers.47.block_sparse_moe.experts.180.w3", "model.layers.47.block_sparse_moe.experts.181.w3", "model.layers.47.block_sparse_moe.experts.182.w3", "model.layers.47.block_sparse_moe.experts.183.w3", "model.layers.47.block_sparse_moe.experts.184.w3", "model.layers.47.block_sparse_moe.experts.185.w3", "model.layers.47.block_sparse_moe.experts.186.w3", "model.layers.47.block_sparse_moe.experts.187.w3", "model.layers.47.block_sparse_moe.experts.188.w3", "model.layers.47.block_sparse_moe.experts.189.w3", "model.layers.47.block_sparse_moe.experts.190.w3", "model.layers.47.block_sparse_moe.experts.191.w3", "model.layers.47.block_sparse_moe.experts.192.w3", "model.layers.47.block_sparse_moe.experts.193.w3", "model.layers.47.block_sparse_moe.experts.194.w3", "model.layers.47.block_sparse_moe.experts.195.w3", "model.layers.47.block_sparse_moe.experts.196.w3", "model.layers.47.block_sparse_moe.experts.197.w3", "model.layers.47.block_sparse_moe.experts.198.w3", "model.layers.47.block_sparse_moe.experts.199.w3", "model.layers.47.block_sparse_moe.experts.200.w3", "model.layers.47.block_sparse_moe.experts.201.w3", "model.layers.47.block_sparse_moe.experts.202.w3", "model.layers.47.block_sparse_moe.experts.203.w3", "model.layers.47.block_sparse_moe.experts.204.w3", "model.layers.47.block_sparse_moe.experts.205.w3", "model.layers.47.block_sparse_moe.experts.206.w3", "model.layers.47.block_sparse_moe.experts.207.w3", "model.layers.47.block_sparse_moe.experts.208.w3", "model.layers.47.block_sparse_moe.experts.209.w3", "model.layers.47.block_sparse_moe.experts.210.w3", "model.layers.47.block_sparse_moe.experts.211.w3", "model.layers.47.block_sparse_moe.experts.212.w3", "model.layers.47.block_sparse_moe.experts.213.w3", "model.layers.47.block_sparse_moe.experts.214.w3", "model.layers.47.block_sparse_moe.experts.215.w3", "model.layers.47.block_sparse_moe.experts.216.w3", "model.layers.47.block_sparse_moe.experts.217.w3", "model.layers.47.block_sparse_moe.experts.218.w3", "model.layers.47.block_sparse_moe.experts.219.w3", "model.layers.47.block_sparse_moe.experts.220.w3", "model.layers.47.block_sparse_moe.experts.221.w3", "model.layers.47.block_sparse_moe.experts.222.w3", "model.layers.47.block_sparse_moe.experts.223.w3", "model.layers.47.block_sparse_moe.experts.224.w3", "model.layers.47.block_sparse_moe.experts.225.w3", "model.layers.47.block_sparse_moe.experts.226.w3", "model.layers.47.block_sparse_moe.experts.227.w3", "model.layers.47.block_sparse_moe.experts.228.w3", "model.layers.47.block_sparse_moe.experts.229.w3", "model.layers.47.block_sparse_moe.experts.230.w3", "model.layers.47.block_sparse_moe.experts.231.w3", "model.layers.47.block_sparse_moe.experts.232.w3", "model.layers.47.block_sparse_moe.experts.233.w3", "model.layers.47.block_sparse_moe.experts.234.w3", "model.layers.47.block_sparse_moe.experts.235.w3", "model.layers.47.block_sparse_moe.experts.236.w3", "model.layers.47.block_sparse_moe.experts.237.w3", "model.layers.47.block_sparse_moe.experts.238.w3", "model.layers.47.block_sparse_moe.experts.239.w3", "model.layers.47.block_sparse_moe.experts.240.w3", "model.layers.47.block_sparse_moe.experts.241.w3", "model.layers.47.block_sparse_moe.experts.242.w3", "model.layers.47.block_sparse_moe.experts.243.w3", "model.layers.47.block_sparse_moe.experts.244.w3", "model.layers.47.block_sparse_moe.experts.245.w3", "model.layers.47.block_sparse_moe.experts.246.w3", "model.layers.47.block_sparse_moe.experts.247.w3", "model.layers.47.block_sparse_moe.experts.248.w3", "model.layers.47.block_sparse_moe.experts.249.w3", "model.layers.47.block_sparse_moe.experts.250.w3", "model.layers.47.block_sparse_moe.experts.251.w3", "model.layers.47.block_sparse_moe.experts.252.w3", "model.layers.47.block_sparse_moe.experts.253.w3", "model.layers.47.block_sparse_moe.experts.254.w3", "model.layers.47.block_sparse_moe.experts.255.w3", "model.layers.47.block_sparse_moe.experts.0.w2", "model.layers.47.block_sparse_moe.experts.1.w2", "model.layers.47.block_sparse_moe.experts.2.w2", "model.layers.47.block_sparse_moe.experts.3.w2", "model.layers.47.block_sparse_moe.experts.4.w2", "model.layers.47.block_sparse_moe.experts.5.w2", "model.layers.47.block_sparse_moe.experts.6.w2", "model.layers.47.block_sparse_moe.experts.7.w2", "model.layers.47.block_sparse_moe.experts.8.w2", "model.layers.47.block_sparse_moe.experts.9.w2", "model.layers.47.block_sparse_moe.experts.10.w2", "model.layers.47.block_sparse_moe.experts.11.w2", "model.layers.47.block_sparse_moe.experts.12.w2", "model.layers.47.block_sparse_moe.experts.13.w2", "model.layers.47.block_sparse_moe.experts.14.w2", "model.layers.47.block_sparse_moe.experts.15.w2", "model.layers.47.block_sparse_moe.experts.16.w2", "model.layers.47.block_sparse_moe.experts.17.w2", "model.layers.47.block_sparse_moe.experts.18.w2", "model.layers.47.block_sparse_moe.experts.19.w2", "model.layers.47.block_sparse_moe.experts.20.w2", "model.layers.47.block_sparse_moe.experts.21.w2", "model.layers.47.block_sparse_moe.experts.22.w2", "model.layers.47.block_sparse_moe.experts.23.w2", "model.layers.47.block_sparse_moe.experts.24.w2", "model.layers.47.block_sparse_moe.experts.25.w2", "model.layers.47.block_sparse_moe.experts.26.w2", "model.layers.47.block_sparse_moe.experts.27.w2", "model.layers.47.block_sparse_moe.experts.28.w2", "model.layers.47.block_sparse_moe.experts.29.w2", "model.layers.47.block_sparse_moe.experts.30.w2", "model.layers.47.block_sparse_moe.experts.31.w2", "model.layers.47.block_sparse_moe.experts.32.w2", "model.layers.47.block_sparse_moe.experts.33.w2", "model.layers.47.block_sparse_moe.experts.34.w2", "model.layers.47.block_sparse_moe.experts.35.w2", "model.layers.47.block_sparse_moe.experts.36.w2", "model.layers.47.block_sparse_moe.experts.37.w2", "model.layers.47.block_sparse_moe.experts.38.w2", "model.layers.47.block_sparse_moe.experts.39.w2", "model.layers.47.block_sparse_moe.experts.40.w2", "model.layers.47.block_sparse_moe.experts.41.w2", "model.layers.47.block_sparse_moe.experts.42.w2", "model.layers.47.block_sparse_moe.experts.43.w2", "model.layers.47.block_sparse_moe.experts.44.w2", "model.layers.47.block_sparse_moe.experts.45.w2", "model.layers.47.block_sparse_moe.experts.46.w2", "model.layers.47.block_sparse_moe.experts.47.w2", "model.layers.47.block_sparse_moe.experts.48.w2", "model.layers.47.block_sparse_moe.experts.49.w2", "model.layers.47.block_sparse_moe.experts.50.w2", "model.layers.47.block_sparse_moe.experts.51.w2", "model.layers.47.block_sparse_moe.experts.52.w2", "model.layers.47.block_sparse_moe.experts.53.w2", "model.layers.47.block_sparse_moe.experts.54.w2", "model.layers.47.block_sparse_moe.experts.55.w2", "model.layers.47.block_sparse_moe.experts.56.w2", "model.layers.47.block_sparse_moe.experts.57.w2", "model.layers.47.block_sparse_moe.experts.58.w2", "model.layers.47.block_sparse_moe.experts.59.w2", "model.layers.47.block_sparse_moe.experts.60.w2", "model.layers.47.block_sparse_moe.experts.61.w2", "model.layers.47.block_sparse_moe.experts.62.w2", "model.layers.47.block_sparse_moe.experts.63.w2", "model.layers.47.block_sparse_moe.experts.64.w2", "model.layers.47.block_sparse_moe.experts.65.w2", "model.layers.47.block_sparse_moe.experts.66.w2", "model.layers.47.block_sparse_moe.experts.67.w2", "model.layers.47.block_sparse_moe.experts.68.w2", "model.layers.47.block_sparse_moe.experts.69.w2", "model.layers.47.block_sparse_moe.experts.70.w2", "model.layers.47.block_sparse_moe.experts.71.w2", "model.layers.47.block_sparse_moe.experts.72.w2", "model.layers.47.block_sparse_moe.experts.73.w2", "model.layers.47.block_sparse_moe.experts.74.w2", "model.layers.47.block_sparse_moe.experts.75.w2", "model.layers.47.block_sparse_moe.experts.76.w2", "model.layers.47.block_sparse_moe.experts.77.w2", "model.layers.47.block_sparse_moe.experts.78.w2", "model.layers.47.block_sparse_moe.experts.79.w2", "model.layers.47.block_sparse_moe.experts.80.w2", "model.layers.47.block_sparse_moe.experts.81.w2", "model.layers.47.block_sparse_moe.experts.82.w2", "model.layers.47.block_sparse_moe.experts.83.w2", "model.layers.47.block_sparse_moe.experts.84.w2", "model.layers.47.block_sparse_moe.experts.85.w2", "model.layers.47.block_sparse_moe.experts.86.w2", "model.layers.47.block_sparse_moe.experts.87.w2", "model.layers.47.block_sparse_moe.experts.88.w2", "model.layers.47.block_sparse_moe.experts.89.w2", "model.layers.47.block_sparse_moe.experts.90.w2", "model.layers.47.block_sparse_moe.experts.91.w2", "model.layers.47.block_sparse_moe.experts.92.w2", "model.layers.47.block_sparse_moe.experts.93.w2", "model.layers.47.block_sparse_moe.experts.94.w2", "model.layers.47.block_sparse_moe.experts.95.w2", "model.layers.47.block_sparse_moe.experts.96.w2", "model.layers.47.block_sparse_moe.experts.97.w2", "model.layers.47.block_sparse_moe.experts.98.w2", "model.layers.47.block_sparse_moe.experts.99.w2", "model.layers.47.block_sparse_moe.experts.100.w2", "model.layers.47.block_sparse_moe.experts.101.w2", "model.layers.47.block_sparse_moe.experts.102.w2", "model.layers.47.block_sparse_moe.experts.103.w2", "model.layers.47.block_sparse_moe.experts.104.w2", "model.layers.47.block_sparse_moe.experts.105.w2", "model.layers.47.block_sparse_moe.experts.106.w2", "model.layers.47.block_sparse_moe.experts.107.w2", "model.layers.47.block_sparse_moe.experts.108.w2", "model.layers.47.block_sparse_moe.experts.109.w2", "model.layers.47.block_sparse_moe.experts.110.w2", "model.layers.47.block_sparse_moe.experts.111.w2", "model.layers.47.block_sparse_moe.experts.112.w2", "model.layers.47.block_sparse_moe.experts.113.w2", "model.layers.47.block_sparse_moe.experts.114.w2", "model.layers.47.block_sparse_moe.experts.115.w2", "model.layers.47.block_sparse_moe.experts.116.w2", "model.layers.47.block_sparse_moe.experts.117.w2", "model.layers.47.block_sparse_moe.experts.118.w2", "model.layers.47.block_sparse_moe.experts.119.w2", "model.layers.47.block_sparse_moe.experts.120.w2", "model.layers.47.block_sparse_moe.experts.121.w2", "model.layers.47.block_sparse_moe.experts.122.w2", "model.layers.47.block_sparse_moe.experts.123.w2", "model.layers.47.block_sparse_moe.experts.124.w2", "model.layers.47.block_sparse_moe.experts.125.w2", "model.layers.47.block_sparse_moe.experts.126.w2", "model.layers.47.block_sparse_moe.experts.127.w2", "model.layers.47.block_sparse_moe.experts.128.w2", "model.layers.47.block_sparse_moe.experts.129.w2", "model.layers.47.block_sparse_moe.experts.130.w2", "model.layers.47.block_sparse_moe.experts.131.w2", "model.layers.47.block_sparse_moe.experts.132.w2", "model.layers.47.block_sparse_moe.experts.133.w2", "model.layers.47.block_sparse_moe.experts.134.w2", "model.layers.47.block_sparse_moe.experts.135.w2", "model.layers.47.block_sparse_moe.experts.136.w2", "model.layers.47.block_sparse_moe.experts.137.w2", "model.layers.47.block_sparse_moe.experts.138.w2", "model.layers.47.block_sparse_moe.experts.139.w2", "model.layers.47.block_sparse_moe.experts.140.w2", "model.layers.47.block_sparse_moe.experts.141.w2", "model.layers.47.block_sparse_moe.experts.142.w2", "model.layers.47.block_sparse_moe.experts.143.w2", "model.layers.47.block_sparse_moe.experts.144.w2", "model.layers.47.block_sparse_moe.experts.145.w2", "model.layers.47.block_sparse_moe.experts.146.w2", "model.layers.47.block_sparse_moe.experts.147.w2", "model.layers.47.block_sparse_moe.experts.148.w2", "model.layers.47.block_sparse_moe.experts.149.w2", "model.layers.47.block_sparse_moe.experts.150.w2", "model.layers.47.block_sparse_moe.experts.151.w2", "model.layers.47.block_sparse_moe.experts.152.w2", "model.layers.47.block_sparse_moe.experts.153.w2", "model.layers.47.block_sparse_moe.experts.154.w2", "model.layers.47.block_sparse_moe.experts.155.w2", "model.layers.47.block_sparse_moe.experts.156.w2", "model.layers.47.block_sparse_moe.experts.157.w2", "model.layers.47.block_sparse_moe.experts.158.w2", "model.layers.47.block_sparse_moe.experts.159.w2", "model.layers.47.block_sparse_moe.experts.160.w2", "model.layers.47.block_sparse_moe.experts.161.w2", "model.layers.47.block_sparse_moe.experts.162.w2", "model.layers.47.block_sparse_moe.experts.163.w2", "model.layers.47.block_sparse_moe.experts.164.w2", "model.layers.47.block_sparse_moe.experts.165.w2", "model.layers.47.block_sparse_moe.experts.166.w2", "model.layers.47.block_sparse_moe.experts.167.w2", "model.layers.47.block_sparse_moe.experts.168.w2", "model.layers.47.block_sparse_moe.experts.169.w2", "model.layers.47.block_sparse_moe.experts.170.w2", "model.layers.47.block_sparse_moe.experts.171.w2", "model.layers.47.block_sparse_moe.experts.172.w2", "model.layers.47.block_sparse_moe.experts.173.w2", "model.layers.47.block_sparse_moe.experts.174.w2", "model.layers.47.block_sparse_moe.experts.175.w2", "model.layers.47.block_sparse_moe.experts.176.w2", "model.layers.47.block_sparse_moe.experts.177.w2", "model.layers.47.block_sparse_moe.experts.178.w2", "model.layers.47.block_sparse_moe.experts.179.w2", "model.layers.47.block_sparse_moe.experts.180.w2", "model.layers.47.block_sparse_moe.experts.181.w2", "model.layers.47.block_sparse_moe.experts.182.w2", "model.layers.47.block_sparse_moe.experts.183.w2", "model.layers.47.block_sparse_moe.experts.184.w2", "model.layers.47.block_sparse_moe.experts.185.w2", "model.layers.47.block_sparse_moe.experts.186.w2", "model.layers.47.block_sparse_moe.experts.187.w2", "model.layers.47.block_sparse_moe.experts.188.w2", "model.layers.47.block_sparse_moe.experts.189.w2", "model.layers.47.block_sparse_moe.experts.190.w2", "model.layers.47.block_sparse_moe.experts.191.w2", "model.layers.47.block_sparse_moe.experts.192.w2", "model.layers.47.block_sparse_moe.experts.193.w2", "model.layers.47.block_sparse_moe.experts.194.w2", "model.layers.47.block_sparse_moe.experts.195.w2", "model.layers.47.block_sparse_moe.experts.196.w2", "model.layers.47.block_sparse_moe.experts.197.w2", "model.layers.47.block_sparse_moe.experts.198.w2", "model.layers.47.block_sparse_moe.experts.199.w2", "model.layers.47.block_sparse_moe.experts.200.w2", "model.layers.47.block_sparse_moe.experts.201.w2", "model.layers.47.block_sparse_moe.experts.202.w2", "model.layers.47.block_sparse_moe.experts.203.w2", "model.layers.47.block_sparse_moe.experts.204.w2", "model.layers.47.block_sparse_moe.experts.205.w2", "model.layers.47.block_sparse_moe.experts.206.w2", "model.layers.47.block_sparse_moe.experts.207.w2", "model.layers.47.block_sparse_moe.experts.208.w2", "model.layers.47.block_sparse_moe.experts.209.w2", "model.layers.47.block_sparse_moe.experts.210.w2", "model.layers.47.block_sparse_moe.experts.211.w2", "model.layers.47.block_sparse_moe.experts.212.w2", "model.layers.47.block_sparse_moe.experts.213.w2", "model.layers.47.block_sparse_moe.experts.214.w2", "model.layers.47.block_sparse_moe.experts.215.w2", "model.layers.47.block_sparse_moe.experts.216.w2", "model.layers.47.block_sparse_moe.experts.217.w2", "model.layers.47.block_sparse_moe.experts.218.w2", "model.layers.47.block_sparse_moe.experts.219.w2", "model.layers.47.block_sparse_moe.experts.220.w2", "model.layers.47.block_sparse_moe.experts.221.w2", "model.layers.47.block_sparse_moe.experts.222.w2", "model.layers.47.block_sparse_moe.experts.223.w2", "model.layers.47.block_sparse_moe.experts.224.w2", "model.layers.47.block_sparse_moe.experts.225.w2", "model.layers.47.block_sparse_moe.experts.226.w2", "model.layers.47.block_sparse_moe.experts.227.w2", "model.layers.47.block_sparse_moe.experts.228.w2", "model.layers.47.block_sparse_moe.experts.229.w2", "model.layers.47.block_sparse_moe.experts.230.w2", "model.layers.47.block_sparse_moe.experts.231.w2", "model.layers.47.block_sparse_moe.experts.232.w2", "model.layers.47.block_sparse_moe.experts.233.w2", "model.layers.47.block_sparse_moe.experts.234.w2", "model.layers.47.block_sparse_moe.experts.235.w2", "model.layers.47.block_sparse_moe.experts.236.w2", "model.layers.47.block_sparse_moe.experts.237.w2", "model.layers.47.block_sparse_moe.experts.238.w2", "model.layers.47.block_sparse_moe.experts.239.w2", "model.layers.47.block_sparse_moe.experts.240.w2", "model.layers.47.block_sparse_moe.experts.241.w2", "model.layers.47.block_sparse_moe.experts.242.w2", "model.layers.47.block_sparse_moe.experts.243.w2", "model.layers.47.block_sparse_moe.experts.244.w2", "model.layers.47.block_sparse_moe.experts.245.w2", "model.layers.47.block_sparse_moe.experts.246.w2", "model.layers.47.block_sparse_moe.experts.247.w2", "model.layers.47.block_sparse_moe.experts.248.w2", "model.layers.47.block_sparse_moe.experts.249.w2", "model.layers.47.block_sparse_moe.experts.250.w2", "model.layers.47.block_sparse_moe.experts.251.w2", "model.layers.47.block_sparse_moe.experts.252.w2", "model.layers.47.block_sparse_moe.experts.253.w2", "model.layers.47.block_sparse_moe.experts.254.w2", "model.layers.47.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -8.709765970704719e-05, "dbits": 3623878656 } ] }, { "idx": 96, "layers": [ "model.layers.48.self_attn.q_proj", "model.layers.48.self_attn.k_proj", "model.layers.48.self_attn.v_proj", "model.layers.48.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0023302596062421688, "dbits": 44040192 } ] }, { "idx": 97, "layers": [ "model.layers.48.block_sparse_moe.experts.0.w1", "model.layers.48.block_sparse_moe.experts.1.w1", "model.layers.48.block_sparse_moe.experts.2.w1", "model.layers.48.block_sparse_moe.experts.3.w1", "model.layers.48.block_sparse_moe.experts.4.w1", "model.layers.48.block_sparse_moe.experts.5.w1", "model.layers.48.block_sparse_moe.experts.6.w1", "model.layers.48.block_sparse_moe.experts.7.w1", "model.layers.48.block_sparse_moe.experts.8.w1", "model.layers.48.block_sparse_moe.experts.9.w1", "model.layers.48.block_sparse_moe.experts.10.w1", "model.layers.48.block_sparse_moe.experts.11.w1", "model.layers.48.block_sparse_moe.experts.12.w1", "model.layers.48.block_sparse_moe.experts.13.w1", "model.layers.48.block_sparse_moe.experts.14.w1", "model.layers.48.block_sparse_moe.experts.15.w1", "model.layers.48.block_sparse_moe.experts.16.w1", "model.layers.48.block_sparse_moe.experts.17.w1", "model.layers.48.block_sparse_moe.experts.18.w1", "model.layers.48.block_sparse_moe.experts.19.w1", "model.layers.48.block_sparse_moe.experts.20.w1", "model.layers.48.block_sparse_moe.experts.21.w1", "model.layers.48.block_sparse_moe.experts.22.w1", "model.layers.48.block_sparse_moe.experts.23.w1", "model.layers.48.block_sparse_moe.experts.24.w1", "model.layers.48.block_sparse_moe.experts.25.w1", "model.layers.48.block_sparse_moe.experts.26.w1", "model.layers.48.block_sparse_moe.experts.27.w1", "model.layers.48.block_sparse_moe.experts.28.w1", "model.layers.48.block_sparse_moe.experts.29.w1", "model.layers.48.block_sparse_moe.experts.30.w1", "model.layers.48.block_sparse_moe.experts.31.w1", "model.layers.48.block_sparse_moe.experts.32.w1", "model.layers.48.block_sparse_moe.experts.33.w1", "model.layers.48.block_sparse_moe.experts.34.w1", "model.layers.48.block_sparse_moe.experts.35.w1", "model.layers.48.block_sparse_moe.experts.36.w1", "model.layers.48.block_sparse_moe.experts.37.w1", "model.layers.48.block_sparse_moe.experts.38.w1", "model.layers.48.block_sparse_moe.experts.39.w1", "model.layers.48.block_sparse_moe.experts.40.w1", "model.layers.48.block_sparse_moe.experts.41.w1", "model.layers.48.block_sparse_moe.experts.42.w1", "model.layers.48.block_sparse_moe.experts.43.w1", "model.layers.48.block_sparse_moe.experts.44.w1", "model.layers.48.block_sparse_moe.experts.45.w1", "model.layers.48.block_sparse_moe.experts.46.w1", "model.layers.48.block_sparse_moe.experts.47.w1", "model.layers.48.block_sparse_moe.experts.48.w1", "model.layers.48.block_sparse_moe.experts.49.w1", "model.layers.48.block_sparse_moe.experts.50.w1", "model.layers.48.block_sparse_moe.experts.51.w1", "model.layers.48.block_sparse_moe.experts.52.w1", "model.layers.48.block_sparse_moe.experts.53.w1", "model.layers.48.block_sparse_moe.experts.54.w1", "model.layers.48.block_sparse_moe.experts.55.w1", "model.layers.48.block_sparse_moe.experts.56.w1", "model.layers.48.block_sparse_moe.experts.57.w1", "model.layers.48.block_sparse_moe.experts.58.w1", "model.layers.48.block_sparse_moe.experts.59.w1", "model.layers.48.block_sparse_moe.experts.60.w1", "model.layers.48.block_sparse_moe.experts.61.w1", "model.layers.48.block_sparse_moe.experts.62.w1", "model.layers.48.block_sparse_moe.experts.63.w1", "model.layers.48.block_sparse_moe.experts.64.w1", "model.layers.48.block_sparse_moe.experts.65.w1", "model.layers.48.block_sparse_moe.experts.66.w1", "model.layers.48.block_sparse_moe.experts.67.w1", "model.layers.48.block_sparse_moe.experts.68.w1", "model.layers.48.block_sparse_moe.experts.69.w1", "model.layers.48.block_sparse_moe.experts.70.w1", "model.layers.48.block_sparse_moe.experts.71.w1", "model.layers.48.block_sparse_moe.experts.72.w1", "model.layers.48.block_sparse_moe.experts.73.w1", "model.layers.48.block_sparse_moe.experts.74.w1", "model.layers.48.block_sparse_moe.experts.75.w1", "model.layers.48.block_sparse_moe.experts.76.w1", "model.layers.48.block_sparse_moe.experts.77.w1", "model.layers.48.block_sparse_moe.experts.78.w1", "model.layers.48.block_sparse_moe.experts.79.w1", "model.layers.48.block_sparse_moe.experts.80.w1", "model.layers.48.block_sparse_moe.experts.81.w1", "model.layers.48.block_sparse_moe.experts.82.w1", "model.layers.48.block_sparse_moe.experts.83.w1", "model.layers.48.block_sparse_moe.experts.84.w1", "model.layers.48.block_sparse_moe.experts.85.w1", "model.layers.48.block_sparse_moe.experts.86.w1", "model.layers.48.block_sparse_moe.experts.87.w1", "model.layers.48.block_sparse_moe.experts.88.w1", "model.layers.48.block_sparse_moe.experts.89.w1", "model.layers.48.block_sparse_moe.experts.90.w1", "model.layers.48.block_sparse_moe.experts.91.w1", "model.layers.48.block_sparse_moe.experts.92.w1", "model.layers.48.block_sparse_moe.experts.93.w1", "model.layers.48.block_sparse_moe.experts.94.w1", "model.layers.48.block_sparse_moe.experts.95.w1", "model.layers.48.block_sparse_moe.experts.96.w1", "model.layers.48.block_sparse_moe.experts.97.w1", "model.layers.48.block_sparse_moe.experts.98.w1", "model.layers.48.block_sparse_moe.experts.99.w1", "model.layers.48.block_sparse_moe.experts.100.w1", "model.layers.48.block_sparse_moe.experts.101.w1", "model.layers.48.block_sparse_moe.experts.102.w1", "model.layers.48.block_sparse_moe.experts.103.w1", "model.layers.48.block_sparse_moe.experts.104.w1", "model.layers.48.block_sparse_moe.experts.105.w1", "model.layers.48.block_sparse_moe.experts.106.w1", "model.layers.48.block_sparse_moe.experts.107.w1", "model.layers.48.block_sparse_moe.experts.108.w1", "model.layers.48.block_sparse_moe.experts.109.w1", "model.layers.48.block_sparse_moe.experts.110.w1", "model.layers.48.block_sparse_moe.experts.111.w1", "model.layers.48.block_sparse_moe.experts.112.w1", "model.layers.48.block_sparse_moe.experts.113.w1", "model.layers.48.block_sparse_moe.experts.114.w1", "model.layers.48.block_sparse_moe.experts.115.w1", "model.layers.48.block_sparse_moe.experts.116.w1", "model.layers.48.block_sparse_moe.experts.117.w1", "model.layers.48.block_sparse_moe.experts.118.w1", "model.layers.48.block_sparse_moe.experts.119.w1", "model.layers.48.block_sparse_moe.experts.120.w1", "model.layers.48.block_sparse_moe.experts.121.w1", "model.layers.48.block_sparse_moe.experts.122.w1", "model.layers.48.block_sparse_moe.experts.123.w1", "model.layers.48.block_sparse_moe.experts.124.w1", "model.layers.48.block_sparse_moe.experts.125.w1", "model.layers.48.block_sparse_moe.experts.126.w1", "model.layers.48.block_sparse_moe.experts.127.w1", "model.layers.48.block_sparse_moe.experts.128.w1", "model.layers.48.block_sparse_moe.experts.129.w1", "model.layers.48.block_sparse_moe.experts.130.w1", "model.layers.48.block_sparse_moe.experts.131.w1", "model.layers.48.block_sparse_moe.experts.132.w1", "model.layers.48.block_sparse_moe.experts.133.w1", "model.layers.48.block_sparse_moe.experts.134.w1", "model.layers.48.block_sparse_moe.experts.135.w1", "model.layers.48.block_sparse_moe.experts.136.w1", "model.layers.48.block_sparse_moe.experts.137.w1", "model.layers.48.block_sparse_moe.experts.138.w1", "model.layers.48.block_sparse_moe.experts.139.w1", "model.layers.48.block_sparse_moe.experts.140.w1", "model.layers.48.block_sparse_moe.experts.141.w1", "model.layers.48.block_sparse_moe.experts.142.w1", "model.layers.48.block_sparse_moe.experts.143.w1", "model.layers.48.block_sparse_moe.experts.144.w1", "model.layers.48.block_sparse_moe.experts.145.w1", "model.layers.48.block_sparse_moe.experts.146.w1", "model.layers.48.block_sparse_moe.experts.147.w1", "model.layers.48.block_sparse_moe.experts.148.w1", "model.layers.48.block_sparse_moe.experts.149.w1", "model.layers.48.block_sparse_moe.experts.150.w1", "model.layers.48.block_sparse_moe.experts.151.w1", "model.layers.48.block_sparse_moe.experts.152.w1", "model.layers.48.block_sparse_moe.experts.153.w1", "model.layers.48.block_sparse_moe.experts.154.w1", "model.layers.48.block_sparse_moe.experts.155.w1", "model.layers.48.block_sparse_moe.experts.156.w1", "model.layers.48.block_sparse_moe.experts.157.w1", "model.layers.48.block_sparse_moe.experts.158.w1", "model.layers.48.block_sparse_moe.experts.159.w1", "model.layers.48.block_sparse_moe.experts.160.w1", "model.layers.48.block_sparse_moe.experts.161.w1", "model.layers.48.block_sparse_moe.experts.162.w1", "model.layers.48.block_sparse_moe.experts.163.w1", "model.layers.48.block_sparse_moe.experts.164.w1", "model.layers.48.block_sparse_moe.experts.165.w1", "model.layers.48.block_sparse_moe.experts.166.w1", "model.layers.48.block_sparse_moe.experts.167.w1", "model.layers.48.block_sparse_moe.experts.168.w1", "model.layers.48.block_sparse_moe.experts.169.w1", "model.layers.48.block_sparse_moe.experts.170.w1", "model.layers.48.block_sparse_moe.experts.171.w1", "model.layers.48.block_sparse_moe.experts.172.w1", "model.layers.48.block_sparse_moe.experts.173.w1", "model.layers.48.block_sparse_moe.experts.174.w1", "model.layers.48.block_sparse_moe.experts.175.w1", "model.layers.48.block_sparse_moe.experts.176.w1", "model.layers.48.block_sparse_moe.experts.177.w1", "model.layers.48.block_sparse_moe.experts.178.w1", "model.layers.48.block_sparse_moe.experts.179.w1", "model.layers.48.block_sparse_moe.experts.180.w1", "model.layers.48.block_sparse_moe.experts.181.w1", "model.layers.48.block_sparse_moe.experts.182.w1", "model.layers.48.block_sparse_moe.experts.183.w1", "model.layers.48.block_sparse_moe.experts.184.w1", "model.layers.48.block_sparse_moe.experts.185.w1", "model.layers.48.block_sparse_moe.experts.186.w1", "model.layers.48.block_sparse_moe.experts.187.w1", "model.layers.48.block_sparse_moe.experts.188.w1", "model.layers.48.block_sparse_moe.experts.189.w1", "model.layers.48.block_sparse_moe.experts.190.w1", "model.layers.48.block_sparse_moe.experts.191.w1", "model.layers.48.block_sparse_moe.experts.192.w1", "model.layers.48.block_sparse_moe.experts.193.w1", "model.layers.48.block_sparse_moe.experts.194.w1", "model.layers.48.block_sparse_moe.experts.195.w1", "model.layers.48.block_sparse_moe.experts.196.w1", "model.layers.48.block_sparse_moe.experts.197.w1", "model.layers.48.block_sparse_moe.experts.198.w1", "model.layers.48.block_sparse_moe.experts.199.w1", "model.layers.48.block_sparse_moe.experts.200.w1", "model.layers.48.block_sparse_moe.experts.201.w1", "model.layers.48.block_sparse_moe.experts.202.w1", "model.layers.48.block_sparse_moe.experts.203.w1", "model.layers.48.block_sparse_moe.experts.204.w1", "model.layers.48.block_sparse_moe.experts.205.w1", "model.layers.48.block_sparse_moe.experts.206.w1", "model.layers.48.block_sparse_moe.experts.207.w1", "model.layers.48.block_sparse_moe.experts.208.w1", "model.layers.48.block_sparse_moe.experts.209.w1", "model.layers.48.block_sparse_moe.experts.210.w1", "model.layers.48.block_sparse_moe.experts.211.w1", "model.layers.48.block_sparse_moe.experts.212.w1", "model.layers.48.block_sparse_moe.experts.213.w1", "model.layers.48.block_sparse_moe.experts.214.w1", "model.layers.48.block_sparse_moe.experts.215.w1", "model.layers.48.block_sparse_moe.experts.216.w1", "model.layers.48.block_sparse_moe.experts.217.w1", "model.layers.48.block_sparse_moe.experts.218.w1", "model.layers.48.block_sparse_moe.experts.219.w1", "model.layers.48.block_sparse_moe.experts.220.w1", "model.layers.48.block_sparse_moe.experts.221.w1", "model.layers.48.block_sparse_moe.experts.222.w1", "model.layers.48.block_sparse_moe.experts.223.w1", "model.layers.48.block_sparse_moe.experts.224.w1", "model.layers.48.block_sparse_moe.experts.225.w1", "model.layers.48.block_sparse_moe.experts.226.w1", "model.layers.48.block_sparse_moe.experts.227.w1", "model.layers.48.block_sparse_moe.experts.228.w1", "model.layers.48.block_sparse_moe.experts.229.w1", "model.layers.48.block_sparse_moe.experts.230.w1", "model.layers.48.block_sparse_moe.experts.231.w1", "model.layers.48.block_sparse_moe.experts.232.w1", "model.layers.48.block_sparse_moe.experts.233.w1", "model.layers.48.block_sparse_moe.experts.234.w1", "model.layers.48.block_sparse_moe.experts.235.w1", "model.layers.48.block_sparse_moe.experts.236.w1", "model.layers.48.block_sparse_moe.experts.237.w1", "model.layers.48.block_sparse_moe.experts.238.w1", "model.layers.48.block_sparse_moe.experts.239.w1", "model.layers.48.block_sparse_moe.experts.240.w1", "model.layers.48.block_sparse_moe.experts.241.w1", "model.layers.48.block_sparse_moe.experts.242.w1", "model.layers.48.block_sparse_moe.experts.243.w1", "model.layers.48.block_sparse_moe.experts.244.w1", "model.layers.48.block_sparse_moe.experts.245.w1", "model.layers.48.block_sparse_moe.experts.246.w1", "model.layers.48.block_sparse_moe.experts.247.w1", "model.layers.48.block_sparse_moe.experts.248.w1", "model.layers.48.block_sparse_moe.experts.249.w1", "model.layers.48.block_sparse_moe.experts.250.w1", "model.layers.48.block_sparse_moe.experts.251.w1", "model.layers.48.block_sparse_moe.experts.252.w1", "model.layers.48.block_sparse_moe.experts.253.w1", "model.layers.48.block_sparse_moe.experts.254.w1", "model.layers.48.block_sparse_moe.experts.255.w1", "model.layers.48.block_sparse_moe.experts.0.w3", "model.layers.48.block_sparse_moe.experts.1.w3", "model.layers.48.block_sparse_moe.experts.2.w3", "model.layers.48.block_sparse_moe.experts.3.w3", "model.layers.48.block_sparse_moe.experts.4.w3", "model.layers.48.block_sparse_moe.experts.5.w3", "model.layers.48.block_sparse_moe.experts.6.w3", "model.layers.48.block_sparse_moe.experts.7.w3", "model.layers.48.block_sparse_moe.experts.8.w3", "model.layers.48.block_sparse_moe.experts.9.w3", "model.layers.48.block_sparse_moe.experts.10.w3", "model.layers.48.block_sparse_moe.experts.11.w3", "model.layers.48.block_sparse_moe.experts.12.w3", "model.layers.48.block_sparse_moe.experts.13.w3", "model.layers.48.block_sparse_moe.experts.14.w3", "model.layers.48.block_sparse_moe.experts.15.w3", "model.layers.48.block_sparse_moe.experts.16.w3", "model.layers.48.block_sparse_moe.experts.17.w3", "model.layers.48.block_sparse_moe.experts.18.w3", "model.layers.48.block_sparse_moe.experts.19.w3", "model.layers.48.block_sparse_moe.experts.20.w3", "model.layers.48.block_sparse_moe.experts.21.w3", "model.layers.48.block_sparse_moe.experts.22.w3", "model.layers.48.block_sparse_moe.experts.23.w3", "model.layers.48.block_sparse_moe.experts.24.w3", "model.layers.48.block_sparse_moe.experts.25.w3", "model.layers.48.block_sparse_moe.experts.26.w3", "model.layers.48.block_sparse_moe.experts.27.w3", "model.layers.48.block_sparse_moe.experts.28.w3", "model.layers.48.block_sparse_moe.experts.29.w3", "model.layers.48.block_sparse_moe.experts.30.w3", "model.layers.48.block_sparse_moe.experts.31.w3", "model.layers.48.block_sparse_moe.experts.32.w3", "model.layers.48.block_sparse_moe.experts.33.w3", "model.layers.48.block_sparse_moe.experts.34.w3", "model.layers.48.block_sparse_moe.experts.35.w3", "model.layers.48.block_sparse_moe.experts.36.w3", "model.layers.48.block_sparse_moe.experts.37.w3", "model.layers.48.block_sparse_moe.experts.38.w3", "model.layers.48.block_sparse_moe.experts.39.w3", "model.layers.48.block_sparse_moe.experts.40.w3", "model.layers.48.block_sparse_moe.experts.41.w3", "model.layers.48.block_sparse_moe.experts.42.w3", "model.layers.48.block_sparse_moe.experts.43.w3", "model.layers.48.block_sparse_moe.experts.44.w3", "model.layers.48.block_sparse_moe.experts.45.w3", "model.layers.48.block_sparse_moe.experts.46.w3", "model.layers.48.block_sparse_moe.experts.47.w3", "model.layers.48.block_sparse_moe.experts.48.w3", "model.layers.48.block_sparse_moe.experts.49.w3", "model.layers.48.block_sparse_moe.experts.50.w3", "model.layers.48.block_sparse_moe.experts.51.w3", "model.layers.48.block_sparse_moe.experts.52.w3", "model.layers.48.block_sparse_moe.experts.53.w3", "model.layers.48.block_sparse_moe.experts.54.w3", "model.layers.48.block_sparse_moe.experts.55.w3", "model.layers.48.block_sparse_moe.experts.56.w3", "model.layers.48.block_sparse_moe.experts.57.w3", "model.layers.48.block_sparse_moe.experts.58.w3", "model.layers.48.block_sparse_moe.experts.59.w3", "model.layers.48.block_sparse_moe.experts.60.w3", "model.layers.48.block_sparse_moe.experts.61.w3", "model.layers.48.block_sparse_moe.experts.62.w3", "model.layers.48.block_sparse_moe.experts.63.w3", "model.layers.48.block_sparse_moe.experts.64.w3", "model.layers.48.block_sparse_moe.experts.65.w3", "model.layers.48.block_sparse_moe.experts.66.w3", "model.layers.48.block_sparse_moe.experts.67.w3", "model.layers.48.block_sparse_moe.experts.68.w3", "model.layers.48.block_sparse_moe.experts.69.w3", "model.layers.48.block_sparse_moe.experts.70.w3", "model.layers.48.block_sparse_moe.experts.71.w3", "model.layers.48.block_sparse_moe.experts.72.w3", "model.layers.48.block_sparse_moe.experts.73.w3", "model.layers.48.block_sparse_moe.experts.74.w3", "model.layers.48.block_sparse_moe.experts.75.w3", "model.layers.48.block_sparse_moe.experts.76.w3", "model.layers.48.block_sparse_moe.experts.77.w3", "model.layers.48.block_sparse_moe.experts.78.w3", "model.layers.48.block_sparse_moe.experts.79.w3", "model.layers.48.block_sparse_moe.experts.80.w3", "model.layers.48.block_sparse_moe.experts.81.w3", "model.layers.48.block_sparse_moe.experts.82.w3", "model.layers.48.block_sparse_moe.experts.83.w3", "model.layers.48.block_sparse_moe.experts.84.w3", "model.layers.48.block_sparse_moe.experts.85.w3", "model.layers.48.block_sparse_moe.experts.86.w3", "model.layers.48.block_sparse_moe.experts.87.w3", "model.layers.48.block_sparse_moe.experts.88.w3", "model.layers.48.block_sparse_moe.experts.89.w3", "model.layers.48.block_sparse_moe.experts.90.w3", "model.layers.48.block_sparse_moe.experts.91.w3", "model.layers.48.block_sparse_moe.experts.92.w3", "model.layers.48.block_sparse_moe.experts.93.w3", "model.layers.48.block_sparse_moe.experts.94.w3", "model.layers.48.block_sparse_moe.experts.95.w3", "model.layers.48.block_sparse_moe.experts.96.w3", "model.layers.48.block_sparse_moe.experts.97.w3", "model.layers.48.block_sparse_moe.experts.98.w3", "model.layers.48.block_sparse_moe.experts.99.w3", "model.layers.48.block_sparse_moe.experts.100.w3", "model.layers.48.block_sparse_moe.experts.101.w3", "model.layers.48.block_sparse_moe.experts.102.w3", "model.layers.48.block_sparse_moe.experts.103.w3", "model.layers.48.block_sparse_moe.experts.104.w3", "model.layers.48.block_sparse_moe.experts.105.w3", "model.layers.48.block_sparse_moe.experts.106.w3", "model.layers.48.block_sparse_moe.experts.107.w3", "model.layers.48.block_sparse_moe.experts.108.w3", "model.layers.48.block_sparse_moe.experts.109.w3", "model.layers.48.block_sparse_moe.experts.110.w3", "model.layers.48.block_sparse_moe.experts.111.w3", "model.layers.48.block_sparse_moe.experts.112.w3", "model.layers.48.block_sparse_moe.experts.113.w3", "model.layers.48.block_sparse_moe.experts.114.w3", "model.layers.48.block_sparse_moe.experts.115.w3", "model.layers.48.block_sparse_moe.experts.116.w3", "model.layers.48.block_sparse_moe.experts.117.w3", "model.layers.48.block_sparse_moe.experts.118.w3", "model.layers.48.block_sparse_moe.experts.119.w3", "model.layers.48.block_sparse_moe.experts.120.w3", "model.layers.48.block_sparse_moe.experts.121.w3", "model.layers.48.block_sparse_moe.experts.122.w3", "model.layers.48.block_sparse_moe.experts.123.w3", "model.layers.48.block_sparse_moe.experts.124.w3", "model.layers.48.block_sparse_moe.experts.125.w3", "model.layers.48.block_sparse_moe.experts.126.w3", "model.layers.48.block_sparse_moe.experts.127.w3", "model.layers.48.block_sparse_moe.experts.128.w3", "model.layers.48.block_sparse_moe.experts.129.w3", "model.layers.48.block_sparse_moe.experts.130.w3", "model.layers.48.block_sparse_moe.experts.131.w3", "model.layers.48.block_sparse_moe.experts.132.w3", "model.layers.48.block_sparse_moe.experts.133.w3", "model.layers.48.block_sparse_moe.experts.134.w3", "model.layers.48.block_sparse_moe.experts.135.w3", "model.layers.48.block_sparse_moe.experts.136.w3", "model.layers.48.block_sparse_moe.experts.137.w3", "model.layers.48.block_sparse_moe.experts.138.w3", "model.layers.48.block_sparse_moe.experts.139.w3", "model.layers.48.block_sparse_moe.experts.140.w3", "model.layers.48.block_sparse_moe.experts.141.w3", "model.layers.48.block_sparse_moe.experts.142.w3", "model.layers.48.block_sparse_moe.experts.143.w3", "model.layers.48.block_sparse_moe.experts.144.w3", "model.layers.48.block_sparse_moe.experts.145.w3", "model.layers.48.block_sparse_moe.experts.146.w3", "model.layers.48.block_sparse_moe.experts.147.w3", "model.layers.48.block_sparse_moe.experts.148.w3", "model.layers.48.block_sparse_moe.experts.149.w3", "model.layers.48.block_sparse_moe.experts.150.w3", "model.layers.48.block_sparse_moe.experts.151.w3", "model.layers.48.block_sparse_moe.experts.152.w3", "model.layers.48.block_sparse_moe.experts.153.w3", "model.layers.48.block_sparse_moe.experts.154.w3", "model.layers.48.block_sparse_moe.experts.155.w3", "model.layers.48.block_sparse_moe.experts.156.w3", "model.layers.48.block_sparse_moe.experts.157.w3", "model.layers.48.block_sparse_moe.experts.158.w3", "model.layers.48.block_sparse_moe.experts.159.w3", "model.layers.48.block_sparse_moe.experts.160.w3", "model.layers.48.block_sparse_moe.experts.161.w3", "model.layers.48.block_sparse_moe.experts.162.w3", "model.layers.48.block_sparse_moe.experts.163.w3", "model.layers.48.block_sparse_moe.experts.164.w3", "model.layers.48.block_sparse_moe.experts.165.w3", "model.layers.48.block_sparse_moe.experts.166.w3", "model.layers.48.block_sparse_moe.experts.167.w3", "model.layers.48.block_sparse_moe.experts.168.w3", "model.layers.48.block_sparse_moe.experts.169.w3", "model.layers.48.block_sparse_moe.experts.170.w3", "model.layers.48.block_sparse_moe.experts.171.w3", "model.layers.48.block_sparse_moe.experts.172.w3", "model.layers.48.block_sparse_moe.experts.173.w3", "model.layers.48.block_sparse_moe.experts.174.w3", "model.layers.48.block_sparse_moe.experts.175.w3", "model.layers.48.block_sparse_moe.experts.176.w3", "model.layers.48.block_sparse_moe.experts.177.w3", "model.layers.48.block_sparse_moe.experts.178.w3", "model.layers.48.block_sparse_moe.experts.179.w3", "model.layers.48.block_sparse_moe.experts.180.w3", "model.layers.48.block_sparse_moe.experts.181.w3", "model.layers.48.block_sparse_moe.experts.182.w3", "model.layers.48.block_sparse_moe.experts.183.w3", "model.layers.48.block_sparse_moe.experts.184.w3", "model.layers.48.block_sparse_moe.experts.185.w3", "model.layers.48.block_sparse_moe.experts.186.w3", "model.layers.48.block_sparse_moe.experts.187.w3", "model.layers.48.block_sparse_moe.experts.188.w3", "model.layers.48.block_sparse_moe.experts.189.w3", "model.layers.48.block_sparse_moe.experts.190.w3", "model.layers.48.block_sparse_moe.experts.191.w3", "model.layers.48.block_sparse_moe.experts.192.w3", "model.layers.48.block_sparse_moe.experts.193.w3", "model.layers.48.block_sparse_moe.experts.194.w3", "model.layers.48.block_sparse_moe.experts.195.w3", "model.layers.48.block_sparse_moe.experts.196.w3", "model.layers.48.block_sparse_moe.experts.197.w3", "model.layers.48.block_sparse_moe.experts.198.w3", "model.layers.48.block_sparse_moe.experts.199.w3", "model.layers.48.block_sparse_moe.experts.200.w3", "model.layers.48.block_sparse_moe.experts.201.w3", "model.layers.48.block_sparse_moe.experts.202.w3", "model.layers.48.block_sparse_moe.experts.203.w3", "model.layers.48.block_sparse_moe.experts.204.w3", "model.layers.48.block_sparse_moe.experts.205.w3", "model.layers.48.block_sparse_moe.experts.206.w3", "model.layers.48.block_sparse_moe.experts.207.w3", "model.layers.48.block_sparse_moe.experts.208.w3", "model.layers.48.block_sparse_moe.experts.209.w3", "model.layers.48.block_sparse_moe.experts.210.w3", "model.layers.48.block_sparse_moe.experts.211.w3", "model.layers.48.block_sparse_moe.experts.212.w3", "model.layers.48.block_sparse_moe.experts.213.w3", "model.layers.48.block_sparse_moe.experts.214.w3", "model.layers.48.block_sparse_moe.experts.215.w3", "model.layers.48.block_sparse_moe.experts.216.w3", "model.layers.48.block_sparse_moe.experts.217.w3", "model.layers.48.block_sparse_moe.experts.218.w3", "model.layers.48.block_sparse_moe.experts.219.w3", "model.layers.48.block_sparse_moe.experts.220.w3", "model.layers.48.block_sparse_moe.experts.221.w3", "model.layers.48.block_sparse_moe.experts.222.w3", "model.layers.48.block_sparse_moe.experts.223.w3", "model.layers.48.block_sparse_moe.experts.224.w3", "model.layers.48.block_sparse_moe.experts.225.w3", "model.layers.48.block_sparse_moe.experts.226.w3", "model.layers.48.block_sparse_moe.experts.227.w3", "model.layers.48.block_sparse_moe.experts.228.w3", "model.layers.48.block_sparse_moe.experts.229.w3", "model.layers.48.block_sparse_moe.experts.230.w3", "model.layers.48.block_sparse_moe.experts.231.w3", "model.layers.48.block_sparse_moe.experts.232.w3", "model.layers.48.block_sparse_moe.experts.233.w3", "model.layers.48.block_sparse_moe.experts.234.w3", "model.layers.48.block_sparse_moe.experts.235.w3", "model.layers.48.block_sparse_moe.experts.236.w3", "model.layers.48.block_sparse_moe.experts.237.w3", "model.layers.48.block_sparse_moe.experts.238.w3", "model.layers.48.block_sparse_moe.experts.239.w3", "model.layers.48.block_sparse_moe.experts.240.w3", "model.layers.48.block_sparse_moe.experts.241.w3", "model.layers.48.block_sparse_moe.experts.242.w3", "model.layers.48.block_sparse_moe.experts.243.w3", "model.layers.48.block_sparse_moe.experts.244.w3", "model.layers.48.block_sparse_moe.experts.245.w3", "model.layers.48.block_sparse_moe.experts.246.w3", "model.layers.48.block_sparse_moe.experts.247.w3", "model.layers.48.block_sparse_moe.experts.248.w3", "model.layers.48.block_sparse_moe.experts.249.w3", "model.layers.48.block_sparse_moe.experts.250.w3", "model.layers.48.block_sparse_moe.experts.251.w3", "model.layers.48.block_sparse_moe.experts.252.w3", "model.layers.48.block_sparse_moe.experts.253.w3", "model.layers.48.block_sparse_moe.experts.254.w3", "model.layers.48.block_sparse_moe.experts.255.w3", "model.layers.48.block_sparse_moe.experts.0.w2", "model.layers.48.block_sparse_moe.experts.1.w2", "model.layers.48.block_sparse_moe.experts.2.w2", "model.layers.48.block_sparse_moe.experts.3.w2", "model.layers.48.block_sparse_moe.experts.4.w2", "model.layers.48.block_sparse_moe.experts.5.w2", "model.layers.48.block_sparse_moe.experts.6.w2", "model.layers.48.block_sparse_moe.experts.7.w2", "model.layers.48.block_sparse_moe.experts.8.w2", "model.layers.48.block_sparse_moe.experts.9.w2", "model.layers.48.block_sparse_moe.experts.10.w2", "model.layers.48.block_sparse_moe.experts.11.w2", "model.layers.48.block_sparse_moe.experts.12.w2", "model.layers.48.block_sparse_moe.experts.13.w2", "model.layers.48.block_sparse_moe.experts.14.w2", "model.layers.48.block_sparse_moe.experts.15.w2", "model.layers.48.block_sparse_moe.experts.16.w2", "model.layers.48.block_sparse_moe.experts.17.w2", "model.layers.48.block_sparse_moe.experts.18.w2", "model.layers.48.block_sparse_moe.experts.19.w2", "model.layers.48.block_sparse_moe.experts.20.w2", "model.layers.48.block_sparse_moe.experts.21.w2", "model.layers.48.block_sparse_moe.experts.22.w2", "model.layers.48.block_sparse_moe.experts.23.w2", "model.layers.48.block_sparse_moe.experts.24.w2", "model.layers.48.block_sparse_moe.experts.25.w2", "model.layers.48.block_sparse_moe.experts.26.w2", "model.layers.48.block_sparse_moe.experts.27.w2", "model.layers.48.block_sparse_moe.experts.28.w2", "model.layers.48.block_sparse_moe.experts.29.w2", "model.layers.48.block_sparse_moe.experts.30.w2", "model.layers.48.block_sparse_moe.experts.31.w2", "model.layers.48.block_sparse_moe.experts.32.w2", "model.layers.48.block_sparse_moe.experts.33.w2", "model.layers.48.block_sparse_moe.experts.34.w2", "model.layers.48.block_sparse_moe.experts.35.w2", "model.layers.48.block_sparse_moe.experts.36.w2", "model.layers.48.block_sparse_moe.experts.37.w2", "model.layers.48.block_sparse_moe.experts.38.w2", "model.layers.48.block_sparse_moe.experts.39.w2", "model.layers.48.block_sparse_moe.experts.40.w2", "model.layers.48.block_sparse_moe.experts.41.w2", "model.layers.48.block_sparse_moe.experts.42.w2", "model.layers.48.block_sparse_moe.experts.43.w2", "model.layers.48.block_sparse_moe.experts.44.w2", "model.layers.48.block_sparse_moe.experts.45.w2", "model.layers.48.block_sparse_moe.experts.46.w2", "model.layers.48.block_sparse_moe.experts.47.w2", "model.layers.48.block_sparse_moe.experts.48.w2", "model.layers.48.block_sparse_moe.experts.49.w2", "model.layers.48.block_sparse_moe.experts.50.w2", "model.layers.48.block_sparse_moe.experts.51.w2", "model.layers.48.block_sparse_moe.experts.52.w2", "model.layers.48.block_sparse_moe.experts.53.w2", "model.layers.48.block_sparse_moe.experts.54.w2", "model.layers.48.block_sparse_moe.experts.55.w2", "model.layers.48.block_sparse_moe.experts.56.w2", "model.layers.48.block_sparse_moe.experts.57.w2", "model.layers.48.block_sparse_moe.experts.58.w2", "model.layers.48.block_sparse_moe.experts.59.w2", "model.layers.48.block_sparse_moe.experts.60.w2", "model.layers.48.block_sparse_moe.experts.61.w2", "model.layers.48.block_sparse_moe.experts.62.w2", "model.layers.48.block_sparse_moe.experts.63.w2", "model.layers.48.block_sparse_moe.experts.64.w2", "model.layers.48.block_sparse_moe.experts.65.w2", "model.layers.48.block_sparse_moe.experts.66.w2", "model.layers.48.block_sparse_moe.experts.67.w2", "model.layers.48.block_sparse_moe.experts.68.w2", "model.layers.48.block_sparse_moe.experts.69.w2", "model.layers.48.block_sparse_moe.experts.70.w2", "model.layers.48.block_sparse_moe.experts.71.w2", "model.layers.48.block_sparse_moe.experts.72.w2", "model.layers.48.block_sparse_moe.experts.73.w2", "model.layers.48.block_sparse_moe.experts.74.w2", "model.layers.48.block_sparse_moe.experts.75.w2", "model.layers.48.block_sparse_moe.experts.76.w2", "model.layers.48.block_sparse_moe.experts.77.w2", "model.layers.48.block_sparse_moe.experts.78.w2", "model.layers.48.block_sparse_moe.experts.79.w2", "model.layers.48.block_sparse_moe.experts.80.w2", "model.layers.48.block_sparse_moe.experts.81.w2", "model.layers.48.block_sparse_moe.experts.82.w2", "model.layers.48.block_sparse_moe.experts.83.w2", "model.layers.48.block_sparse_moe.experts.84.w2", "model.layers.48.block_sparse_moe.experts.85.w2", "model.layers.48.block_sparse_moe.experts.86.w2", "model.layers.48.block_sparse_moe.experts.87.w2", "model.layers.48.block_sparse_moe.experts.88.w2", "model.layers.48.block_sparse_moe.experts.89.w2", "model.layers.48.block_sparse_moe.experts.90.w2", "model.layers.48.block_sparse_moe.experts.91.w2", "model.layers.48.block_sparse_moe.experts.92.w2", "model.layers.48.block_sparse_moe.experts.93.w2", "model.layers.48.block_sparse_moe.experts.94.w2", "model.layers.48.block_sparse_moe.experts.95.w2", "model.layers.48.block_sparse_moe.experts.96.w2", "model.layers.48.block_sparse_moe.experts.97.w2", "model.layers.48.block_sparse_moe.experts.98.w2", "model.layers.48.block_sparse_moe.experts.99.w2", "model.layers.48.block_sparse_moe.experts.100.w2", "model.layers.48.block_sparse_moe.experts.101.w2", "model.layers.48.block_sparse_moe.experts.102.w2", "model.layers.48.block_sparse_moe.experts.103.w2", "model.layers.48.block_sparse_moe.experts.104.w2", "model.layers.48.block_sparse_moe.experts.105.w2", "model.layers.48.block_sparse_moe.experts.106.w2", "model.layers.48.block_sparse_moe.experts.107.w2", "model.layers.48.block_sparse_moe.experts.108.w2", "model.layers.48.block_sparse_moe.experts.109.w2", "model.layers.48.block_sparse_moe.experts.110.w2", "model.layers.48.block_sparse_moe.experts.111.w2", "model.layers.48.block_sparse_moe.experts.112.w2", "model.layers.48.block_sparse_moe.experts.113.w2", "model.layers.48.block_sparse_moe.experts.114.w2", "model.layers.48.block_sparse_moe.experts.115.w2", "model.layers.48.block_sparse_moe.experts.116.w2", "model.layers.48.block_sparse_moe.experts.117.w2", "model.layers.48.block_sparse_moe.experts.118.w2", "model.layers.48.block_sparse_moe.experts.119.w2", "model.layers.48.block_sparse_moe.experts.120.w2", "model.layers.48.block_sparse_moe.experts.121.w2", "model.layers.48.block_sparse_moe.experts.122.w2", "model.layers.48.block_sparse_moe.experts.123.w2", "model.layers.48.block_sparse_moe.experts.124.w2", "model.layers.48.block_sparse_moe.experts.125.w2", "model.layers.48.block_sparse_moe.experts.126.w2", "model.layers.48.block_sparse_moe.experts.127.w2", "model.layers.48.block_sparse_moe.experts.128.w2", "model.layers.48.block_sparse_moe.experts.129.w2", "model.layers.48.block_sparse_moe.experts.130.w2", "model.layers.48.block_sparse_moe.experts.131.w2", "model.layers.48.block_sparse_moe.experts.132.w2", "model.layers.48.block_sparse_moe.experts.133.w2", "model.layers.48.block_sparse_moe.experts.134.w2", "model.layers.48.block_sparse_moe.experts.135.w2", "model.layers.48.block_sparse_moe.experts.136.w2", "model.layers.48.block_sparse_moe.experts.137.w2", "model.layers.48.block_sparse_moe.experts.138.w2", "model.layers.48.block_sparse_moe.experts.139.w2", "model.layers.48.block_sparse_moe.experts.140.w2", "model.layers.48.block_sparse_moe.experts.141.w2", "model.layers.48.block_sparse_moe.experts.142.w2", "model.layers.48.block_sparse_moe.experts.143.w2", "model.layers.48.block_sparse_moe.experts.144.w2", "model.layers.48.block_sparse_moe.experts.145.w2", "model.layers.48.block_sparse_moe.experts.146.w2", "model.layers.48.block_sparse_moe.experts.147.w2", "model.layers.48.block_sparse_moe.experts.148.w2", "model.layers.48.block_sparse_moe.experts.149.w2", "model.layers.48.block_sparse_moe.experts.150.w2", "model.layers.48.block_sparse_moe.experts.151.w2", "model.layers.48.block_sparse_moe.experts.152.w2", "model.layers.48.block_sparse_moe.experts.153.w2", "model.layers.48.block_sparse_moe.experts.154.w2", "model.layers.48.block_sparse_moe.experts.155.w2", "model.layers.48.block_sparse_moe.experts.156.w2", "model.layers.48.block_sparse_moe.experts.157.w2", "model.layers.48.block_sparse_moe.experts.158.w2", "model.layers.48.block_sparse_moe.experts.159.w2", "model.layers.48.block_sparse_moe.experts.160.w2", "model.layers.48.block_sparse_moe.experts.161.w2", "model.layers.48.block_sparse_moe.experts.162.w2", "model.layers.48.block_sparse_moe.experts.163.w2", "model.layers.48.block_sparse_moe.experts.164.w2", "model.layers.48.block_sparse_moe.experts.165.w2", "model.layers.48.block_sparse_moe.experts.166.w2", "model.layers.48.block_sparse_moe.experts.167.w2", "model.layers.48.block_sparse_moe.experts.168.w2", "model.layers.48.block_sparse_moe.experts.169.w2", "model.layers.48.block_sparse_moe.experts.170.w2", "model.layers.48.block_sparse_moe.experts.171.w2", "model.layers.48.block_sparse_moe.experts.172.w2", "model.layers.48.block_sparse_moe.experts.173.w2", "model.layers.48.block_sparse_moe.experts.174.w2", "model.layers.48.block_sparse_moe.experts.175.w2", "model.layers.48.block_sparse_moe.experts.176.w2", "model.layers.48.block_sparse_moe.experts.177.w2", "model.layers.48.block_sparse_moe.experts.178.w2", "model.layers.48.block_sparse_moe.experts.179.w2", "model.layers.48.block_sparse_moe.experts.180.w2", "model.layers.48.block_sparse_moe.experts.181.w2", "model.layers.48.block_sparse_moe.experts.182.w2", "model.layers.48.block_sparse_moe.experts.183.w2", "model.layers.48.block_sparse_moe.experts.184.w2", "model.layers.48.block_sparse_moe.experts.185.w2", "model.layers.48.block_sparse_moe.experts.186.w2", "model.layers.48.block_sparse_moe.experts.187.w2", "model.layers.48.block_sparse_moe.experts.188.w2", "model.layers.48.block_sparse_moe.experts.189.w2", "model.layers.48.block_sparse_moe.experts.190.w2", "model.layers.48.block_sparse_moe.experts.191.w2", "model.layers.48.block_sparse_moe.experts.192.w2", "model.layers.48.block_sparse_moe.experts.193.w2", "model.layers.48.block_sparse_moe.experts.194.w2", "model.layers.48.block_sparse_moe.experts.195.w2", "model.layers.48.block_sparse_moe.experts.196.w2", "model.layers.48.block_sparse_moe.experts.197.w2", "model.layers.48.block_sparse_moe.experts.198.w2", "model.layers.48.block_sparse_moe.experts.199.w2", "model.layers.48.block_sparse_moe.experts.200.w2", "model.layers.48.block_sparse_moe.experts.201.w2", "model.layers.48.block_sparse_moe.experts.202.w2", "model.layers.48.block_sparse_moe.experts.203.w2", "model.layers.48.block_sparse_moe.experts.204.w2", "model.layers.48.block_sparse_moe.experts.205.w2", "model.layers.48.block_sparse_moe.experts.206.w2", "model.layers.48.block_sparse_moe.experts.207.w2", "model.layers.48.block_sparse_moe.experts.208.w2", "model.layers.48.block_sparse_moe.experts.209.w2", "model.layers.48.block_sparse_moe.experts.210.w2", "model.layers.48.block_sparse_moe.experts.211.w2", "model.layers.48.block_sparse_moe.experts.212.w2", "model.layers.48.block_sparse_moe.experts.213.w2", "model.layers.48.block_sparse_moe.experts.214.w2", "model.layers.48.block_sparse_moe.experts.215.w2", "model.layers.48.block_sparse_moe.experts.216.w2", "model.layers.48.block_sparse_moe.experts.217.w2", "model.layers.48.block_sparse_moe.experts.218.w2", "model.layers.48.block_sparse_moe.experts.219.w2", "model.layers.48.block_sparse_moe.experts.220.w2", "model.layers.48.block_sparse_moe.experts.221.w2", "model.layers.48.block_sparse_moe.experts.222.w2", "model.layers.48.block_sparse_moe.experts.223.w2", "model.layers.48.block_sparse_moe.experts.224.w2", "model.layers.48.block_sparse_moe.experts.225.w2", "model.layers.48.block_sparse_moe.experts.226.w2", "model.layers.48.block_sparse_moe.experts.227.w2", "model.layers.48.block_sparse_moe.experts.228.w2", "model.layers.48.block_sparse_moe.experts.229.w2", "model.layers.48.block_sparse_moe.experts.230.w2", "model.layers.48.block_sparse_moe.experts.231.w2", "model.layers.48.block_sparse_moe.experts.232.w2", "model.layers.48.block_sparse_moe.experts.233.w2", "model.layers.48.block_sparse_moe.experts.234.w2", "model.layers.48.block_sparse_moe.experts.235.w2", "model.layers.48.block_sparse_moe.experts.236.w2", "model.layers.48.block_sparse_moe.experts.237.w2", "model.layers.48.block_sparse_moe.experts.238.w2", "model.layers.48.block_sparse_moe.experts.239.w2", "model.layers.48.block_sparse_moe.experts.240.w2", "model.layers.48.block_sparse_moe.experts.241.w2", "model.layers.48.block_sparse_moe.experts.242.w2", "model.layers.48.block_sparse_moe.experts.243.w2", "model.layers.48.block_sparse_moe.experts.244.w2", "model.layers.48.block_sparse_moe.experts.245.w2", "model.layers.48.block_sparse_moe.experts.246.w2", "model.layers.48.block_sparse_moe.experts.247.w2", "model.layers.48.block_sparse_moe.experts.248.w2", "model.layers.48.block_sparse_moe.experts.249.w2", "model.layers.48.block_sparse_moe.experts.250.w2", "model.layers.48.block_sparse_moe.experts.251.w2", "model.layers.48.block_sparse_moe.experts.252.w2", "model.layers.48.block_sparse_moe.experts.253.w2", "model.layers.48.block_sparse_moe.experts.254.w2", "model.layers.48.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0005482599139213229, "dbits": 3623878656 } ] }, { "idx": 98, "layers": [ "model.layers.49.self_attn.q_proj", "model.layers.49.self_attn.k_proj", "model.layers.49.self_attn.v_proj", "model.layers.49.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00016961470246312782, "dbits": 44040192 } ] }, { "idx": 99, "layers": [ "model.layers.49.block_sparse_moe.experts.0.w1", "model.layers.49.block_sparse_moe.experts.1.w1", "model.layers.49.block_sparse_moe.experts.2.w1", "model.layers.49.block_sparse_moe.experts.3.w1", "model.layers.49.block_sparse_moe.experts.4.w1", "model.layers.49.block_sparse_moe.experts.5.w1", "model.layers.49.block_sparse_moe.experts.6.w1", "model.layers.49.block_sparse_moe.experts.7.w1", "model.layers.49.block_sparse_moe.experts.8.w1", "model.layers.49.block_sparse_moe.experts.9.w1", "model.layers.49.block_sparse_moe.experts.10.w1", "model.layers.49.block_sparse_moe.experts.11.w1", "model.layers.49.block_sparse_moe.experts.12.w1", "model.layers.49.block_sparse_moe.experts.13.w1", "model.layers.49.block_sparse_moe.experts.14.w1", "model.layers.49.block_sparse_moe.experts.15.w1", "model.layers.49.block_sparse_moe.experts.16.w1", "model.layers.49.block_sparse_moe.experts.17.w1", "model.layers.49.block_sparse_moe.experts.18.w1", "model.layers.49.block_sparse_moe.experts.19.w1", "model.layers.49.block_sparse_moe.experts.20.w1", "model.layers.49.block_sparse_moe.experts.21.w1", "model.layers.49.block_sparse_moe.experts.22.w1", "model.layers.49.block_sparse_moe.experts.23.w1", "model.layers.49.block_sparse_moe.experts.24.w1", "model.layers.49.block_sparse_moe.experts.25.w1", "model.layers.49.block_sparse_moe.experts.26.w1", "model.layers.49.block_sparse_moe.experts.27.w1", "model.layers.49.block_sparse_moe.experts.28.w1", "model.layers.49.block_sparse_moe.experts.29.w1", "model.layers.49.block_sparse_moe.experts.30.w1", "model.layers.49.block_sparse_moe.experts.31.w1", "model.layers.49.block_sparse_moe.experts.32.w1", "model.layers.49.block_sparse_moe.experts.33.w1", "model.layers.49.block_sparse_moe.experts.34.w1", "model.layers.49.block_sparse_moe.experts.35.w1", "model.layers.49.block_sparse_moe.experts.36.w1", "model.layers.49.block_sparse_moe.experts.37.w1", "model.layers.49.block_sparse_moe.experts.38.w1", "model.layers.49.block_sparse_moe.experts.39.w1", "model.layers.49.block_sparse_moe.experts.40.w1", "model.layers.49.block_sparse_moe.experts.41.w1", "model.layers.49.block_sparse_moe.experts.42.w1", "model.layers.49.block_sparse_moe.experts.43.w1", "model.layers.49.block_sparse_moe.experts.44.w1", "model.layers.49.block_sparse_moe.experts.45.w1", "model.layers.49.block_sparse_moe.experts.46.w1", "model.layers.49.block_sparse_moe.experts.47.w1", "model.layers.49.block_sparse_moe.experts.48.w1", "model.layers.49.block_sparse_moe.experts.49.w1", "model.layers.49.block_sparse_moe.experts.50.w1", "model.layers.49.block_sparse_moe.experts.51.w1", "model.layers.49.block_sparse_moe.experts.52.w1", "model.layers.49.block_sparse_moe.experts.53.w1", "model.layers.49.block_sparse_moe.experts.54.w1", "model.layers.49.block_sparse_moe.experts.55.w1", "model.layers.49.block_sparse_moe.experts.56.w1", "model.layers.49.block_sparse_moe.experts.57.w1", "model.layers.49.block_sparse_moe.experts.58.w1", "model.layers.49.block_sparse_moe.experts.59.w1", "model.layers.49.block_sparse_moe.experts.60.w1", "model.layers.49.block_sparse_moe.experts.61.w1", "model.layers.49.block_sparse_moe.experts.62.w1", "model.layers.49.block_sparse_moe.experts.63.w1", "model.layers.49.block_sparse_moe.experts.64.w1", "model.layers.49.block_sparse_moe.experts.65.w1", "model.layers.49.block_sparse_moe.experts.66.w1", "model.layers.49.block_sparse_moe.experts.67.w1", "model.layers.49.block_sparse_moe.experts.68.w1", "model.layers.49.block_sparse_moe.experts.69.w1", "model.layers.49.block_sparse_moe.experts.70.w1", "model.layers.49.block_sparse_moe.experts.71.w1", "model.layers.49.block_sparse_moe.experts.72.w1", "model.layers.49.block_sparse_moe.experts.73.w1", "model.layers.49.block_sparse_moe.experts.74.w1", "model.layers.49.block_sparse_moe.experts.75.w1", "model.layers.49.block_sparse_moe.experts.76.w1", "model.layers.49.block_sparse_moe.experts.77.w1", "model.layers.49.block_sparse_moe.experts.78.w1", "model.layers.49.block_sparse_moe.experts.79.w1", "model.layers.49.block_sparse_moe.experts.80.w1", "model.layers.49.block_sparse_moe.experts.81.w1", "model.layers.49.block_sparse_moe.experts.82.w1", "model.layers.49.block_sparse_moe.experts.83.w1", "model.layers.49.block_sparse_moe.experts.84.w1", "model.layers.49.block_sparse_moe.experts.85.w1", "model.layers.49.block_sparse_moe.experts.86.w1", "model.layers.49.block_sparse_moe.experts.87.w1", "model.layers.49.block_sparse_moe.experts.88.w1", "model.layers.49.block_sparse_moe.experts.89.w1", "model.layers.49.block_sparse_moe.experts.90.w1", "model.layers.49.block_sparse_moe.experts.91.w1", "model.layers.49.block_sparse_moe.experts.92.w1", "model.layers.49.block_sparse_moe.experts.93.w1", "model.layers.49.block_sparse_moe.experts.94.w1", "model.layers.49.block_sparse_moe.experts.95.w1", "model.layers.49.block_sparse_moe.experts.96.w1", "model.layers.49.block_sparse_moe.experts.97.w1", "model.layers.49.block_sparse_moe.experts.98.w1", "model.layers.49.block_sparse_moe.experts.99.w1", "model.layers.49.block_sparse_moe.experts.100.w1", "model.layers.49.block_sparse_moe.experts.101.w1", "model.layers.49.block_sparse_moe.experts.102.w1", "model.layers.49.block_sparse_moe.experts.103.w1", "model.layers.49.block_sparse_moe.experts.104.w1", "model.layers.49.block_sparse_moe.experts.105.w1", "model.layers.49.block_sparse_moe.experts.106.w1", "model.layers.49.block_sparse_moe.experts.107.w1", "model.layers.49.block_sparse_moe.experts.108.w1", "model.layers.49.block_sparse_moe.experts.109.w1", "model.layers.49.block_sparse_moe.experts.110.w1", "model.layers.49.block_sparse_moe.experts.111.w1", "model.layers.49.block_sparse_moe.experts.112.w1", "model.layers.49.block_sparse_moe.experts.113.w1", "model.layers.49.block_sparse_moe.experts.114.w1", "model.layers.49.block_sparse_moe.experts.115.w1", "model.layers.49.block_sparse_moe.experts.116.w1", "model.layers.49.block_sparse_moe.experts.117.w1", "model.layers.49.block_sparse_moe.experts.118.w1", "model.layers.49.block_sparse_moe.experts.119.w1", "model.layers.49.block_sparse_moe.experts.120.w1", "model.layers.49.block_sparse_moe.experts.121.w1", "model.layers.49.block_sparse_moe.experts.122.w1", "model.layers.49.block_sparse_moe.experts.123.w1", "model.layers.49.block_sparse_moe.experts.124.w1", "model.layers.49.block_sparse_moe.experts.125.w1", "model.layers.49.block_sparse_moe.experts.126.w1", "model.layers.49.block_sparse_moe.experts.127.w1", "model.layers.49.block_sparse_moe.experts.128.w1", "model.layers.49.block_sparse_moe.experts.129.w1", "model.layers.49.block_sparse_moe.experts.130.w1", "model.layers.49.block_sparse_moe.experts.131.w1", "model.layers.49.block_sparse_moe.experts.132.w1", "model.layers.49.block_sparse_moe.experts.133.w1", "model.layers.49.block_sparse_moe.experts.134.w1", "model.layers.49.block_sparse_moe.experts.135.w1", "model.layers.49.block_sparse_moe.experts.136.w1", "model.layers.49.block_sparse_moe.experts.137.w1", "model.layers.49.block_sparse_moe.experts.138.w1", "model.layers.49.block_sparse_moe.experts.139.w1", "model.layers.49.block_sparse_moe.experts.140.w1", "model.layers.49.block_sparse_moe.experts.141.w1", "model.layers.49.block_sparse_moe.experts.142.w1", "model.layers.49.block_sparse_moe.experts.143.w1", "model.layers.49.block_sparse_moe.experts.144.w1", "model.layers.49.block_sparse_moe.experts.145.w1", "model.layers.49.block_sparse_moe.experts.146.w1", "model.layers.49.block_sparse_moe.experts.147.w1", "model.layers.49.block_sparse_moe.experts.148.w1", "model.layers.49.block_sparse_moe.experts.149.w1", "model.layers.49.block_sparse_moe.experts.150.w1", "model.layers.49.block_sparse_moe.experts.151.w1", "model.layers.49.block_sparse_moe.experts.152.w1", "model.layers.49.block_sparse_moe.experts.153.w1", "model.layers.49.block_sparse_moe.experts.154.w1", "model.layers.49.block_sparse_moe.experts.155.w1", "model.layers.49.block_sparse_moe.experts.156.w1", "model.layers.49.block_sparse_moe.experts.157.w1", "model.layers.49.block_sparse_moe.experts.158.w1", "model.layers.49.block_sparse_moe.experts.159.w1", "model.layers.49.block_sparse_moe.experts.160.w1", "model.layers.49.block_sparse_moe.experts.161.w1", "model.layers.49.block_sparse_moe.experts.162.w1", "model.layers.49.block_sparse_moe.experts.163.w1", "model.layers.49.block_sparse_moe.experts.164.w1", "model.layers.49.block_sparse_moe.experts.165.w1", "model.layers.49.block_sparse_moe.experts.166.w1", "model.layers.49.block_sparse_moe.experts.167.w1", "model.layers.49.block_sparse_moe.experts.168.w1", "model.layers.49.block_sparse_moe.experts.169.w1", "model.layers.49.block_sparse_moe.experts.170.w1", "model.layers.49.block_sparse_moe.experts.171.w1", "model.layers.49.block_sparse_moe.experts.172.w1", "model.layers.49.block_sparse_moe.experts.173.w1", "model.layers.49.block_sparse_moe.experts.174.w1", "model.layers.49.block_sparse_moe.experts.175.w1", "model.layers.49.block_sparse_moe.experts.176.w1", "model.layers.49.block_sparse_moe.experts.177.w1", "model.layers.49.block_sparse_moe.experts.178.w1", "model.layers.49.block_sparse_moe.experts.179.w1", "model.layers.49.block_sparse_moe.experts.180.w1", "model.layers.49.block_sparse_moe.experts.181.w1", "model.layers.49.block_sparse_moe.experts.182.w1", "model.layers.49.block_sparse_moe.experts.183.w1", "model.layers.49.block_sparse_moe.experts.184.w1", "model.layers.49.block_sparse_moe.experts.185.w1", "model.layers.49.block_sparse_moe.experts.186.w1", "model.layers.49.block_sparse_moe.experts.187.w1", "model.layers.49.block_sparse_moe.experts.188.w1", "model.layers.49.block_sparse_moe.experts.189.w1", "model.layers.49.block_sparse_moe.experts.190.w1", "model.layers.49.block_sparse_moe.experts.191.w1", "model.layers.49.block_sparse_moe.experts.192.w1", "model.layers.49.block_sparse_moe.experts.193.w1", "model.layers.49.block_sparse_moe.experts.194.w1", "model.layers.49.block_sparse_moe.experts.195.w1", "model.layers.49.block_sparse_moe.experts.196.w1", "model.layers.49.block_sparse_moe.experts.197.w1", "model.layers.49.block_sparse_moe.experts.198.w1", "model.layers.49.block_sparse_moe.experts.199.w1", "model.layers.49.block_sparse_moe.experts.200.w1", "model.layers.49.block_sparse_moe.experts.201.w1", "model.layers.49.block_sparse_moe.experts.202.w1", "model.layers.49.block_sparse_moe.experts.203.w1", "model.layers.49.block_sparse_moe.experts.204.w1", "model.layers.49.block_sparse_moe.experts.205.w1", "model.layers.49.block_sparse_moe.experts.206.w1", "model.layers.49.block_sparse_moe.experts.207.w1", "model.layers.49.block_sparse_moe.experts.208.w1", "model.layers.49.block_sparse_moe.experts.209.w1", "model.layers.49.block_sparse_moe.experts.210.w1", "model.layers.49.block_sparse_moe.experts.211.w1", "model.layers.49.block_sparse_moe.experts.212.w1", "model.layers.49.block_sparse_moe.experts.213.w1", "model.layers.49.block_sparse_moe.experts.214.w1", "model.layers.49.block_sparse_moe.experts.215.w1", "model.layers.49.block_sparse_moe.experts.216.w1", "model.layers.49.block_sparse_moe.experts.217.w1", "model.layers.49.block_sparse_moe.experts.218.w1", "model.layers.49.block_sparse_moe.experts.219.w1", "model.layers.49.block_sparse_moe.experts.220.w1", "model.layers.49.block_sparse_moe.experts.221.w1", "model.layers.49.block_sparse_moe.experts.222.w1", "model.layers.49.block_sparse_moe.experts.223.w1", "model.layers.49.block_sparse_moe.experts.224.w1", "model.layers.49.block_sparse_moe.experts.225.w1", "model.layers.49.block_sparse_moe.experts.226.w1", "model.layers.49.block_sparse_moe.experts.227.w1", "model.layers.49.block_sparse_moe.experts.228.w1", "model.layers.49.block_sparse_moe.experts.229.w1", "model.layers.49.block_sparse_moe.experts.230.w1", "model.layers.49.block_sparse_moe.experts.231.w1", "model.layers.49.block_sparse_moe.experts.232.w1", "model.layers.49.block_sparse_moe.experts.233.w1", "model.layers.49.block_sparse_moe.experts.234.w1", "model.layers.49.block_sparse_moe.experts.235.w1", "model.layers.49.block_sparse_moe.experts.236.w1", "model.layers.49.block_sparse_moe.experts.237.w1", "model.layers.49.block_sparse_moe.experts.238.w1", "model.layers.49.block_sparse_moe.experts.239.w1", "model.layers.49.block_sparse_moe.experts.240.w1", "model.layers.49.block_sparse_moe.experts.241.w1", "model.layers.49.block_sparse_moe.experts.242.w1", "model.layers.49.block_sparse_moe.experts.243.w1", "model.layers.49.block_sparse_moe.experts.244.w1", "model.layers.49.block_sparse_moe.experts.245.w1", "model.layers.49.block_sparse_moe.experts.246.w1", "model.layers.49.block_sparse_moe.experts.247.w1", "model.layers.49.block_sparse_moe.experts.248.w1", "model.layers.49.block_sparse_moe.experts.249.w1", "model.layers.49.block_sparse_moe.experts.250.w1", "model.layers.49.block_sparse_moe.experts.251.w1", "model.layers.49.block_sparse_moe.experts.252.w1", "model.layers.49.block_sparse_moe.experts.253.w1", "model.layers.49.block_sparse_moe.experts.254.w1", "model.layers.49.block_sparse_moe.experts.255.w1", "model.layers.49.block_sparse_moe.experts.0.w3", "model.layers.49.block_sparse_moe.experts.1.w3", "model.layers.49.block_sparse_moe.experts.2.w3", "model.layers.49.block_sparse_moe.experts.3.w3", "model.layers.49.block_sparse_moe.experts.4.w3", "model.layers.49.block_sparse_moe.experts.5.w3", "model.layers.49.block_sparse_moe.experts.6.w3", "model.layers.49.block_sparse_moe.experts.7.w3", "model.layers.49.block_sparse_moe.experts.8.w3", "model.layers.49.block_sparse_moe.experts.9.w3", "model.layers.49.block_sparse_moe.experts.10.w3", "model.layers.49.block_sparse_moe.experts.11.w3", "model.layers.49.block_sparse_moe.experts.12.w3", "model.layers.49.block_sparse_moe.experts.13.w3", "model.layers.49.block_sparse_moe.experts.14.w3", "model.layers.49.block_sparse_moe.experts.15.w3", "model.layers.49.block_sparse_moe.experts.16.w3", "model.layers.49.block_sparse_moe.experts.17.w3", "model.layers.49.block_sparse_moe.experts.18.w3", "model.layers.49.block_sparse_moe.experts.19.w3", "model.layers.49.block_sparse_moe.experts.20.w3", "model.layers.49.block_sparse_moe.experts.21.w3", "model.layers.49.block_sparse_moe.experts.22.w3", "model.layers.49.block_sparse_moe.experts.23.w3", "model.layers.49.block_sparse_moe.experts.24.w3", "model.layers.49.block_sparse_moe.experts.25.w3", "model.layers.49.block_sparse_moe.experts.26.w3", "model.layers.49.block_sparse_moe.experts.27.w3", "model.layers.49.block_sparse_moe.experts.28.w3", "model.layers.49.block_sparse_moe.experts.29.w3", "model.layers.49.block_sparse_moe.experts.30.w3", "model.layers.49.block_sparse_moe.experts.31.w3", "model.layers.49.block_sparse_moe.experts.32.w3", "model.layers.49.block_sparse_moe.experts.33.w3", "model.layers.49.block_sparse_moe.experts.34.w3", "model.layers.49.block_sparse_moe.experts.35.w3", "model.layers.49.block_sparse_moe.experts.36.w3", "model.layers.49.block_sparse_moe.experts.37.w3", "model.layers.49.block_sparse_moe.experts.38.w3", "model.layers.49.block_sparse_moe.experts.39.w3", "model.layers.49.block_sparse_moe.experts.40.w3", "model.layers.49.block_sparse_moe.experts.41.w3", "model.layers.49.block_sparse_moe.experts.42.w3", "model.layers.49.block_sparse_moe.experts.43.w3", "model.layers.49.block_sparse_moe.experts.44.w3", "model.layers.49.block_sparse_moe.experts.45.w3", "model.layers.49.block_sparse_moe.experts.46.w3", "model.layers.49.block_sparse_moe.experts.47.w3", "model.layers.49.block_sparse_moe.experts.48.w3", "model.layers.49.block_sparse_moe.experts.49.w3", "model.layers.49.block_sparse_moe.experts.50.w3", "model.layers.49.block_sparse_moe.experts.51.w3", "model.layers.49.block_sparse_moe.experts.52.w3", "model.layers.49.block_sparse_moe.experts.53.w3", "model.layers.49.block_sparse_moe.experts.54.w3", "model.layers.49.block_sparse_moe.experts.55.w3", "model.layers.49.block_sparse_moe.experts.56.w3", "model.layers.49.block_sparse_moe.experts.57.w3", "model.layers.49.block_sparse_moe.experts.58.w3", "model.layers.49.block_sparse_moe.experts.59.w3", "model.layers.49.block_sparse_moe.experts.60.w3", "model.layers.49.block_sparse_moe.experts.61.w3", "model.layers.49.block_sparse_moe.experts.62.w3", "model.layers.49.block_sparse_moe.experts.63.w3", "model.layers.49.block_sparse_moe.experts.64.w3", "model.layers.49.block_sparse_moe.experts.65.w3", "model.layers.49.block_sparse_moe.experts.66.w3", "model.layers.49.block_sparse_moe.experts.67.w3", "model.layers.49.block_sparse_moe.experts.68.w3", "model.layers.49.block_sparse_moe.experts.69.w3", "model.layers.49.block_sparse_moe.experts.70.w3", "model.layers.49.block_sparse_moe.experts.71.w3", "model.layers.49.block_sparse_moe.experts.72.w3", "model.layers.49.block_sparse_moe.experts.73.w3", "model.layers.49.block_sparse_moe.experts.74.w3", "model.layers.49.block_sparse_moe.experts.75.w3", "model.layers.49.block_sparse_moe.experts.76.w3", "model.layers.49.block_sparse_moe.experts.77.w3", "model.layers.49.block_sparse_moe.experts.78.w3", "model.layers.49.block_sparse_moe.experts.79.w3", "model.layers.49.block_sparse_moe.experts.80.w3", "model.layers.49.block_sparse_moe.experts.81.w3", "model.layers.49.block_sparse_moe.experts.82.w3", "model.layers.49.block_sparse_moe.experts.83.w3", "model.layers.49.block_sparse_moe.experts.84.w3", "model.layers.49.block_sparse_moe.experts.85.w3", "model.layers.49.block_sparse_moe.experts.86.w3", "model.layers.49.block_sparse_moe.experts.87.w3", "model.layers.49.block_sparse_moe.experts.88.w3", "model.layers.49.block_sparse_moe.experts.89.w3", "model.layers.49.block_sparse_moe.experts.90.w3", "model.layers.49.block_sparse_moe.experts.91.w3", "model.layers.49.block_sparse_moe.experts.92.w3", "model.layers.49.block_sparse_moe.experts.93.w3", "model.layers.49.block_sparse_moe.experts.94.w3", "model.layers.49.block_sparse_moe.experts.95.w3", "model.layers.49.block_sparse_moe.experts.96.w3", "model.layers.49.block_sparse_moe.experts.97.w3", "model.layers.49.block_sparse_moe.experts.98.w3", "model.layers.49.block_sparse_moe.experts.99.w3", "model.layers.49.block_sparse_moe.experts.100.w3", "model.layers.49.block_sparse_moe.experts.101.w3", "model.layers.49.block_sparse_moe.experts.102.w3", "model.layers.49.block_sparse_moe.experts.103.w3", "model.layers.49.block_sparse_moe.experts.104.w3", "model.layers.49.block_sparse_moe.experts.105.w3", "model.layers.49.block_sparse_moe.experts.106.w3", "model.layers.49.block_sparse_moe.experts.107.w3", "model.layers.49.block_sparse_moe.experts.108.w3", "model.layers.49.block_sparse_moe.experts.109.w3", "model.layers.49.block_sparse_moe.experts.110.w3", "model.layers.49.block_sparse_moe.experts.111.w3", "model.layers.49.block_sparse_moe.experts.112.w3", "model.layers.49.block_sparse_moe.experts.113.w3", "model.layers.49.block_sparse_moe.experts.114.w3", "model.layers.49.block_sparse_moe.experts.115.w3", "model.layers.49.block_sparse_moe.experts.116.w3", "model.layers.49.block_sparse_moe.experts.117.w3", "model.layers.49.block_sparse_moe.experts.118.w3", "model.layers.49.block_sparse_moe.experts.119.w3", "model.layers.49.block_sparse_moe.experts.120.w3", "model.layers.49.block_sparse_moe.experts.121.w3", "model.layers.49.block_sparse_moe.experts.122.w3", "model.layers.49.block_sparse_moe.experts.123.w3", "model.layers.49.block_sparse_moe.experts.124.w3", "model.layers.49.block_sparse_moe.experts.125.w3", "model.layers.49.block_sparse_moe.experts.126.w3", "model.layers.49.block_sparse_moe.experts.127.w3", "model.layers.49.block_sparse_moe.experts.128.w3", "model.layers.49.block_sparse_moe.experts.129.w3", "model.layers.49.block_sparse_moe.experts.130.w3", "model.layers.49.block_sparse_moe.experts.131.w3", "model.layers.49.block_sparse_moe.experts.132.w3", "model.layers.49.block_sparse_moe.experts.133.w3", "model.layers.49.block_sparse_moe.experts.134.w3", "model.layers.49.block_sparse_moe.experts.135.w3", "model.layers.49.block_sparse_moe.experts.136.w3", "model.layers.49.block_sparse_moe.experts.137.w3", "model.layers.49.block_sparse_moe.experts.138.w3", "model.layers.49.block_sparse_moe.experts.139.w3", "model.layers.49.block_sparse_moe.experts.140.w3", "model.layers.49.block_sparse_moe.experts.141.w3", "model.layers.49.block_sparse_moe.experts.142.w3", "model.layers.49.block_sparse_moe.experts.143.w3", "model.layers.49.block_sparse_moe.experts.144.w3", "model.layers.49.block_sparse_moe.experts.145.w3", "model.layers.49.block_sparse_moe.experts.146.w3", "model.layers.49.block_sparse_moe.experts.147.w3", "model.layers.49.block_sparse_moe.experts.148.w3", "model.layers.49.block_sparse_moe.experts.149.w3", "model.layers.49.block_sparse_moe.experts.150.w3", "model.layers.49.block_sparse_moe.experts.151.w3", "model.layers.49.block_sparse_moe.experts.152.w3", "model.layers.49.block_sparse_moe.experts.153.w3", "model.layers.49.block_sparse_moe.experts.154.w3", "model.layers.49.block_sparse_moe.experts.155.w3", "model.layers.49.block_sparse_moe.experts.156.w3", "model.layers.49.block_sparse_moe.experts.157.w3", "model.layers.49.block_sparse_moe.experts.158.w3", "model.layers.49.block_sparse_moe.experts.159.w3", "model.layers.49.block_sparse_moe.experts.160.w3", "model.layers.49.block_sparse_moe.experts.161.w3", "model.layers.49.block_sparse_moe.experts.162.w3", "model.layers.49.block_sparse_moe.experts.163.w3", "model.layers.49.block_sparse_moe.experts.164.w3", "model.layers.49.block_sparse_moe.experts.165.w3", "model.layers.49.block_sparse_moe.experts.166.w3", "model.layers.49.block_sparse_moe.experts.167.w3", "model.layers.49.block_sparse_moe.experts.168.w3", "model.layers.49.block_sparse_moe.experts.169.w3", "model.layers.49.block_sparse_moe.experts.170.w3", "model.layers.49.block_sparse_moe.experts.171.w3", "model.layers.49.block_sparse_moe.experts.172.w3", "model.layers.49.block_sparse_moe.experts.173.w3", "model.layers.49.block_sparse_moe.experts.174.w3", "model.layers.49.block_sparse_moe.experts.175.w3", "model.layers.49.block_sparse_moe.experts.176.w3", "model.layers.49.block_sparse_moe.experts.177.w3", "model.layers.49.block_sparse_moe.experts.178.w3", "model.layers.49.block_sparse_moe.experts.179.w3", "model.layers.49.block_sparse_moe.experts.180.w3", "model.layers.49.block_sparse_moe.experts.181.w3", "model.layers.49.block_sparse_moe.experts.182.w3", "model.layers.49.block_sparse_moe.experts.183.w3", "model.layers.49.block_sparse_moe.experts.184.w3", "model.layers.49.block_sparse_moe.experts.185.w3", "model.layers.49.block_sparse_moe.experts.186.w3", "model.layers.49.block_sparse_moe.experts.187.w3", "model.layers.49.block_sparse_moe.experts.188.w3", "model.layers.49.block_sparse_moe.experts.189.w3", "model.layers.49.block_sparse_moe.experts.190.w3", "model.layers.49.block_sparse_moe.experts.191.w3", "model.layers.49.block_sparse_moe.experts.192.w3", "model.layers.49.block_sparse_moe.experts.193.w3", "model.layers.49.block_sparse_moe.experts.194.w3", "model.layers.49.block_sparse_moe.experts.195.w3", "model.layers.49.block_sparse_moe.experts.196.w3", "model.layers.49.block_sparse_moe.experts.197.w3", "model.layers.49.block_sparse_moe.experts.198.w3", "model.layers.49.block_sparse_moe.experts.199.w3", "model.layers.49.block_sparse_moe.experts.200.w3", "model.layers.49.block_sparse_moe.experts.201.w3", "model.layers.49.block_sparse_moe.experts.202.w3", "model.layers.49.block_sparse_moe.experts.203.w3", "model.layers.49.block_sparse_moe.experts.204.w3", "model.layers.49.block_sparse_moe.experts.205.w3", "model.layers.49.block_sparse_moe.experts.206.w3", "model.layers.49.block_sparse_moe.experts.207.w3", "model.layers.49.block_sparse_moe.experts.208.w3", "model.layers.49.block_sparse_moe.experts.209.w3", "model.layers.49.block_sparse_moe.experts.210.w3", "model.layers.49.block_sparse_moe.experts.211.w3", "model.layers.49.block_sparse_moe.experts.212.w3", "model.layers.49.block_sparse_moe.experts.213.w3", "model.layers.49.block_sparse_moe.experts.214.w3", "model.layers.49.block_sparse_moe.experts.215.w3", "model.layers.49.block_sparse_moe.experts.216.w3", "model.layers.49.block_sparse_moe.experts.217.w3", "model.layers.49.block_sparse_moe.experts.218.w3", "model.layers.49.block_sparse_moe.experts.219.w3", "model.layers.49.block_sparse_moe.experts.220.w3", "model.layers.49.block_sparse_moe.experts.221.w3", "model.layers.49.block_sparse_moe.experts.222.w3", "model.layers.49.block_sparse_moe.experts.223.w3", "model.layers.49.block_sparse_moe.experts.224.w3", "model.layers.49.block_sparse_moe.experts.225.w3", "model.layers.49.block_sparse_moe.experts.226.w3", "model.layers.49.block_sparse_moe.experts.227.w3", "model.layers.49.block_sparse_moe.experts.228.w3", "model.layers.49.block_sparse_moe.experts.229.w3", "model.layers.49.block_sparse_moe.experts.230.w3", "model.layers.49.block_sparse_moe.experts.231.w3", "model.layers.49.block_sparse_moe.experts.232.w3", "model.layers.49.block_sparse_moe.experts.233.w3", "model.layers.49.block_sparse_moe.experts.234.w3", "model.layers.49.block_sparse_moe.experts.235.w3", "model.layers.49.block_sparse_moe.experts.236.w3", "model.layers.49.block_sparse_moe.experts.237.w3", "model.layers.49.block_sparse_moe.experts.238.w3", "model.layers.49.block_sparse_moe.experts.239.w3", "model.layers.49.block_sparse_moe.experts.240.w3", "model.layers.49.block_sparse_moe.experts.241.w3", "model.layers.49.block_sparse_moe.experts.242.w3", "model.layers.49.block_sparse_moe.experts.243.w3", "model.layers.49.block_sparse_moe.experts.244.w3", "model.layers.49.block_sparse_moe.experts.245.w3", "model.layers.49.block_sparse_moe.experts.246.w3", "model.layers.49.block_sparse_moe.experts.247.w3", "model.layers.49.block_sparse_moe.experts.248.w3", "model.layers.49.block_sparse_moe.experts.249.w3", "model.layers.49.block_sparse_moe.experts.250.w3", "model.layers.49.block_sparse_moe.experts.251.w3", "model.layers.49.block_sparse_moe.experts.252.w3", "model.layers.49.block_sparse_moe.experts.253.w3", "model.layers.49.block_sparse_moe.experts.254.w3", "model.layers.49.block_sparse_moe.experts.255.w3", "model.layers.49.block_sparse_moe.experts.0.w2", "model.layers.49.block_sparse_moe.experts.1.w2", "model.layers.49.block_sparse_moe.experts.2.w2", "model.layers.49.block_sparse_moe.experts.3.w2", "model.layers.49.block_sparse_moe.experts.4.w2", "model.layers.49.block_sparse_moe.experts.5.w2", "model.layers.49.block_sparse_moe.experts.6.w2", "model.layers.49.block_sparse_moe.experts.7.w2", "model.layers.49.block_sparse_moe.experts.8.w2", "model.layers.49.block_sparse_moe.experts.9.w2", "model.layers.49.block_sparse_moe.experts.10.w2", "model.layers.49.block_sparse_moe.experts.11.w2", "model.layers.49.block_sparse_moe.experts.12.w2", "model.layers.49.block_sparse_moe.experts.13.w2", "model.layers.49.block_sparse_moe.experts.14.w2", "model.layers.49.block_sparse_moe.experts.15.w2", "model.layers.49.block_sparse_moe.experts.16.w2", "model.layers.49.block_sparse_moe.experts.17.w2", "model.layers.49.block_sparse_moe.experts.18.w2", "model.layers.49.block_sparse_moe.experts.19.w2", "model.layers.49.block_sparse_moe.experts.20.w2", "model.layers.49.block_sparse_moe.experts.21.w2", "model.layers.49.block_sparse_moe.experts.22.w2", "model.layers.49.block_sparse_moe.experts.23.w2", "model.layers.49.block_sparse_moe.experts.24.w2", "model.layers.49.block_sparse_moe.experts.25.w2", "model.layers.49.block_sparse_moe.experts.26.w2", "model.layers.49.block_sparse_moe.experts.27.w2", "model.layers.49.block_sparse_moe.experts.28.w2", "model.layers.49.block_sparse_moe.experts.29.w2", "model.layers.49.block_sparse_moe.experts.30.w2", "model.layers.49.block_sparse_moe.experts.31.w2", "model.layers.49.block_sparse_moe.experts.32.w2", "model.layers.49.block_sparse_moe.experts.33.w2", "model.layers.49.block_sparse_moe.experts.34.w2", "model.layers.49.block_sparse_moe.experts.35.w2", "model.layers.49.block_sparse_moe.experts.36.w2", "model.layers.49.block_sparse_moe.experts.37.w2", "model.layers.49.block_sparse_moe.experts.38.w2", "model.layers.49.block_sparse_moe.experts.39.w2", "model.layers.49.block_sparse_moe.experts.40.w2", "model.layers.49.block_sparse_moe.experts.41.w2", "model.layers.49.block_sparse_moe.experts.42.w2", "model.layers.49.block_sparse_moe.experts.43.w2", "model.layers.49.block_sparse_moe.experts.44.w2", "model.layers.49.block_sparse_moe.experts.45.w2", "model.layers.49.block_sparse_moe.experts.46.w2", "model.layers.49.block_sparse_moe.experts.47.w2", "model.layers.49.block_sparse_moe.experts.48.w2", "model.layers.49.block_sparse_moe.experts.49.w2", "model.layers.49.block_sparse_moe.experts.50.w2", "model.layers.49.block_sparse_moe.experts.51.w2", "model.layers.49.block_sparse_moe.experts.52.w2", "model.layers.49.block_sparse_moe.experts.53.w2", "model.layers.49.block_sparse_moe.experts.54.w2", "model.layers.49.block_sparse_moe.experts.55.w2", "model.layers.49.block_sparse_moe.experts.56.w2", "model.layers.49.block_sparse_moe.experts.57.w2", "model.layers.49.block_sparse_moe.experts.58.w2", "model.layers.49.block_sparse_moe.experts.59.w2", "model.layers.49.block_sparse_moe.experts.60.w2", "model.layers.49.block_sparse_moe.experts.61.w2", "model.layers.49.block_sparse_moe.experts.62.w2", "model.layers.49.block_sparse_moe.experts.63.w2", "model.layers.49.block_sparse_moe.experts.64.w2", "model.layers.49.block_sparse_moe.experts.65.w2", "model.layers.49.block_sparse_moe.experts.66.w2", "model.layers.49.block_sparse_moe.experts.67.w2", "model.layers.49.block_sparse_moe.experts.68.w2", "model.layers.49.block_sparse_moe.experts.69.w2", "model.layers.49.block_sparse_moe.experts.70.w2", "model.layers.49.block_sparse_moe.experts.71.w2", "model.layers.49.block_sparse_moe.experts.72.w2", "model.layers.49.block_sparse_moe.experts.73.w2", "model.layers.49.block_sparse_moe.experts.74.w2", "model.layers.49.block_sparse_moe.experts.75.w2", "model.layers.49.block_sparse_moe.experts.76.w2", "model.layers.49.block_sparse_moe.experts.77.w2", "model.layers.49.block_sparse_moe.experts.78.w2", "model.layers.49.block_sparse_moe.experts.79.w2", "model.layers.49.block_sparse_moe.experts.80.w2", "model.layers.49.block_sparse_moe.experts.81.w2", "model.layers.49.block_sparse_moe.experts.82.w2", "model.layers.49.block_sparse_moe.experts.83.w2", "model.layers.49.block_sparse_moe.experts.84.w2", "model.layers.49.block_sparse_moe.experts.85.w2", "model.layers.49.block_sparse_moe.experts.86.w2", "model.layers.49.block_sparse_moe.experts.87.w2", "model.layers.49.block_sparse_moe.experts.88.w2", "model.layers.49.block_sparse_moe.experts.89.w2", "model.layers.49.block_sparse_moe.experts.90.w2", "model.layers.49.block_sparse_moe.experts.91.w2", "model.layers.49.block_sparse_moe.experts.92.w2", "model.layers.49.block_sparse_moe.experts.93.w2", "model.layers.49.block_sparse_moe.experts.94.w2", "model.layers.49.block_sparse_moe.experts.95.w2", "model.layers.49.block_sparse_moe.experts.96.w2", "model.layers.49.block_sparse_moe.experts.97.w2", "model.layers.49.block_sparse_moe.experts.98.w2", "model.layers.49.block_sparse_moe.experts.99.w2", "model.layers.49.block_sparse_moe.experts.100.w2", "model.layers.49.block_sparse_moe.experts.101.w2", "model.layers.49.block_sparse_moe.experts.102.w2", "model.layers.49.block_sparse_moe.experts.103.w2", "model.layers.49.block_sparse_moe.experts.104.w2", "model.layers.49.block_sparse_moe.experts.105.w2", "model.layers.49.block_sparse_moe.experts.106.w2", "model.layers.49.block_sparse_moe.experts.107.w2", "model.layers.49.block_sparse_moe.experts.108.w2", "model.layers.49.block_sparse_moe.experts.109.w2", "model.layers.49.block_sparse_moe.experts.110.w2", "model.layers.49.block_sparse_moe.experts.111.w2", "model.layers.49.block_sparse_moe.experts.112.w2", "model.layers.49.block_sparse_moe.experts.113.w2", "model.layers.49.block_sparse_moe.experts.114.w2", "model.layers.49.block_sparse_moe.experts.115.w2", "model.layers.49.block_sparse_moe.experts.116.w2", "model.layers.49.block_sparse_moe.experts.117.w2", "model.layers.49.block_sparse_moe.experts.118.w2", "model.layers.49.block_sparse_moe.experts.119.w2", "model.layers.49.block_sparse_moe.experts.120.w2", "model.layers.49.block_sparse_moe.experts.121.w2", "model.layers.49.block_sparse_moe.experts.122.w2", "model.layers.49.block_sparse_moe.experts.123.w2", "model.layers.49.block_sparse_moe.experts.124.w2", "model.layers.49.block_sparse_moe.experts.125.w2", "model.layers.49.block_sparse_moe.experts.126.w2", "model.layers.49.block_sparse_moe.experts.127.w2", "model.layers.49.block_sparse_moe.experts.128.w2", "model.layers.49.block_sparse_moe.experts.129.w2", "model.layers.49.block_sparse_moe.experts.130.w2", "model.layers.49.block_sparse_moe.experts.131.w2", "model.layers.49.block_sparse_moe.experts.132.w2", "model.layers.49.block_sparse_moe.experts.133.w2", "model.layers.49.block_sparse_moe.experts.134.w2", "model.layers.49.block_sparse_moe.experts.135.w2", "model.layers.49.block_sparse_moe.experts.136.w2", "model.layers.49.block_sparse_moe.experts.137.w2", "model.layers.49.block_sparse_moe.experts.138.w2", "model.layers.49.block_sparse_moe.experts.139.w2", "model.layers.49.block_sparse_moe.experts.140.w2", "model.layers.49.block_sparse_moe.experts.141.w2", "model.layers.49.block_sparse_moe.experts.142.w2", "model.layers.49.block_sparse_moe.experts.143.w2", "model.layers.49.block_sparse_moe.experts.144.w2", "model.layers.49.block_sparse_moe.experts.145.w2", "model.layers.49.block_sparse_moe.experts.146.w2", "model.layers.49.block_sparse_moe.experts.147.w2", "model.layers.49.block_sparse_moe.experts.148.w2", "model.layers.49.block_sparse_moe.experts.149.w2", "model.layers.49.block_sparse_moe.experts.150.w2", "model.layers.49.block_sparse_moe.experts.151.w2", "model.layers.49.block_sparse_moe.experts.152.w2", "model.layers.49.block_sparse_moe.experts.153.w2", "model.layers.49.block_sparse_moe.experts.154.w2", "model.layers.49.block_sparse_moe.experts.155.w2", "model.layers.49.block_sparse_moe.experts.156.w2", "model.layers.49.block_sparse_moe.experts.157.w2", "model.layers.49.block_sparse_moe.experts.158.w2", "model.layers.49.block_sparse_moe.experts.159.w2", "model.layers.49.block_sparse_moe.experts.160.w2", "model.layers.49.block_sparse_moe.experts.161.w2", "model.layers.49.block_sparse_moe.experts.162.w2", "model.layers.49.block_sparse_moe.experts.163.w2", "model.layers.49.block_sparse_moe.experts.164.w2", "model.layers.49.block_sparse_moe.experts.165.w2", "model.layers.49.block_sparse_moe.experts.166.w2", "model.layers.49.block_sparse_moe.experts.167.w2", "model.layers.49.block_sparse_moe.experts.168.w2", "model.layers.49.block_sparse_moe.experts.169.w2", "model.layers.49.block_sparse_moe.experts.170.w2", "model.layers.49.block_sparse_moe.experts.171.w2", "model.layers.49.block_sparse_moe.experts.172.w2", "model.layers.49.block_sparse_moe.experts.173.w2", "model.layers.49.block_sparse_moe.experts.174.w2", "model.layers.49.block_sparse_moe.experts.175.w2", "model.layers.49.block_sparse_moe.experts.176.w2", "model.layers.49.block_sparse_moe.experts.177.w2", "model.layers.49.block_sparse_moe.experts.178.w2", "model.layers.49.block_sparse_moe.experts.179.w2", "model.layers.49.block_sparse_moe.experts.180.w2", "model.layers.49.block_sparse_moe.experts.181.w2", "model.layers.49.block_sparse_moe.experts.182.w2", "model.layers.49.block_sparse_moe.experts.183.w2", "model.layers.49.block_sparse_moe.experts.184.w2", "model.layers.49.block_sparse_moe.experts.185.w2", "model.layers.49.block_sparse_moe.experts.186.w2", "model.layers.49.block_sparse_moe.experts.187.w2", "model.layers.49.block_sparse_moe.experts.188.w2", "model.layers.49.block_sparse_moe.experts.189.w2", "model.layers.49.block_sparse_moe.experts.190.w2", "model.layers.49.block_sparse_moe.experts.191.w2", "model.layers.49.block_sparse_moe.experts.192.w2", "model.layers.49.block_sparse_moe.experts.193.w2", "model.layers.49.block_sparse_moe.experts.194.w2", "model.layers.49.block_sparse_moe.experts.195.w2", "model.layers.49.block_sparse_moe.experts.196.w2", "model.layers.49.block_sparse_moe.experts.197.w2", "model.layers.49.block_sparse_moe.experts.198.w2", "model.layers.49.block_sparse_moe.experts.199.w2", "model.layers.49.block_sparse_moe.experts.200.w2", "model.layers.49.block_sparse_moe.experts.201.w2", "model.layers.49.block_sparse_moe.experts.202.w2", "model.layers.49.block_sparse_moe.experts.203.w2", "model.layers.49.block_sparse_moe.experts.204.w2", "model.layers.49.block_sparse_moe.experts.205.w2", "model.layers.49.block_sparse_moe.experts.206.w2", "model.layers.49.block_sparse_moe.experts.207.w2", "model.layers.49.block_sparse_moe.experts.208.w2", "model.layers.49.block_sparse_moe.experts.209.w2", "model.layers.49.block_sparse_moe.experts.210.w2", "model.layers.49.block_sparse_moe.experts.211.w2", "model.layers.49.block_sparse_moe.experts.212.w2", "model.layers.49.block_sparse_moe.experts.213.w2", "model.layers.49.block_sparse_moe.experts.214.w2", "model.layers.49.block_sparse_moe.experts.215.w2", "model.layers.49.block_sparse_moe.experts.216.w2", "model.layers.49.block_sparse_moe.experts.217.w2", "model.layers.49.block_sparse_moe.experts.218.w2", "model.layers.49.block_sparse_moe.experts.219.w2", "model.layers.49.block_sparse_moe.experts.220.w2", "model.layers.49.block_sparse_moe.experts.221.w2", "model.layers.49.block_sparse_moe.experts.222.w2", "model.layers.49.block_sparse_moe.experts.223.w2", "model.layers.49.block_sparse_moe.experts.224.w2", "model.layers.49.block_sparse_moe.experts.225.w2", "model.layers.49.block_sparse_moe.experts.226.w2", "model.layers.49.block_sparse_moe.experts.227.w2", "model.layers.49.block_sparse_moe.experts.228.w2", "model.layers.49.block_sparse_moe.experts.229.w2", "model.layers.49.block_sparse_moe.experts.230.w2", "model.layers.49.block_sparse_moe.experts.231.w2", "model.layers.49.block_sparse_moe.experts.232.w2", "model.layers.49.block_sparse_moe.experts.233.w2", "model.layers.49.block_sparse_moe.experts.234.w2", "model.layers.49.block_sparse_moe.experts.235.w2", "model.layers.49.block_sparse_moe.experts.236.w2", "model.layers.49.block_sparse_moe.experts.237.w2", "model.layers.49.block_sparse_moe.experts.238.w2", "model.layers.49.block_sparse_moe.experts.239.w2", "model.layers.49.block_sparse_moe.experts.240.w2", "model.layers.49.block_sparse_moe.experts.241.w2", "model.layers.49.block_sparse_moe.experts.242.w2", "model.layers.49.block_sparse_moe.experts.243.w2", "model.layers.49.block_sparse_moe.experts.244.w2", "model.layers.49.block_sparse_moe.experts.245.w2", "model.layers.49.block_sparse_moe.experts.246.w2", "model.layers.49.block_sparse_moe.experts.247.w2", "model.layers.49.block_sparse_moe.experts.248.w2", "model.layers.49.block_sparse_moe.experts.249.w2", "model.layers.49.block_sparse_moe.experts.250.w2", "model.layers.49.block_sparse_moe.experts.251.w2", "model.layers.49.block_sparse_moe.experts.252.w2", "model.layers.49.block_sparse_moe.experts.253.w2", "model.layers.49.block_sparse_moe.experts.254.w2", "model.layers.49.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0005544606596231128, "dbits": 3623878656 } ] }, { "idx": 100, "layers": [ "model.layers.50.self_attn.q_proj", "model.layers.50.self_attn.k_proj", "model.layers.50.self_attn.v_proj", "model.layers.50.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0014968797564506309, "dbits": 44040192 } ] }, { "idx": 101, "layers": [ "model.layers.50.block_sparse_moe.experts.0.w1", "model.layers.50.block_sparse_moe.experts.1.w1", "model.layers.50.block_sparse_moe.experts.2.w1", "model.layers.50.block_sparse_moe.experts.3.w1", "model.layers.50.block_sparse_moe.experts.4.w1", "model.layers.50.block_sparse_moe.experts.5.w1", "model.layers.50.block_sparse_moe.experts.6.w1", "model.layers.50.block_sparse_moe.experts.7.w1", "model.layers.50.block_sparse_moe.experts.8.w1", "model.layers.50.block_sparse_moe.experts.9.w1", "model.layers.50.block_sparse_moe.experts.10.w1", "model.layers.50.block_sparse_moe.experts.11.w1", "model.layers.50.block_sparse_moe.experts.12.w1", "model.layers.50.block_sparse_moe.experts.13.w1", "model.layers.50.block_sparse_moe.experts.14.w1", "model.layers.50.block_sparse_moe.experts.15.w1", "model.layers.50.block_sparse_moe.experts.16.w1", "model.layers.50.block_sparse_moe.experts.17.w1", "model.layers.50.block_sparse_moe.experts.18.w1", "model.layers.50.block_sparse_moe.experts.19.w1", "model.layers.50.block_sparse_moe.experts.20.w1", "model.layers.50.block_sparse_moe.experts.21.w1", "model.layers.50.block_sparse_moe.experts.22.w1", "model.layers.50.block_sparse_moe.experts.23.w1", "model.layers.50.block_sparse_moe.experts.24.w1", "model.layers.50.block_sparse_moe.experts.25.w1", "model.layers.50.block_sparse_moe.experts.26.w1", "model.layers.50.block_sparse_moe.experts.27.w1", "model.layers.50.block_sparse_moe.experts.28.w1", "model.layers.50.block_sparse_moe.experts.29.w1", "model.layers.50.block_sparse_moe.experts.30.w1", "model.layers.50.block_sparse_moe.experts.31.w1", "model.layers.50.block_sparse_moe.experts.32.w1", "model.layers.50.block_sparse_moe.experts.33.w1", "model.layers.50.block_sparse_moe.experts.34.w1", "model.layers.50.block_sparse_moe.experts.35.w1", "model.layers.50.block_sparse_moe.experts.36.w1", "model.layers.50.block_sparse_moe.experts.37.w1", "model.layers.50.block_sparse_moe.experts.38.w1", "model.layers.50.block_sparse_moe.experts.39.w1", "model.layers.50.block_sparse_moe.experts.40.w1", "model.layers.50.block_sparse_moe.experts.41.w1", "model.layers.50.block_sparse_moe.experts.42.w1", "model.layers.50.block_sparse_moe.experts.43.w1", "model.layers.50.block_sparse_moe.experts.44.w1", "model.layers.50.block_sparse_moe.experts.45.w1", "model.layers.50.block_sparse_moe.experts.46.w1", "model.layers.50.block_sparse_moe.experts.47.w1", "model.layers.50.block_sparse_moe.experts.48.w1", "model.layers.50.block_sparse_moe.experts.49.w1", "model.layers.50.block_sparse_moe.experts.50.w1", "model.layers.50.block_sparse_moe.experts.51.w1", "model.layers.50.block_sparse_moe.experts.52.w1", "model.layers.50.block_sparse_moe.experts.53.w1", "model.layers.50.block_sparse_moe.experts.54.w1", "model.layers.50.block_sparse_moe.experts.55.w1", "model.layers.50.block_sparse_moe.experts.56.w1", "model.layers.50.block_sparse_moe.experts.57.w1", "model.layers.50.block_sparse_moe.experts.58.w1", "model.layers.50.block_sparse_moe.experts.59.w1", "model.layers.50.block_sparse_moe.experts.60.w1", "model.layers.50.block_sparse_moe.experts.61.w1", "model.layers.50.block_sparse_moe.experts.62.w1", "model.layers.50.block_sparse_moe.experts.63.w1", "model.layers.50.block_sparse_moe.experts.64.w1", "model.layers.50.block_sparse_moe.experts.65.w1", "model.layers.50.block_sparse_moe.experts.66.w1", "model.layers.50.block_sparse_moe.experts.67.w1", "model.layers.50.block_sparse_moe.experts.68.w1", "model.layers.50.block_sparse_moe.experts.69.w1", "model.layers.50.block_sparse_moe.experts.70.w1", "model.layers.50.block_sparse_moe.experts.71.w1", "model.layers.50.block_sparse_moe.experts.72.w1", "model.layers.50.block_sparse_moe.experts.73.w1", "model.layers.50.block_sparse_moe.experts.74.w1", "model.layers.50.block_sparse_moe.experts.75.w1", "model.layers.50.block_sparse_moe.experts.76.w1", "model.layers.50.block_sparse_moe.experts.77.w1", "model.layers.50.block_sparse_moe.experts.78.w1", "model.layers.50.block_sparse_moe.experts.79.w1", "model.layers.50.block_sparse_moe.experts.80.w1", "model.layers.50.block_sparse_moe.experts.81.w1", "model.layers.50.block_sparse_moe.experts.82.w1", "model.layers.50.block_sparse_moe.experts.83.w1", "model.layers.50.block_sparse_moe.experts.84.w1", "model.layers.50.block_sparse_moe.experts.85.w1", "model.layers.50.block_sparse_moe.experts.86.w1", "model.layers.50.block_sparse_moe.experts.87.w1", "model.layers.50.block_sparse_moe.experts.88.w1", "model.layers.50.block_sparse_moe.experts.89.w1", "model.layers.50.block_sparse_moe.experts.90.w1", "model.layers.50.block_sparse_moe.experts.91.w1", "model.layers.50.block_sparse_moe.experts.92.w1", "model.layers.50.block_sparse_moe.experts.93.w1", "model.layers.50.block_sparse_moe.experts.94.w1", "model.layers.50.block_sparse_moe.experts.95.w1", "model.layers.50.block_sparse_moe.experts.96.w1", "model.layers.50.block_sparse_moe.experts.97.w1", "model.layers.50.block_sparse_moe.experts.98.w1", "model.layers.50.block_sparse_moe.experts.99.w1", "model.layers.50.block_sparse_moe.experts.100.w1", "model.layers.50.block_sparse_moe.experts.101.w1", "model.layers.50.block_sparse_moe.experts.102.w1", "model.layers.50.block_sparse_moe.experts.103.w1", "model.layers.50.block_sparse_moe.experts.104.w1", "model.layers.50.block_sparse_moe.experts.105.w1", "model.layers.50.block_sparse_moe.experts.106.w1", "model.layers.50.block_sparse_moe.experts.107.w1", "model.layers.50.block_sparse_moe.experts.108.w1", "model.layers.50.block_sparse_moe.experts.109.w1", "model.layers.50.block_sparse_moe.experts.110.w1", "model.layers.50.block_sparse_moe.experts.111.w1", "model.layers.50.block_sparse_moe.experts.112.w1", "model.layers.50.block_sparse_moe.experts.113.w1", "model.layers.50.block_sparse_moe.experts.114.w1", "model.layers.50.block_sparse_moe.experts.115.w1", "model.layers.50.block_sparse_moe.experts.116.w1", "model.layers.50.block_sparse_moe.experts.117.w1", "model.layers.50.block_sparse_moe.experts.118.w1", "model.layers.50.block_sparse_moe.experts.119.w1", "model.layers.50.block_sparse_moe.experts.120.w1", "model.layers.50.block_sparse_moe.experts.121.w1", "model.layers.50.block_sparse_moe.experts.122.w1", "model.layers.50.block_sparse_moe.experts.123.w1", "model.layers.50.block_sparse_moe.experts.124.w1", "model.layers.50.block_sparse_moe.experts.125.w1", "model.layers.50.block_sparse_moe.experts.126.w1", "model.layers.50.block_sparse_moe.experts.127.w1", "model.layers.50.block_sparse_moe.experts.128.w1", "model.layers.50.block_sparse_moe.experts.129.w1", "model.layers.50.block_sparse_moe.experts.130.w1", "model.layers.50.block_sparse_moe.experts.131.w1", "model.layers.50.block_sparse_moe.experts.132.w1", "model.layers.50.block_sparse_moe.experts.133.w1", "model.layers.50.block_sparse_moe.experts.134.w1", "model.layers.50.block_sparse_moe.experts.135.w1", "model.layers.50.block_sparse_moe.experts.136.w1", "model.layers.50.block_sparse_moe.experts.137.w1", "model.layers.50.block_sparse_moe.experts.138.w1", "model.layers.50.block_sparse_moe.experts.139.w1", "model.layers.50.block_sparse_moe.experts.140.w1", "model.layers.50.block_sparse_moe.experts.141.w1", "model.layers.50.block_sparse_moe.experts.142.w1", "model.layers.50.block_sparse_moe.experts.143.w1", "model.layers.50.block_sparse_moe.experts.144.w1", "model.layers.50.block_sparse_moe.experts.145.w1", "model.layers.50.block_sparse_moe.experts.146.w1", "model.layers.50.block_sparse_moe.experts.147.w1", "model.layers.50.block_sparse_moe.experts.148.w1", "model.layers.50.block_sparse_moe.experts.149.w1", "model.layers.50.block_sparse_moe.experts.150.w1", "model.layers.50.block_sparse_moe.experts.151.w1", "model.layers.50.block_sparse_moe.experts.152.w1", "model.layers.50.block_sparse_moe.experts.153.w1", "model.layers.50.block_sparse_moe.experts.154.w1", "model.layers.50.block_sparse_moe.experts.155.w1", "model.layers.50.block_sparse_moe.experts.156.w1", "model.layers.50.block_sparse_moe.experts.157.w1", "model.layers.50.block_sparse_moe.experts.158.w1", "model.layers.50.block_sparse_moe.experts.159.w1", "model.layers.50.block_sparse_moe.experts.160.w1", "model.layers.50.block_sparse_moe.experts.161.w1", "model.layers.50.block_sparse_moe.experts.162.w1", "model.layers.50.block_sparse_moe.experts.163.w1", "model.layers.50.block_sparse_moe.experts.164.w1", "model.layers.50.block_sparse_moe.experts.165.w1", "model.layers.50.block_sparse_moe.experts.166.w1", "model.layers.50.block_sparse_moe.experts.167.w1", "model.layers.50.block_sparse_moe.experts.168.w1", "model.layers.50.block_sparse_moe.experts.169.w1", "model.layers.50.block_sparse_moe.experts.170.w1", "model.layers.50.block_sparse_moe.experts.171.w1", "model.layers.50.block_sparse_moe.experts.172.w1", "model.layers.50.block_sparse_moe.experts.173.w1", "model.layers.50.block_sparse_moe.experts.174.w1", "model.layers.50.block_sparse_moe.experts.175.w1", "model.layers.50.block_sparse_moe.experts.176.w1", "model.layers.50.block_sparse_moe.experts.177.w1", "model.layers.50.block_sparse_moe.experts.178.w1", "model.layers.50.block_sparse_moe.experts.179.w1", "model.layers.50.block_sparse_moe.experts.180.w1", "model.layers.50.block_sparse_moe.experts.181.w1", "model.layers.50.block_sparse_moe.experts.182.w1", "model.layers.50.block_sparse_moe.experts.183.w1", "model.layers.50.block_sparse_moe.experts.184.w1", "model.layers.50.block_sparse_moe.experts.185.w1", "model.layers.50.block_sparse_moe.experts.186.w1", "model.layers.50.block_sparse_moe.experts.187.w1", "model.layers.50.block_sparse_moe.experts.188.w1", "model.layers.50.block_sparse_moe.experts.189.w1", "model.layers.50.block_sparse_moe.experts.190.w1", "model.layers.50.block_sparse_moe.experts.191.w1", "model.layers.50.block_sparse_moe.experts.192.w1", "model.layers.50.block_sparse_moe.experts.193.w1", "model.layers.50.block_sparse_moe.experts.194.w1", "model.layers.50.block_sparse_moe.experts.195.w1", "model.layers.50.block_sparse_moe.experts.196.w1", "model.layers.50.block_sparse_moe.experts.197.w1", "model.layers.50.block_sparse_moe.experts.198.w1", "model.layers.50.block_sparse_moe.experts.199.w1", "model.layers.50.block_sparse_moe.experts.200.w1", "model.layers.50.block_sparse_moe.experts.201.w1", "model.layers.50.block_sparse_moe.experts.202.w1", "model.layers.50.block_sparse_moe.experts.203.w1", "model.layers.50.block_sparse_moe.experts.204.w1", "model.layers.50.block_sparse_moe.experts.205.w1", "model.layers.50.block_sparse_moe.experts.206.w1", "model.layers.50.block_sparse_moe.experts.207.w1", "model.layers.50.block_sparse_moe.experts.208.w1", "model.layers.50.block_sparse_moe.experts.209.w1", "model.layers.50.block_sparse_moe.experts.210.w1", "model.layers.50.block_sparse_moe.experts.211.w1", "model.layers.50.block_sparse_moe.experts.212.w1", "model.layers.50.block_sparse_moe.experts.213.w1", "model.layers.50.block_sparse_moe.experts.214.w1", "model.layers.50.block_sparse_moe.experts.215.w1", "model.layers.50.block_sparse_moe.experts.216.w1", "model.layers.50.block_sparse_moe.experts.217.w1", "model.layers.50.block_sparse_moe.experts.218.w1", "model.layers.50.block_sparse_moe.experts.219.w1", "model.layers.50.block_sparse_moe.experts.220.w1", "model.layers.50.block_sparse_moe.experts.221.w1", "model.layers.50.block_sparse_moe.experts.222.w1", "model.layers.50.block_sparse_moe.experts.223.w1", "model.layers.50.block_sparse_moe.experts.224.w1", "model.layers.50.block_sparse_moe.experts.225.w1", "model.layers.50.block_sparse_moe.experts.226.w1", "model.layers.50.block_sparse_moe.experts.227.w1", "model.layers.50.block_sparse_moe.experts.228.w1", "model.layers.50.block_sparse_moe.experts.229.w1", "model.layers.50.block_sparse_moe.experts.230.w1", "model.layers.50.block_sparse_moe.experts.231.w1", "model.layers.50.block_sparse_moe.experts.232.w1", "model.layers.50.block_sparse_moe.experts.233.w1", "model.layers.50.block_sparse_moe.experts.234.w1", "model.layers.50.block_sparse_moe.experts.235.w1", "model.layers.50.block_sparse_moe.experts.236.w1", "model.layers.50.block_sparse_moe.experts.237.w1", "model.layers.50.block_sparse_moe.experts.238.w1", "model.layers.50.block_sparse_moe.experts.239.w1", "model.layers.50.block_sparse_moe.experts.240.w1", "model.layers.50.block_sparse_moe.experts.241.w1", "model.layers.50.block_sparse_moe.experts.242.w1", "model.layers.50.block_sparse_moe.experts.243.w1", "model.layers.50.block_sparse_moe.experts.244.w1", "model.layers.50.block_sparse_moe.experts.245.w1", "model.layers.50.block_sparse_moe.experts.246.w1", "model.layers.50.block_sparse_moe.experts.247.w1", "model.layers.50.block_sparse_moe.experts.248.w1", "model.layers.50.block_sparse_moe.experts.249.w1", "model.layers.50.block_sparse_moe.experts.250.w1", "model.layers.50.block_sparse_moe.experts.251.w1", "model.layers.50.block_sparse_moe.experts.252.w1", "model.layers.50.block_sparse_moe.experts.253.w1", "model.layers.50.block_sparse_moe.experts.254.w1", "model.layers.50.block_sparse_moe.experts.255.w1", "model.layers.50.block_sparse_moe.experts.0.w3", "model.layers.50.block_sparse_moe.experts.1.w3", "model.layers.50.block_sparse_moe.experts.2.w3", "model.layers.50.block_sparse_moe.experts.3.w3", "model.layers.50.block_sparse_moe.experts.4.w3", "model.layers.50.block_sparse_moe.experts.5.w3", "model.layers.50.block_sparse_moe.experts.6.w3", "model.layers.50.block_sparse_moe.experts.7.w3", "model.layers.50.block_sparse_moe.experts.8.w3", "model.layers.50.block_sparse_moe.experts.9.w3", "model.layers.50.block_sparse_moe.experts.10.w3", "model.layers.50.block_sparse_moe.experts.11.w3", "model.layers.50.block_sparse_moe.experts.12.w3", "model.layers.50.block_sparse_moe.experts.13.w3", "model.layers.50.block_sparse_moe.experts.14.w3", "model.layers.50.block_sparse_moe.experts.15.w3", "model.layers.50.block_sparse_moe.experts.16.w3", "model.layers.50.block_sparse_moe.experts.17.w3", "model.layers.50.block_sparse_moe.experts.18.w3", "model.layers.50.block_sparse_moe.experts.19.w3", "model.layers.50.block_sparse_moe.experts.20.w3", "model.layers.50.block_sparse_moe.experts.21.w3", "model.layers.50.block_sparse_moe.experts.22.w3", "model.layers.50.block_sparse_moe.experts.23.w3", "model.layers.50.block_sparse_moe.experts.24.w3", "model.layers.50.block_sparse_moe.experts.25.w3", "model.layers.50.block_sparse_moe.experts.26.w3", "model.layers.50.block_sparse_moe.experts.27.w3", "model.layers.50.block_sparse_moe.experts.28.w3", "model.layers.50.block_sparse_moe.experts.29.w3", "model.layers.50.block_sparse_moe.experts.30.w3", "model.layers.50.block_sparse_moe.experts.31.w3", "model.layers.50.block_sparse_moe.experts.32.w3", "model.layers.50.block_sparse_moe.experts.33.w3", "model.layers.50.block_sparse_moe.experts.34.w3", "model.layers.50.block_sparse_moe.experts.35.w3", "model.layers.50.block_sparse_moe.experts.36.w3", "model.layers.50.block_sparse_moe.experts.37.w3", "model.layers.50.block_sparse_moe.experts.38.w3", "model.layers.50.block_sparse_moe.experts.39.w3", "model.layers.50.block_sparse_moe.experts.40.w3", "model.layers.50.block_sparse_moe.experts.41.w3", "model.layers.50.block_sparse_moe.experts.42.w3", "model.layers.50.block_sparse_moe.experts.43.w3", "model.layers.50.block_sparse_moe.experts.44.w3", "model.layers.50.block_sparse_moe.experts.45.w3", "model.layers.50.block_sparse_moe.experts.46.w3", "model.layers.50.block_sparse_moe.experts.47.w3", "model.layers.50.block_sparse_moe.experts.48.w3", "model.layers.50.block_sparse_moe.experts.49.w3", "model.layers.50.block_sparse_moe.experts.50.w3", "model.layers.50.block_sparse_moe.experts.51.w3", "model.layers.50.block_sparse_moe.experts.52.w3", "model.layers.50.block_sparse_moe.experts.53.w3", "model.layers.50.block_sparse_moe.experts.54.w3", "model.layers.50.block_sparse_moe.experts.55.w3", "model.layers.50.block_sparse_moe.experts.56.w3", "model.layers.50.block_sparse_moe.experts.57.w3", "model.layers.50.block_sparse_moe.experts.58.w3", "model.layers.50.block_sparse_moe.experts.59.w3", "model.layers.50.block_sparse_moe.experts.60.w3", "model.layers.50.block_sparse_moe.experts.61.w3", "model.layers.50.block_sparse_moe.experts.62.w3", "model.layers.50.block_sparse_moe.experts.63.w3", "model.layers.50.block_sparse_moe.experts.64.w3", "model.layers.50.block_sparse_moe.experts.65.w3", "model.layers.50.block_sparse_moe.experts.66.w3", "model.layers.50.block_sparse_moe.experts.67.w3", "model.layers.50.block_sparse_moe.experts.68.w3", "model.layers.50.block_sparse_moe.experts.69.w3", "model.layers.50.block_sparse_moe.experts.70.w3", "model.layers.50.block_sparse_moe.experts.71.w3", "model.layers.50.block_sparse_moe.experts.72.w3", "model.layers.50.block_sparse_moe.experts.73.w3", "model.layers.50.block_sparse_moe.experts.74.w3", "model.layers.50.block_sparse_moe.experts.75.w3", "model.layers.50.block_sparse_moe.experts.76.w3", "model.layers.50.block_sparse_moe.experts.77.w3", "model.layers.50.block_sparse_moe.experts.78.w3", "model.layers.50.block_sparse_moe.experts.79.w3", "model.layers.50.block_sparse_moe.experts.80.w3", "model.layers.50.block_sparse_moe.experts.81.w3", "model.layers.50.block_sparse_moe.experts.82.w3", "model.layers.50.block_sparse_moe.experts.83.w3", "model.layers.50.block_sparse_moe.experts.84.w3", "model.layers.50.block_sparse_moe.experts.85.w3", "model.layers.50.block_sparse_moe.experts.86.w3", "model.layers.50.block_sparse_moe.experts.87.w3", "model.layers.50.block_sparse_moe.experts.88.w3", "model.layers.50.block_sparse_moe.experts.89.w3", "model.layers.50.block_sparse_moe.experts.90.w3", "model.layers.50.block_sparse_moe.experts.91.w3", "model.layers.50.block_sparse_moe.experts.92.w3", "model.layers.50.block_sparse_moe.experts.93.w3", "model.layers.50.block_sparse_moe.experts.94.w3", "model.layers.50.block_sparse_moe.experts.95.w3", "model.layers.50.block_sparse_moe.experts.96.w3", "model.layers.50.block_sparse_moe.experts.97.w3", "model.layers.50.block_sparse_moe.experts.98.w3", "model.layers.50.block_sparse_moe.experts.99.w3", "model.layers.50.block_sparse_moe.experts.100.w3", "model.layers.50.block_sparse_moe.experts.101.w3", "model.layers.50.block_sparse_moe.experts.102.w3", "model.layers.50.block_sparse_moe.experts.103.w3", "model.layers.50.block_sparse_moe.experts.104.w3", "model.layers.50.block_sparse_moe.experts.105.w3", "model.layers.50.block_sparse_moe.experts.106.w3", "model.layers.50.block_sparse_moe.experts.107.w3", "model.layers.50.block_sparse_moe.experts.108.w3", "model.layers.50.block_sparse_moe.experts.109.w3", "model.layers.50.block_sparse_moe.experts.110.w3", "model.layers.50.block_sparse_moe.experts.111.w3", "model.layers.50.block_sparse_moe.experts.112.w3", "model.layers.50.block_sparse_moe.experts.113.w3", "model.layers.50.block_sparse_moe.experts.114.w3", "model.layers.50.block_sparse_moe.experts.115.w3", "model.layers.50.block_sparse_moe.experts.116.w3", "model.layers.50.block_sparse_moe.experts.117.w3", "model.layers.50.block_sparse_moe.experts.118.w3", "model.layers.50.block_sparse_moe.experts.119.w3", "model.layers.50.block_sparse_moe.experts.120.w3", "model.layers.50.block_sparse_moe.experts.121.w3", "model.layers.50.block_sparse_moe.experts.122.w3", "model.layers.50.block_sparse_moe.experts.123.w3", "model.layers.50.block_sparse_moe.experts.124.w3", "model.layers.50.block_sparse_moe.experts.125.w3", "model.layers.50.block_sparse_moe.experts.126.w3", "model.layers.50.block_sparse_moe.experts.127.w3", "model.layers.50.block_sparse_moe.experts.128.w3", "model.layers.50.block_sparse_moe.experts.129.w3", "model.layers.50.block_sparse_moe.experts.130.w3", "model.layers.50.block_sparse_moe.experts.131.w3", "model.layers.50.block_sparse_moe.experts.132.w3", "model.layers.50.block_sparse_moe.experts.133.w3", "model.layers.50.block_sparse_moe.experts.134.w3", "model.layers.50.block_sparse_moe.experts.135.w3", "model.layers.50.block_sparse_moe.experts.136.w3", "model.layers.50.block_sparse_moe.experts.137.w3", "model.layers.50.block_sparse_moe.experts.138.w3", "model.layers.50.block_sparse_moe.experts.139.w3", "model.layers.50.block_sparse_moe.experts.140.w3", "model.layers.50.block_sparse_moe.experts.141.w3", "model.layers.50.block_sparse_moe.experts.142.w3", "model.layers.50.block_sparse_moe.experts.143.w3", "model.layers.50.block_sparse_moe.experts.144.w3", "model.layers.50.block_sparse_moe.experts.145.w3", "model.layers.50.block_sparse_moe.experts.146.w3", "model.layers.50.block_sparse_moe.experts.147.w3", "model.layers.50.block_sparse_moe.experts.148.w3", "model.layers.50.block_sparse_moe.experts.149.w3", "model.layers.50.block_sparse_moe.experts.150.w3", "model.layers.50.block_sparse_moe.experts.151.w3", "model.layers.50.block_sparse_moe.experts.152.w3", "model.layers.50.block_sparse_moe.experts.153.w3", "model.layers.50.block_sparse_moe.experts.154.w3", "model.layers.50.block_sparse_moe.experts.155.w3", "model.layers.50.block_sparse_moe.experts.156.w3", "model.layers.50.block_sparse_moe.experts.157.w3", "model.layers.50.block_sparse_moe.experts.158.w3", "model.layers.50.block_sparse_moe.experts.159.w3", "model.layers.50.block_sparse_moe.experts.160.w3", "model.layers.50.block_sparse_moe.experts.161.w3", "model.layers.50.block_sparse_moe.experts.162.w3", "model.layers.50.block_sparse_moe.experts.163.w3", "model.layers.50.block_sparse_moe.experts.164.w3", "model.layers.50.block_sparse_moe.experts.165.w3", "model.layers.50.block_sparse_moe.experts.166.w3", "model.layers.50.block_sparse_moe.experts.167.w3", "model.layers.50.block_sparse_moe.experts.168.w3", "model.layers.50.block_sparse_moe.experts.169.w3", "model.layers.50.block_sparse_moe.experts.170.w3", "model.layers.50.block_sparse_moe.experts.171.w3", "model.layers.50.block_sparse_moe.experts.172.w3", "model.layers.50.block_sparse_moe.experts.173.w3", "model.layers.50.block_sparse_moe.experts.174.w3", "model.layers.50.block_sparse_moe.experts.175.w3", "model.layers.50.block_sparse_moe.experts.176.w3", "model.layers.50.block_sparse_moe.experts.177.w3", "model.layers.50.block_sparse_moe.experts.178.w3", "model.layers.50.block_sparse_moe.experts.179.w3", "model.layers.50.block_sparse_moe.experts.180.w3", "model.layers.50.block_sparse_moe.experts.181.w3", "model.layers.50.block_sparse_moe.experts.182.w3", "model.layers.50.block_sparse_moe.experts.183.w3", "model.layers.50.block_sparse_moe.experts.184.w3", "model.layers.50.block_sparse_moe.experts.185.w3", "model.layers.50.block_sparse_moe.experts.186.w3", "model.layers.50.block_sparse_moe.experts.187.w3", "model.layers.50.block_sparse_moe.experts.188.w3", "model.layers.50.block_sparse_moe.experts.189.w3", "model.layers.50.block_sparse_moe.experts.190.w3", "model.layers.50.block_sparse_moe.experts.191.w3", "model.layers.50.block_sparse_moe.experts.192.w3", "model.layers.50.block_sparse_moe.experts.193.w3", "model.layers.50.block_sparse_moe.experts.194.w3", "model.layers.50.block_sparse_moe.experts.195.w3", "model.layers.50.block_sparse_moe.experts.196.w3", "model.layers.50.block_sparse_moe.experts.197.w3", "model.layers.50.block_sparse_moe.experts.198.w3", "model.layers.50.block_sparse_moe.experts.199.w3", "model.layers.50.block_sparse_moe.experts.200.w3", "model.layers.50.block_sparse_moe.experts.201.w3", "model.layers.50.block_sparse_moe.experts.202.w3", "model.layers.50.block_sparse_moe.experts.203.w3", "model.layers.50.block_sparse_moe.experts.204.w3", "model.layers.50.block_sparse_moe.experts.205.w3", "model.layers.50.block_sparse_moe.experts.206.w3", "model.layers.50.block_sparse_moe.experts.207.w3", "model.layers.50.block_sparse_moe.experts.208.w3", "model.layers.50.block_sparse_moe.experts.209.w3", "model.layers.50.block_sparse_moe.experts.210.w3", "model.layers.50.block_sparse_moe.experts.211.w3", "model.layers.50.block_sparse_moe.experts.212.w3", "model.layers.50.block_sparse_moe.experts.213.w3", "model.layers.50.block_sparse_moe.experts.214.w3", "model.layers.50.block_sparse_moe.experts.215.w3", "model.layers.50.block_sparse_moe.experts.216.w3", "model.layers.50.block_sparse_moe.experts.217.w3", "model.layers.50.block_sparse_moe.experts.218.w3", "model.layers.50.block_sparse_moe.experts.219.w3", "model.layers.50.block_sparse_moe.experts.220.w3", "model.layers.50.block_sparse_moe.experts.221.w3", "model.layers.50.block_sparse_moe.experts.222.w3", "model.layers.50.block_sparse_moe.experts.223.w3", "model.layers.50.block_sparse_moe.experts.224.w3", "model.layers.50.block_sparse_moe.experts.225.w3", "model.layers.50.block_sparse_moe.experts.226.w3", "model.layers.50.block_sparse_moe.experts.227.w3", "model.layers.50.block_sparse_moe.experts.228.w3", "model.layers.50.block_sparse_moe.experts.229.w3", "model.layers.50.block_sparse_moe.experts.230.w3", "model.layers.50.block_sparse_moe.experts.231.w3", "model.layers.50.block_sparse_moe.experts.232.w3", "model.layers.50.block_sparse_moe.experts.233.w3", "model.layers.50.block_sparse_moe.experts.234.w3", "model.layers.50.block_sparse_moe.experts.235.w3", "model.layers.50.block_sparse_moe.experts.236.w3", "model.layers.50.block_sparse_moe.experts.237.w3", "model.layers.50.block_sparse_moe.experts.238.w3", "model.layers.50.block_sparse_moe.experts.239.w3", "model.layers.50.block_sparse_moe.experts.240.w3", "model.layers.50.block_sparse_moe.experts.241.w3", "model.layers.50.block_sparse_moe.experts.242.w3", "model.layers.50.block_sparse_moe.experts.243.w3", "model.layers.50.block_sparse_moe.experts.244.w3", "model.layers.50.block_sparse_moe.experts.245.w3", "model.layers.50.block_sparse_moe.experts.246.w3", "model.layers.50.block_sparse_moe.experts.247.w3", "model.layers.50.block_sparse_moe.experts.248.w3", "model.layers.50.block_sparse_moe.experts.249.w3", "model.layers.50.block_sparse_moe.experts.250.w3", "model.layers.50.block_sparse_moe.experts.251.w3", "model.layers.50.block_sparse_moe.experts.252.w3", "model.layers.50.block_sparse_moe.experts.253.w3", "model.layers.50.block_sparse_moe.experts.254.w3", "model.layers.50.block_sparse_moe.experts.255.w3", "model.layers.50.block_sparse_moe.experts.0.w2", "model.layers.50.block_sparse_moe.experts.1.w2", "model.layers.50.block_sparse_moe.experts.2.w2", "model.layers.50.block_sparse_moe.experts.3.w2", "model.layers.50.block_sparse_moe.experts.4.w2", "model.layers.50.block_sparse_moe.experts.5.w2", "model.layers.50.block_sparse_moe.experts.6.w2", "model.layers.50.block_sparse_moe.experts.7.w2", "model.layers.50.block_sparse_moe.experts.8.w2", "model.layers.50.block_sparse_moe.experts.9.w2", "model.layers.50.block_sparse_moe.experts.10.w2", "model.layers.50.block_sparse_moe.experts.11.w2", "model.layers.50.block_sparse_moe.experts.12.w2", "model.layers.50.block_sparse_moe.experts.13.w2", "model.layers.50.block_sparse_moe.experts.14.w2", "model.layers.50.block_sparse_moe.experts.15.w2", "model.layers.50.block_sparse_moe.experts.16.w2", "model.layers.50.block_sparse_moe.experts.17.w2", "model.layers.50.block_sparse_moe.experts.18.w2", "model.layers.50.block_sparse_moe.experts.19.w2", "model.layers.50.block_sparse_moe.experts.20.w2", "model.layers.50.block_sparse_moe.experts.21.w2", "model.layers.50.block_sparse_moe.experts.22.w2", "model.layers.50.block_sparse_moe.experts.23.w2", "model.layers.50.block_sparse_moe.experts.24.w2", "model.layers.50.block_sparse_moe.experts.25.w2", "model.layers.50.block_sparse_moe.experts.26.w2", "model.layers.50.block_sparse_moe.experts.27.w2", "model.layers.50.block_sparse_moe.experts.28.w2", "model.layers.50.block_sparse_moe.experts.29.w2", "model.layers.50.block_sparse_moe.experts.30.w2", "model.layers.50.block_sparse_moe.experts.31.w2", "model.layers.50.block_sparse_moe.experts.32.w2", "model.layers.50.block_sparse_moe.experts.33.w2", "model.layers.50.block_sparse_moe.experts.34.w2", "model.layers.50.block_sparse_moe.experts.35.w2", "model.layers.50.block_sparse_moe.experts.36.w2", "model.layers.50.block_sparse_moe.experts.37.w2", "model.layers.50.block_sparse_moe.experts.38.w2", "model.layers.50.block_sparse_moe.experts.39.w2", "model.layers.50.block_sparse_moe.experts.40.w2", "model.layers.50.block_sparse_moe.experts.41.w2", "model.layers.50.block_sparse_moe.experts.42.w2", "model.layers.50.block_sparse_moe.experts.43.w2", "model.layers.50.block_sparse_moe.experts.44.w2", "model.layers.50.block_sparse_moe.experts.45.w2", "model.layers.50.block_sparse_moe.experts.46.w2", "model.layers.50.block_sparse_moe.experts.47.w2", "model.layers.50.block_sparse_moe.experts.48.w2", "model.layers.50.block_sparse_moe.experts.49.w2", "model.layers.50.block_sparse_moe.experts.50.w2", "model.layers.50.block_sparse_moe.experts.51.w2", "model.layers.50.block_sparse_moe.experts.52.w2", "model.layers.50.block_sparse_moe.experts.53.w2", "model.layers.50.block_sparse_moe.experts.54.w2", "model.layers.50.block_sparse_moe.experts.55.w2", "model.layers.50.block_sparse_moe.experts.56.w2", "model.layers.50.block_sparse_moe.experts.57.w2", "model.layers.50.block_sparse_moe.experts.58.w2", "model.layers.50.block_sparse_moe.experts.59.w2", "model.layers.50.block_sparse_moe.experts.60.w2", "model.layers.50.block_sparse_moe.experts.61.w2", "model.layers.50.block_sparse_moe.experts.62.w2", "model.layers.50.block_sparse_moe.experts.63.w2", "model.layers.50.block_sparse_moe.experts.64.w2", "model.layers.50.block_sparse_moe.experts.65.w2", "model.layers.50.block_sparse_moe.experts.66.w2", "model.layers.50.block_sparse_moe.experts.67.w2", "model.layers.50.block_sparse_moe.experts.68.w2", "model.layers.50.block_sparse_moe.experts.69.w2", "model.layers.50.block_sparse_moe.experts.70.w2", "model.layers.50.block_sparse_moe.experts.71.w2", "model.layers.50.block_sparse_moe.experts.72.w2", "model.layers.50.block_sparse_moe.experts.73.w2", "model.layers.50.block_sparse_moe.experts.74.w2", "model.layers.50.block_sparse_moe.experts.75.w2", "model.layers.50.block_sparse_moe.experts.76.w2", "model.layers.50.block_sparse_moe.experts.77.w2", "model.layers.50.block_sparse_moe.experts.78.w2", "model.layers.50.block_sparse_moe.experts.79.w2", "model.layers.50.block_sparse_moe.experts.80.w2", "model.layers.50.block_sparse_moe.experts.81.w2", "model.layers.50.block_sparse_moe.experts.82.w2", "model.layers.50.block_sparse_moe.experts.83.w2", "model.layers.50.block_sparse_moe.experts.84.w2", "model.layers.50.block_sparse_moe.experts.85.w2", "model.layers.50.block_sparse_moe.experts.86.w2", "model.layers.50.block_sparse_moe.experts.87.w2", "model.layers.50.block_sparse_moe.experts.88.w2", "model.layers.50.block_sparse_moe.experts.89.w2", "model.layers.50.block_sparse_moe.experts.90.w2", "model.layers.50.block_sparse_moe.experts.91.w2", "model.layers.50.block_sparse_moe.experts.92.w2", "model.layers.50.block_sparse_moe.experts.93.w2", "model.layers.50.block_sparse_moe.experts.94.w2", "model.layers.50.block_sparse_moe.experts.95.w2", "model.layers.50.block_sparse_moe.experts.96.w2", "model.layers.50.block_sparse_moe.experts.97.w2", "model.layers.50.block_sparse_moe.experts.98.w2", "model.layers.50.block_sparse_moe.experts.99.w2", "model.layers.50.block_sparse_moe.experts.100.w2", "model.layers.50.block_sparse_moe.experts.101.w2", "model.layers.50.block_sparse_moe.experts.102.w2", "model.layers.50.block_sparse_moe.experts.103.w2", "model.layers.50.block_sparse_moe.experts.104.w2", "model.layers.50.block_sparse_moe.experts.105.w2", "model.layers.50.block_sparse_moe.experts.106.w2", "model.layers.50.block_sparse_moe.experts.107.w2", "model.layers.50.block_sparse_moe.experts.108.w2", "model.layers.50.block_sparse_moe.experts.109.w2", "model.layers.50.block_sparse_moe.experts.110.w2", "model.layers.50.block_sparse_moe.experts.111.w2", "model.layers.50.block_sparse_moe.experts.112.w2", "model.layers.50.block_sparse_moe.experts.113.w2", "model.layers.50.block_sparse_moe.experts.114.w2", "model.layers.50.block_sparse_moe.experts.115.w2", "model.layers.50.block_sparse_moe.experts.116.w2", "model.layers.50.block_sparse_moe.experts.117.w2", "model.layers.50.block_sparse_moe.experts.118.w2", "model.layers.50.block_sparse_moe.experts.119.w2", "model.layers.50.block_sparse_moe.experts.120.w2", "model.layers.50.block_sparse_moe.experts.121.w2", "model.layers.50.block_sparse_moe.experts.122.w2", "model.layers.50.block_sparse_moe.experts.123.w2", "model.layers.50.block_sparse_moe.experts.124.w2", "model.layers.50.block_sparse_moe.experts.125.w2", "model.layers.50.block_sparse_moe.experts.126.w2", "model.layers.50.block_sparse_moe.experts.127.w2", "model.layers.50.block_sparse_moe.experts.128.w2", "model.layers.50.block_sparse_moe.experts.129.w2", "model.layers.50.block_sparse_moe.experts.130.w2", "model.layers.50.block_sparse_moe.experts.131.w2", "model.layers.50.block_sparse_moe.experts.132.w2", "model.layers.50.block_sparse_moe.experts.133.w2", "model.layers.50.block_sparse_moe.experts.134.w2", "model.layers.50.block_sparse_moe.experts.135.w2", "model.layers.50.block_sparse_moe.experts.136.w2", "model.layers.50.block_sparse_moe.experts.137.w2", "model.layers.50.block_sparse_moe.experts.138.w2", "model.layers.50.block_sparse_moe.experts.139.w2", "model.layers.50.block_sparse_moe.experts.140.w2", "model.layers.50.block_sparse_moe.experts.141.w2", "model.layers.50.block_sparse_moe.experts.142.w2", "model.layers.50.block_sparse_moe.experts.143.w2", "model.layers.50.block_sparse_moe.experts.144.w2", "model.layers.50.block_sparse_moe.experts.145.w2", "model.layers.50.block_sparse_moe.experts.146.w2", "model.layers.50.block_sparse_moe.experts.147.w2", "model.layers.50.block_sparse_moe.experts.148.w2", "model.layers.50.block_sparse_moe.experts.149.w2", "model.layers.50.block_sparse_moe.experts.150.w2", "model.layers.50.block_sparse_moe.experts.151.w2", "model.layers.50.block_sparse_moe.experts.152.w2", "model.layers.50.block_sparse_moe.experts.153.w2", "model.layers.50.block_sparse_moe.experts.154.w2", "model.layers.50.block_sparse_moe.experts.155.w2", "model.layers.50.block_sparse_moe.experts.156.w2", "model.layers.50.block_sparse_moe.experts.157.w2", "model.layers.50.block_sparse_moe.experts.158.w2", "model.layers.50.block_sparse_moe.experts.159.w2", "model.layers.50.block_sparse_moe.experts.160.w2", "model.layers.50.block_sparse_moe.experts.161.w2", "model.layers.50.block_sparse_moe.experts.162.w2", "model.layers.50.block_sparse_moe.experts.163.w2", "model.layers.50.block_sparse_moe.experts.164.w2", "model.layers.50.block_sparse_moe.experts.165.w2", "model.layers.50.block_sparse_moe.experts.166.w2", "model.layers.50.block_sparse_moe.experts.167.w2", "model.layers.50.block_sparse_moe.experts.168.w2", "model.layers.50.block_sparse_moe.experts.169.w2", "model.layers.50.block_sparse_moe.experts.170.w2", "model.layers.50.block_sparse_moe.experts.171.w2", "model.layers.50.block_sparse_moe.experts.172.w2", "model.layers.50.block_sparse_moe.experts.173.w2", "model.layers.50.block_sparse_moe.experts.174.w2", "model.layers.50.block_sparse_moe.experts.175.w2", "model.layers.50.block_sparse_moe.experts.176.w2", "model.layers.50.block_sparse_moe.experts.177.w2", "model.layers.50.block_sparse_moe.experts.178.w2", "model.layers.50.block_sparse_moe.experts.179.w2", "model.layers.50.block_sparse_moe.experts.180.w2", "model.layers.50.block_sparse_moe.experts.181.w2", "model.layers.50.block_sparse_moe.experts.182.w2", "model.layers.50.block_sparse_moe.experts.183.w2", "model.layers.50.block_sparse_moe.experts.184.w2", "model.layers.50.block_sparse_moe.experts.185.w2", "model.layers.50.block_sparse_moe.experts.186.w2", "model.layers.50.block_sparse_moe.experts.187.w2", "model.layers.50.block_sparse_moe.experts.188.w2", "model.layers.50.block_sparse_moe.experts.189.w2", "model.layers.50.block_sparse_moe.experts.190.w2", "model.layers.50.block_sparse_moe.experts.191.w2", "model.layers.50.block_sparse_moe.experts.192.w2", "model.layers.50.block_sparse_moe.experts.193.w2", "model.layers.50.block_sparse_moe.experts.194.w2", "model.layers.50.block_sparse_moe.experts.195.w2", "model.layers.50.block_sparse_moe.experts.196.w2", "model.layers.50.block_sparse_moe.experts.197.w2", "model.layers.50.block_sparse_moe.experts.198.w2", "model.layers.50.block_sparse_moe.experts.199.w2", "model.layers.50.block_sparse_moe.experts.200.w2", "model.layers.50.block_sparse_moe.experts.201.w2", "model.layers.50.block_sparse_moe.experts.202.w2", "model.layers.50.block_sparse_moe.experts.203.w2", "model.layers.50.block_sparse_moe.experts.204.w2", "model.layers.50.block_sparse_moe.experts.205.w2", "model.layers.50.block_sparse_moe.experts.206.w2", "model.layers.50.block_sparse_moe.experts.207.w2", "model.layers.50.block_sparse_moe.experts.208.w2", "model.layers.50.block_sparse_moe.experts.209.w2", "model.layers.50.block_sparse_moe.experts.210.w2", "model.layers.50.block_sparse_moe.experts.211.w2", "model.layers.50.block_sparse_moe.experts.212.w2", "model.layers.50.block_sparse_moe.experts.213.w2", "model.layers.50.block_sparse_moe.experts.214.w2", "model.layers.50.block_sparse_moe.experts.215.w2", "model.layers.50.block_sparse_moe.experts.216.w2", "model.layers.50.block_sparse_moe.experts.217.w2", "model.layers.50.block_sparse_moe.experts.218.w2", "model.layers.50.block_sparse_moe.experts.219.w2", "model.layers.50.block_sparse_moe.experts.220.w2", "model.layers.50.block_sparse_moe.experts.221.w2", "model.layers.50.block_sparse_moe.experts.222.w2", "model.layers.50.block_sparse_moe.experts.223.w2", "model.layers.50.block_sparse_moe.experts.224.w2", "model.layers.50.block_sparse_moe.experts.225.w2", "model.layers.50.block_sparse_moe.experts.226.w2", "model.layers.50.block_sparse_moe.experts.227.w2", "model.layers.50.block_sparse_moe.experts.228.w2", "model.layers.50.block_sparse_moe.experts.229.w2", "model.layers.50.block_sparse_moe.experts.230.w2", "model.layers.50.block_sparse_moe.experts.231.w2", "model.layers.50.block_sparse_moe.experts.232.w2", "model.layers.50.block_sparse_moe.experts.233.w2", "model.layers.50.block_sparse_moe.experts.234.w2", "model.layers.50.block_sparse_moe.experts.235.w2", "model.layers.50.block_sparse_moe.experts.236.w2", "model.layers.50.block_sparse_moe.experts.237.w2", "model.layers.50.block_sparse_moe.experts.238.w2", "model.layers.50.block_sparse_moe.experts.239.w2", "model.layers.50.block_sparse_moe.experts.240.w2", "model.layers.50.block_sparse_moe.experts.241.w2", "model.layers.50.block_sparse_moe.experts.242.w2", "model.layers.50.block_sparse_moe.experts.243.w2", "model.layers.50.block_sparse_moe.experts.244.w2", "model.layers.50.block_sparse_moe.experts.245.w2", "model.layers.50.block_sparse_moe.experts.246.w2", "model.layers.50.block_sparse_moe.experts.247.w2", "model.layers.50.block_sparse_moe.experts.248.w2", "model.layers.50.block_sparse_moe.experts.249.w2", "model.layers.50.block_sparse_moe.experts.250.w2", "model.layers.50.block_sparse_moe.experts.251.w2", "model.layers.50.block_sparse_moe.experts.252.w2", "model.layers.50.block_sparse_moe.experts.253.w2", "model.layers.50.block_sparse_moe.experts.254.w2", "model.layers.50.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00019103959202765308, "dbits": 3623878656 } ] }, { "idx": 102, "layers": [ "model.layers.51.self_attn.q_proj", "model.layers.51.self_attn.k_proj", "model.layers.51.self_attn.v_proj", "model.layers.51.self_attn.o_proj" ], "candidates": [ { "dkld": -0.004434597492218029, "dbits": 44040192 } ] }, { "idx": 103, "layers": [ "model.layers.51.block_sparse_moe.experts.0.w1", "model.layers.51.block_sparse_moe.experts.1.w1", "model.layers.51.block_sparse_moe.experts.2.w1", "model.layers.51.block_sparse_moe.experts.3.w1", "model.layers.51.block_sparse_moe.experts.4.w1", "model.layers.51.block_sparse_moe.experts.5.w1", "model.layers.51.block_sparse_moe.experts.6.w1", "model.layers.51.block_sparse_moe.experts.7.w1", "model.layers.51.block_sparse_moe.experts.8.w1", "model.layers.51.block_sparse_moe.experts.9.w1", "model.layers.51.block_sparse_moe.experts.10.w1", "model.layers.51.block_sparse_moe.experts.11.w1", "model.layers.51.block_sparse_moe.experts.12.w1", "model.layers.51.block_sparse_moe.experts.13.w1", "model.layers.51.block_sparse_moe.experts.14.w1", "model.layers.51.block_sparse_moe.experts.15.w1", "model.layers.51.block_sparse_moe.experts.16.w1", "model.layers.51.block_sparse_moe.experts.17.w1", "model.layers.51.block_sparse_moe.experts.18.w1", "model.layers.51.block_sparse_moe.experts.19.w1", "model.layers.51.block_sparse_moe.experts.20.w1", "model.layers.51.block_sparse_moe.experts.21.w1", "model.layers.51.block_sparse_moe.experts.22.w1", "model.layers.51.block_sparse_moe.experts.23.w1", "model.layers.51.block_sparse_moe.experts.24.w1", "model.layers.51.block_sparse_moe.experts.25.w1", "model.layers.51.block_sparse_moe.experts.26.w1", "model.layers.51.block_sparse_moe.experts.27.w1", "model.layers.51.block_sparse_moe.experts.28.w1", "model.layers.51.block_sparse_moe.experts.29.w1", "model.layers.51.block_sparse_moe.experts.30.w1", "model.layers.51.block_sparse_moe.experts.31.w1", "model.layers.51.block_sparse_moe.experts.32.w1", "model.layers.51.block_sparse_moe.experts.33.w1", "model.layers.51.block_sparse_moe.experts.34.w1", "model.layers.51.block_sparse_moe.experts.35.w1", "model.layers.51.block_sparse_moe.experts.36.w1", "model.layers.51.block_sparse_moe.experts.37.w1", "model.layers.51.block_sparse_moe.experts.38.w1", "model.layers.51.block_sparse_moe.experts.39.w1", "model.layers.51.block_sparse_moe.experts.40.w1", "model.layers.51.block_sparse_moe.experts.41.w1", "model.layers.51.block_sparse_moe.experts.42.w1", "model.layers.51.block_sparse_moe.experts.43.w1", "model.layers.51.block_sparse_moe.experts.44.w1", "model.layers.51.block_sparse_moe.experts.45.w1", "model.layers.51.block_sparse_moe.experts.46.w1", "model.layers.51.block_sparse_moe.experts.47.w1", "model.layers.51.block_sparse_moe.experts.48.w1", "model.layers.51.block_sparse_moe.experts.49.w1", "model.layers.51.block_sparse_moe.experts.50.w1", "model.layers.51.block_sparse_moe.experts.51.w1", "model.layers.51.block_sparse_moe.experts.52.w1", "model.layers.51.block_sparse_moe.experts.53.w1", "model.layers.51.block_sparse_moe.experts.54.w1", "model.layers.51.block_sparse_moe.experts.55.w1", "model.layers.51.block_sparse_moe.experts.56.w1", "model.layers.51.block_sparse_moe.experts.57.w1", "model.layers.51.block_sparse_moe.experts.58.w1", "model.layers.51.block_sparse_moe.experts.59.w1", "model.layers.51.block_sparse_moe.experts.60.w1", "model.layers.51.block_sparse_moe.experts.61.w1", "model.layers.51.block_sparse_moe.experts.62.w1", "model.layers.51.block_sparse_moe.experts.63.w1", "model.layers.51.block_sparse_moe.experts.64.w1", "model.layers.51.block_sparse_moe.experts.65.w1", "model.layers.51.block_sparse_moe.experts.66.w1", "model.layers.51.block_sparse_moe.experts.67.w1", "model.layers.51.block_sparse_moe.experts.68.w1", "model.layers.51.block_sparse_moe.experts.69.w1", "model.layers.51.block_sparse_moe.experts.70.w1", "model.layers.51.block_sparse_moe.experts.71.w1", "model.layers.51.block_sparse_moe.experts.72.w1", "model.layers.51.block_sparse_moe.experts.73.w1", "model.layers.51.block_sparse_moe.experts.74.w1", "model.layers.51.block_sparse_moe.experts.75.w1", "model.layers.51.block_sparse_moe.experts.76.w1", "model.layers.51.block_sparse_moe.experts.77.w1", "model.layers.51.block_sparse_moe.experts.78.w1", "model.layers.51.block_sparse_moe.experts.79.w1", "model.layers.51.block_sparse_moe.experts.80.w1", "model.layers.51.block_sparse_moe.experts.81.w1", "model.layers.51.block_sparse_moe.experts.82.w1", "model.layers.51.block_sparse_moe.experts.83.w1", "model.layers.51.block_sparse_moe.experts.84.w1", "model.layers.51.block_sparse_moe.experts.85.w1", "model.layers.51.block_sparse_moe.experts.86.w1", "model.layers.51.block_sparse_moe.experts.87.w1", "model.layers.51.block_sparse_moe.experts.88.w1", "model.layers.51.block_sparse_moe.experts.89.w1", "model.layers.51.block_sparse_moe.experts.90.w1", "model.layers.51.block_sparse_moe.experts.91.w1", "model.layers.51.block_sparse_moe.experts.92.w1", "model.layers.51.block_sparse_moe.experts.93.w1", "model.layers.51.block_sparse_moe.experts.94.w1", "model.layers.51.block_sparse_moe.experts.95.w1", "model.layers.51.block_sparse_moe.experts.96.w1", "model.layers.51.block_sparse_moe.experts.97.w1", "model.layers.51.block_sparse_moe.experts.98.w1", "model.layers.51.block_sparse_moe.experts.99.w1", "model.layers.51.block_sparse_moe.experts.100.w1", "model.layers.51.block_sparse_moe.experts.101.w1", "model.layers.51.block_sparse_moe.experts.102.w1", "model.layers.51.block_sparse_moe.experts.103.w1", "model.layers.51.block_sparse_moe.experts.104.w1", "model.layers.51.block_sparse_moe.experts.105.w1", "model.layers.51.block_sparse_moe.experts.106.w1", "model.layers.51.block_sparse_moe.experts.107.w1", "model.layers.51.block_sparse_moe.experts.108.w1", "model.layers.51.block_sparse_moe.experts.109.w1", "model.layers.51.block_sparse_moe.experts.110.w1", "model.layers.51.block_sparse_moe.experts.111.w1", "model.layers.51.block_sparse_moe.experts.112.w1", "model.layers.51.block_sparse_moe.experts.113.w1", "model.layers.51.block_sparse_moe.experts.114.w1", "model.layers.51.block_sparse_moe.experts.115.w1", "model.layers.51.block_sparse_moe.experts.116.w1", "model.layers.51.block_sparse_moe.experts.117.w1", "model.layers.51.block_sparse_moe.experts.118.w1", "model.layers.51.block_sparse_moe.experts.119.w1", "model.layers.51.block_sparse_moe.experts.120.w1", "model.layers.51.block_sparse_moe.experts.121.w1", "model.layers.51.block_sparse_moe.experts.122.w1", "model.layers.51.block_sparse_moe.experts.123.w1", "model.layers.51.block_sparse_moe.experts.124.w1", "model.layers.51.block_sparse_moe.experts.125.w1", "model.layers.51.block_sparse_moe.experts.126.w1", "model.layers.51.block_sparse_moe.experts.127.w1", "model.layers.51.block_sparse_moe.experts.128.w1", "model.layers.51.block_sparse_moe.experts.129.w1", "model.layers.51.block_sparse_moe.experts.130.w1", "model.layers.51.block_sparse_moe.experts.131.w1", "model.layers.51.block_sparse_moe.experts.132.w1", "model.layers.51.block_sparse_moe.experts.133.w1", "model.layers.51.block_sparse_moe.experts.134.w1", "model.layers.51.block_sparse_moe.experts.135.w1", "model.layers.51.block_sparse_moe.experts.136.w1", "model.layers.51.block_sparse_moe.experts.137.w1", "model.layers.51.block_sparse_moe.experts.138.w1", "model.layers.51.block_sparse_moe.experts.139.w1", "model.layers.51.block_sparse_moe.experts.140.w1", "model.layers.51.block_sparse_moe.experts.141.w1", "model.layers.51.block_sparse_moe.experts.142.w1", "model.layers.51.block_sparse_moe.experts.143.w1", "model.layers.51.block_sparse_moe.experts.144.w1", "model.layers.51.block_sparse_moe.experts.145.w1", "model.layers.51.block_sparse_moe.experts.146.w1", "model.layers.51.block_sparse_moe.experts.147.w1", "model.layers.51.block_sparse_moe.experts.148.w1", "model.layers.51.block_sparse_moe.experts.149.w1", "model.layers.51.block_sparse_moe.experts.150.w1", "model.layers.51.block_sparse_moe.experts.151.w1", "model.layers.51.block_sparse_moe.experts.152.w1", "model.layers.51.block_sparse_moe.experts.153.w1", "model.layers.51.block_sparse_moe.experts.154.w1", "model.layers.51.block_sparse_moe.experts.155.w1", "model.layers.51.block_sparse_moe.experts.156.w1", "model.layers.51.block_sparse_moe.experts.157.w1", "model.layers.51.block_sparse_moe.experts.158.w1", "model.layers.51.block_sparse_moe.experts.159.w1", "model.layers.51.block_sparse_moe.experts.160.w1", "model.layers.51.block_sparse_moe.experts.161.w1", "model.layers.51.block_sparse_moe.experts.162.w1", "model.layers.51.block_sparse_moe.experts.163.w1", "model.layers.51.block_sparse_moe.experts.164.w1", "model.layers.51.block_sparse_moe.experts.165.w1", "model.layers.51.block_sparse_moe.experts.166.w1", "model.layers.51.block_sparse_moe.experts.167.w1", "model.layers.51.block_sparse_moe.experts.168.w1", "model.layers.51.block_sparse_moe.experts.169.w1", "model.layers.51.block_sparse_moe.experts.170.w1", "model.layers.51.block_sparse_moe.experts.171.w1", "model.layers.51.block_sparse_moe.experts.172.w1", "model.layers.51.block_sparse_moe.experts.173.w1", "model.layers.51.block_sparse_moe.experts.174.w1", "model.layers.51.block_sparse_moe.experts.175.w1", "model.layers.51.block_sparse_moe.experts.176.w1", "model.layers.51.block_sparse_moe.experts.177.w1", "model.layers.51.block_sparse_moe.experts.178.w1", "model.layers.51.block_sparse_moe.experts.179.w1", "model.layers.51.block_sparse_moe.experts.180.w1", "model.layers.51.block_sparse_moe.experts.181.w1", "model.layers.51.block_sparse_moe.experts.182.w1", "model.layers.51.block_sparse_moe.experts.183.w1", "model.layers.51.block_sparse_moe.experts.184.w1", "model.layers.51.block_sparse_moe.experts.185.w1", "model.layers.51.block_sparse_moe.experts.186.w1", "model.layers.51.block_sparse_moe.experts.187.w1", "model.layers.51.block_sparse_moe.experts.188.w1", "model.layers.51.block_sparse_moe.experts.189.w1", "model.layers.51.block_sparse_moe.experts.190.w1", "model.layers.51.block_sparse_moe.experts.191.w1", "model.layers.51.block_sparse_moe.experts.192.w1", "model.layers.51.block_sparse_moe.experts.193.w1", "model.layers.51.block_sparse_moe.experts.194.w1", "model.layers.51.block_sparse_moe.experts.195.w1", "model.layers.51.block_sparse_moe.experts.196.w1", "model.layers.51.block_sparse_moe.experts.197.w1", "model.layers.51.block_sparse_moe.experts.198.w1", "model.layers.51.block_sparse_moe.experts.199.w1", "model.layers.51.block_sparse_moe.experts.200.w1", "model.layers.51.block_sparse_moe.experts.201.w1", "model.layers.51.block_sparse_moe.experts.202.w1", "model.layers.51.block_sparse_moe.experts.203.w1", "model.layers.51.block_sparse_moe.experts.204.w1", "model.layers.51.block_sparse_moe.experts.205.w1", "model.layers.51.block_sparse_moe.experts.206.w1", "model.layers.51.block_sparse_moe.experts.207.w1", "model.layers.51.block_sparse_moe.experts.208.w1", "model.layers.51.block_sparse_moe.experts.209.w1", "model.layers.51.block_sparse_moe.experts.210.w1", "model.layers.51.block_sparse_moe.experts.211.w1", "model.layers.51.block_sparse_moe.experts.212.w1", "model.layers.51.block_sparse_moe.experts.213.w1", "model.layers.51.block_sparse_moe.experts.214.w1", "model.layers.51.block_sparse_moe.experts.215.w1", "model.layers.51.block_sparse_moe.experts.216.w1", "model.layers.51.block_sparse_moe.experts.217.w1", "model.layers.51.block_sparse_moe.experts.218.w1", "model.layers.51.block_sparse_moe.experts.219.w1", "model.layers.51.block_sparse_moe.experts.220.w1", "model.layers.51.block_sparse_moe.experts.221.w1", "model.layers.51.block_sparse_moe.experts.222.w1", "model.layers.51.block_sparse_moe.experts.223.w1", "model.layers.51.block_sparse_moe.experts.224.w1", "model.layers.51.block_sparse_moe.experts.225.w1", "model.layers.51.block_sparse_moe.experts.226.w1", "model.layers.51.block_sparse_moe.experts.227.w1", "model.layers.51.block_sparse_moe.experts.228.w1", "model.layers.51.block_sparse_moe.experts.229.w1", "model.layers.51.block_sparse_moe.experts.230.w1", "model.layers.51.block_sparse_moe.experts.231.w1", "model.layers.51.block_sparse_moe.experts.232.w1", "model.layers.51.block_sparse_moe.experts.233.w1", "model.layers.51.block_sparse_moe.experts.234.w1", "model.layers.51.block_sparse_moe.experts.235.w1", "model.layers.51.block_sparse_moe.experts.236.w1", "model.layers.51.block_sparse_moe.experts.237.w1", "model.layers.51.block_sparse_moe.experts.238.w1", "model.layers.51.block_sparse_moe.experts.239.w1", "model.layers.51.block_sparse_moe.experts.240.w1", "model.layers.51.block_sparse_moe.experts.241.w1", "model.layers.51.block_sparse_moe.experts.242.w1", "model.layers.51.block_sparse_moe.experts.243.w1", "model.layers.51.block_sparse_moe.experts.244.w1", "model.layers.51.block_sparse_moe.experts.245.w1", "model.layers.51.block_sparse_moe.experts.246.w1", "model.layers.51.block_sparse_moe.experts.247.w1", "model.layers.51.block_sparse_moe.experts.248.w1", "model.layers.51.block_sparse_moe.experts.249.w1", "model.layers.51.block_sparse_moe.experts.250.w1", "model.layers.51.block_sparse_moe.experts.251.w1", "model.layers.51.block_sparse_moe.experts.252.w1", "model.layers.51.block_sparse_moe.experts.253.w1", "model.layers.51.block_sparse_moe.experts.254.w1", "model.layers.51.block_sparse_moe.experts.255.w1", "model.layers.51.block_sparse_moe.experts.0.w3", "model.layers.51.block_sparse_moe.experts.1.w3", "model.layers.51.block_sparse_moe.experts.2.w3", "model.layers.51.block_sparse_moe.experts.3.w3", "model.layers.51.block_sparse_moe.experts.4.w3", "model.layers.51.block_sparse_moe.experts.5.w3", "model.layers.51.block_sparse_moe.experts.6.w3", "model.layers.51.block_sparse_moe.experts.7.w3", "model.layers.51.block_sparse_moe.experts.8.w3", "model.layers.51.block_sparse_moe.experts.9.w3", "model.layers.51.block_sparse_moe.experts.10.w3", "model.layers.51.block_sparse_moe.experts.11.w3", "model.layers.51.block_sparse_moe.experts.12.w3", "model.layers.51.block_sparse_moe.experts.13.w3", "model.layers.51.block_sparse_moe.experts.14.w3", "model.layers.51.block_sparse_moe.experts.15.w3", "model.layers.51.block_sparse_moe.experts.16.w3", "model.layers.51.block_sparse_moe.experts.17.w3", "model.layers.51.block_sparse_moe.experts.18.w3", "model.layers.51.block_sparse_moe.experts.19.w3", "model.layers.51.block_sparse_moe.experts.20.w3", "model.layers.51.block_sparse_moe.experts.21.w3", "model.layers.51.block_sparse_moe.experts.22.w3", "model.layers.51.block_sparse_moe.experts.23.w3", "model.layers.51.block_sparse_moe.experts.24.w3", "model.layers.51.block_sparse_moe.experts.25.w3", "model.layers.51.block_sparse_moe.experts.26.w3", "model.layers.51.block_sparse_moe.experts.27.w3", "model.layers.51.block_sparse_moe.experts.28.w3", "model.layers.51.block_sparse_moe.experts.29.w3", "model.layers.51.block_sparse_moe.experts.30.w3", "model.layers.51.block_sparse_moe.experts.31.w3", "model.layers.51.block_sparse_moe.experts.32.w3", "model.layers.51.block_sparse_moe.experts.33.w3", "model.layers.51.block_sparse_moe.experts.34.w3", "model.layers.51.block_sparse_moe.experts.35.w3", "model.layers.51.block_sparse_moe.experts.36.w3", "model.layers.51.block_sparse_moe.experts.37.w3", "model.layers.51.block_sparse_moe.experts.38.w3", "model.layers.51.block_sparse_moe.experts.39.w3", "model.layers.51.block_sparse_moe.experts.40.w3", "model.layers.51.block_sparse_moe.experts.41.w3", "model.layers.51.block_sparse_moe.experts.42.w3", "model.layers.51.block_sparse_moe.experts.43.w3", "model.layers.51.block_sparse_moe.experts.44.w3", "model.layers.51.block_sparse_moe.experts.45.w3", "model.layers.51.block_sparse_moe.experts.46.w3", "model.layers.51.block_sparse_moe.experts.47.w3", "model.layers.51.block_sparse_moe.experts.48.w3", "model.layers.51.block_sparse_moe.experts.49.w3", "model.layers.51.block_sparse_moe.experts.50.w3", "model.layers.51.block_sparse_moe.experts.51.w3", "model.layers.51.block_sparse_moe.experts.52.w3", "model.layers.51.block_sparse_moe.experts.53.w3", "model.layers.51.block_sparse_moe.experts.54.w3", "model.layers.51.block_sparse_moe.experts.55.w3", "model.layers.51.block_sparse_moe.experts.56.w3", "model.layers.51.block_sparse_moe.experts.57.w3", "model.layers.51.block_sparse_moe.experts.58.w3", "model.layers.51.block_sparse_moe.experts.59.w3", "model.layers.51.block_sparse_moe.experts.60.w3", "model.layers.51.block_sparse_moe.experts.61.w3", "model.layers.51.block_sparse_moe.experts.62.w3", "model.layers.51.block_sparse_moe.experts.63.w3", "model.layers.51.block_sparse_moe.experts.64.w3", "model.layers.51.block_sparse_moe.experts.65.w3", "model.layers.51.block_sparse_moe.experts.66.w3", "model.layers.51.block_sparse_moe.experts.67.w3", "model.layers.51.block_sparse_moe.experts.68.w3", "model.layers.51.block_sparse_moe.experts.69.w3", "model.layers.51.block_sparse_moe.experts.70.w3", "model.layers.51.block_sparse_moe.experts.71.w3", "model.layers.51.block_sparse_moe.experts.72.w3", "model.layers.51.block_sparse_moe.experts.73.w3", "model.layers.51.block_sparse_moe.experts.74.w3", "model.layers.51.block_sparse_moe.experts.75.w3", "model.layers.51.block_sparse_moe.experts.76.w3", "model.layers.51.block_sparse_moe.experts.77.w3", "model.layers.51.block_sparse_moe.experts.78.w3", "model.layers.51.block_sparse_moe.experts.79.w3", "model.layers.51.block_sparse_moe.experts.80.w3", "model.layers.51.block_sparse_moe.experts.81.w3", "model.layers.51.block_sparse_moe.experts.82.w3", "model.layers.51.block_sparse_moe.experts.83.w3", "model.layers.51.block_sparse_moe.experts.84.w3", "model.layers.51.block_sparse_moe.experts.85.w3", "model.layers.51.block_sparse_moe.experts.86.w3", "model.layers.51.block_sparse_moe.experts.87.w3", "model.layers.51.block_sparse_moe.experts.88.w3", "model.layers.51.block_sparse_moe.experts.89.w3", "model.layers.51.block_sparse_moe.experts.90.w3", "model.layers.51.block_sparse_moe.experts.91.w3", "model.layers.51.block_sparse_moe.experts.92.w3", "model.layers.51.block_sparse_moe.experts.93.w3", "model.layers.51.block_sparse_moe.experts.94.w3", "model.layers.51.block_sparse_moe.experts.95.w3", "model.layers.51.block_sparse_moe.experts.96.w3", "model.layers.51.block_sparse_moe.experts.97.w3", "model.layers.51.block_sparse_moe.experts.98.w3", "model.layers.51.block_sparse_moe.experts.99.w3", "model.layers.51.block_sparse_moe.experts.100.w3", "model.layers.51.block_sparse_moe.experts.101.w3", "model.layers.51.block_sparse_moe.experts.102.w3", "model.layers.51.block_sparse_moe.experts.103.w3", "model.layers.51.block_sparse_moe.experts.104.w3", "model.layers.51.block_sparse_moe.experts.105.w3", "model.layers.51.block_sparse_moe.experts.106.w3", "model.layers.51.block_sparse_moe.experts.107.w3", "model.layers.51.block_sparse_moe.experts.108.w3", "model.layers.51.block_sparse_moe.experts.109.w3", "model.layers.51.block_sparse_moe.experts.110.w3", "model.layers.51.block_sparse_moe.experts.111.w3", "model.layers.51.block_sparse_moe.experts.112.w3", "model.layers.51.block_sparse_moe.experts.113.w3", "model.layers.51.block_sparse_moe.experts.114.w3", "model.layers.51.block_sparse_moe.experts.115.w3", "model.layers.51.block_sparse_moe.experts.116.w3", "model.layers.51.block_sparse_moe.experts.117.w3", "model.layers.51.block_sparse_moe.experts.118.w3", "model.layers.51.block_sparse_moe.experts.119.w3", "model.layers.51.block_sparse_moe.experts.120.w3", "model.layers.51.block_sparse_moe.experts.121.w3", "model.layers.51.block_sparse_moe.experts.122.w3", "model.layers.51.block_sparse_moe.experts.123.w3", "model.layers.51.block_sparse_moe.experts.124.w3", "model.layers.51.block_sparse_moe.experts.125.w3", "model.layers.51.block_sparse_moe.experts.126.w3", "model.layers.51.block_sparse_moe.experts.127.w3", "model.layers.51.block_sparse_moe.experts.128.w3", "model.layers.51.block_sparse_moe.experts.129.w3", "model.layers.51.block_sparse_moe.experts.130.w3", "model.layers.51.block_sparse_moe.experts.131.w3", "model.layers.51.block_sparse_moe.experts.132.w3", "model.layers.51.block_sparse_moe.experts.133.w3", "model.layers.51.block_sparse_moe.experts.134.w3", "model.layers.51.block_sparse_moe.experts.135.w3", "model.layers.51.block_sparse_moe.experts.136.w3", "model.layers.51.block_sparse_moe.experts.137.w3", "model.layers.51.block_sparse_moe.experts.138.w3", "model.layers.51.block_sparse_moe.experts.139.w3", "model.layers.51.block_sparse_moe.experts.140.w3", "model.layers.51.block_sparse_moe.experts.141.w3", "model.layers.51.block_sparse_moe.experts.142.w3", "model.layers.51.block_sparse_moe.experts.143.w3", "model.layers.51.block_sparse_moe.experts.144.w3", "model.layers.51.block_sparse_moe.experts.145.w3", "model.layers.51.block_sparse_moe.experts.146.w3", "model.layers.51.block_sparse_moe.experts.147.w3", "model.layers.51.block_sparse_moe.experts.148.w3", "model.layers.51.block_sparse_moe.experts.149.w3", "model.layers.51.block_sparse_moe.experts.150.w3", "model.layers.51.block_sparse_moe.experts.151.w3", "model.layers.51.block_sparse_moe.experts.152.w3", "model.layers.51.block_sparse_moe.experts.153.w3", "model.layers.51.block_sparse_moe.experts.154.w3", "model.layers.51.block_sparse_moe.experts.155.w3", "model.layers.51.block_sparse_moe.experts.156.w3", "model.layers.51.block_sparse_moe.experts.157.w3", "model.layers.51.block_sparse_moe.experts.158.w3", "model.layers.51.block_sparse_moe.experts.159.w3", "model.layers.51.block_sparse_moe.experts.160.w3", "model.layers.51.block_sparse_moe.experts.161.w3", "model.layers.51.block_sparse_moe.experts.162.w3", "model.layers.51.block_sparse_moe.experts.163.w3", "model.layers.51.block_sparse_moe.experts.164.w3", "model.layers.51.block_sparse_moe.experts.165.w3", "model.layers.51.block_sparse_moe.experts.166.w3", "model.layers.51.block_sparse_moe.experts.167.w3", "model.layers.51.block_sparse_moe.experts.168.w3", "model.layers.51.block_sparse_moe.experts.169.w3", "model.layers.51.block_sparse_moe.experts.170.w3", "model.layers.51.block_sparse_moe.experts.171.w3", "model.layers.51.block_sparse_moe.experts.172.w3", "model.layers.51.block_sparse_moe.experts.173.w3", "model.layers.51.block_sparse_moe.experts.174.w3", "model.layers.51.block_sparse_moe.experts.175.w3", "model.layers.51.block_sparse_moe.experts.176.w3", "model.layers.51.block_sparse_moe.experts.177.w3", "model.layers.51.block_sparse_moe.experts.178.w3", "model.layers.51.block_sparse_moe.experts.179.w3", "model.layers.51.block_sparse_moe.experts.180.w3", "model.layers.51.block_sparse_moe.experts.181.w3", "model.layers.51.block_sparse_moe.experts.182.w3", "model.layers.51.block_sparse_moe.experts.183.w3", "model.layers.51.block_sparse_moe.experts.184.w3", "model.layers.51.block_sparse_moe.experts.185.w3", "model.layers.51.block_sparse_moe.experts.186.w3", "model.layers.51.block_sparse_moe.experts.187.w3", "model.layers.51.block_sparse_moe.experts.188.w3", "model.layers.51.block_sparse_moe.experts.189.w3", "model.layers.51.block_sparse_moe.experts.190.w3", "model.layers.51.block_sparse_moe.experts.191.w3", "model.layers.51.block_sparse_moe.experts.192.w3", "model.layers.51.block_sparse_moe.experts.193.w3", "model.layers.51.block_sparse_moe.experts.194.w3", "model.layers.51.block_sparse_moe.experts.195.w3", "model.layers.51.block_sparse_moe.experts.196.w3", "model.layers.51.block_sparse_moe.experts.197.w3", "model.layers.51.block_sparse_moe.experts.198.w3", "model.layers.51.block_sparse_moe.experts.199.w3", "model.layers.51.block_sparse_moe.experts.200.w3", "model.layers.51.block_sparse_moe.experts.201.w3", "model.layers.51.block_sparse_moe.experts.202.w3", "model.layers.51.block_sparse_moe.experts.203.w3", "model.layers.51.block_sparse_moe.experts.204.w3", "model.layers.51.block_sparse_moe.experts.205.w3", "model.layers.51.block_sparse_moe.experts.206.w3", "model.layers.51.block_sparse_moe.experts.207.w3", "model.layers.51.block_sparse_moe.experts.208.w3", "model.layers.51.block_sparse_moe.experts.209.w3", "model.layers.51.block_sparse_moe.experts.210.w3", "model.layers.51.block_sparse_moe.experts.211.w3", "model.layers.51.block_sparse_moe.experts.212.w3", "model.layers.51.block_sparse_moe.experts.213.w3", "model.layers.51.block_sparse_moe.experts.214.w3", "model.layers.51.block_sparse_moe.experts.215.w3", "model.layers.51.block_sparse_moe.experts.216.w3", "model.layers.51.block_sparse_moe.experts.217.w3", "model.layers.51.block_sparse_moe.experts.218.w3", "model.layers.51.block_sparse_moe.experts.219.w3", "model.layers.51.block_sparse_moe.experts.220.w3", "model.layers.51.block_sparse_moe.experts.221.w3", "model.layers.51.block_sparse_moe.experts.222.w3", "model.layers.51.block_sparse_moe.experts.223.w3", "model.layers.51.block_sparse_moe.experts.224.w3", "model.layers.51.block_sparse_moe.experts.225.w3", "model.layers.51.block_sparse_moe.experts.226.w3", "model.layers.51.block_sparse_moe.experts.227.w3", "model.layers.51.block_sparse_moe.experts.228.w3", "model.layers.51.block_sparse_moe.experts.229.w3", "model.layers.51.block_sparse_moe.experts.230.w3", "model.layers.51.block_sparse_moe.experts.231.w3", "model.layers.51.block_sparse_moe.experts.232.w3", "model.layers.51.block_sparse_moe.experts.233.w3", "model.layers.51.block_sparse_moe.experts.234.w3", "model.layers.51.block_sparse_moe.experts.235.w3", "model.layers.51.block_sparse_moe.experts.236.w3", "model.layers.51.block_sparse_moe.experts.237.w3", "model.layers.51.block_sparse_moe.experts.238.w3", "model.layers.51.block_sparse_moe.experts.239.w3", "model.layers.51.block_sparse_moe.experts.240.w3", "model.layers.51.block_sparse_moe.experts.241.w3", "model.layers.51.block_sparse_moe.experts.242.w3", "model.layers.51.block_sparse_moe.experts.243.w3", "model.layers.51.block_sparse_moe.experts.244.w3", "model.layers.51.block_sparse_moe.experts.245.w3", "model.layers.51.block_sparse_moe.experts.246.w3", "model.layers.51.block_sparse_moe.experts.247.w3", "model.layers.51.block_sparse_moe.experts.248.w3", "model.layers.51.block_sparse_moe.experts.249.w3", "model.layers.51.block_sparse_moe.experts.250.w3", "model.layers.51.block_sparse_moe.experts.251.w3", "model.layers.51.block_sparse_moe.experts.252.w3", "model.layers.51.block_sparse_moe.experts.253.w3", "model.layers.51.block_sparse_moe.experts.254.w3", "model.layers.51.block_sparse_moe.experts.255.w3", "model.layers.51.block_sparse_moe.experts.0.w2", "model.layers.51.block_sparse_moe.experts.1.w2", "model.layers.51.block_sparse_moe.experts.2.w2", "model.layers.51.block_sparse_moe.experts.3.w2", "model.layers.51.block_sparse_moe.experts.4.w2", "model.layers.51.block_sparse_moe.experts.5.w2", "model.layers.51.block_sparse_moe.experts.6.w2", "model.layers.51.block_sparse_moe.experts.7.w2", "model.layers.51.block_sparse_moe.experts.8.w2", "model.layers.51.block_sparse_moe.experts.9.w2", "model.layers.51.block_sparse_moe.experts.10.w2", "model.layers.51.block_sparse_moe.experts.11.w2", "model.layers.51.block_sparse_moe.experts.12.w2", "model.layers.51.block_sparse_moe.experts.13.w2", "model.layers.51.block_sparse_moe.experts.14.w2", "model.layers.51.block_sparse_moe.experts.15.w2", "model.layers.51.block_sparse_moe.experts.16.w2", "model.layers.51.block_sparse_moe.experts.17.w2", "model.layers.51.block_sparse_moe.experts.18.w2", "model.layers.51.block_sparse_moe.experts.19.w2", "model.layers.51.block_sparse_moe.experts.20.w2", "model.layers.51.block_sparse_moe.experts.21.w2", "model.layers.51.block_sparse_moe.experts.22.w2", "model.layers.51.block_sparse_moe.experts.23.w2", "model.layers.51.block_sparse_moe.experts.24.w2", "model.layers.51.block_sparse_moe.experts.25.w2", "model.layers.51.block_sparse_moe.experts.26.w2", "model.layers.51.block_sparse_moe.experts.27.w2", "model.layers.51.block_sparse_moe.experts.28.w2", "model.layers.51.block_sparse_moe.experts.29.w2", "model.layers.51.block_sparse_moe.experts.30.w2", "model.layers.51.block_sparse_moe.experts.31.w2", "model.layers.51.block_sparse_moe.experts.32.w2", "model.layers.51.block_sparse_moe.experts.33.w2", "model.layers.51.block_sparse_moe.experts.34.w2", "model.layers.51.block_sparse_moe.experts.35.w2", "model.layers.51.block_sparse_moe.experts.36.w2", "model.layers.51.block_sparse_moe.experts.37.w2", "model.layers.51.block_sparse_moe.experts.38.w2", "model.layers.51.block_sparse_moe.experts.39.w2", "model.layers.51.block_sparse_moe.experts.40.w2", "model.layers.51.block_sparse_moe.experts.41.w2", "model.layers.51.block_sparse_moe.experts.42.w2", "model.layers.51.block_sparse_moe.experts.43.w2", "model.layers.51.block_sparse_moe.experts.44.w2", "model.layers.51.block_sparse_moe.experts.45.w2", "model.layers.51.block_sparse_moe.experts.46.w2", "model.layers.51.block_sparse_moe.experts.47.w2", "model.layers.51.block_sparse_moe.experts.48.w2", "model.layers.51.block_sparse_moe.experts.49.w2", "model.layers.51.block_sparse_moe.experts.50.w2", "model.layers.51.block_sparse_moe.experts.51.w2", "model.layers.51.block_sparse_moe.experts.52.w2", "model.layers.51.block_sparse_moe.experts.53.w2", "model.layers.51.block_sparse_moe.experts.54.w2", "model.layers.51.block_sparse_moe.experts.55.w2", "model.layers.51.block_sparse_moe.experts.56.w2", "model.layers.51.block_sparse_moe.experts.57.w2", "model.layers.51.block_sparse_moe.experts.58.w2", "model.layers.51.block_sparse_moe.experts.59.w2", "model.layers.51.block_sparse_moe.experts.60.w2", "model.layers.51.block_sparse_moe.experts.61.w2", "model.layers.51.block_sparse_moe.experts.62.w2", "model.layers.51.block_sparse_moe.experts.63.w2", "model.layers.51.block_sparse_moe.experts.64.w2", "model.layers.51.block_sparse_moe.experts.65.w2", "model.layers.51.block_sparse_moe.experts.66.w2", "model.layers.51.block_sparse_moe.experts.67.w2", "model.layers.51.block_sparse_moe.experts.68.w2", "model.layers.51.block_sparse_moe.experts.69.w2", "model.layers.51.block_sparse_moe.experts.70.w2", "model.layers.51.block_sparse_moe.experts.71.w2", "model.layers.51.block_sparse_moe.experts.72.w2", "model.layers.51.block_sparse_moe.experts.73.w2", "model.layers.51.block_sparse_moe.experts.74.w2", "model.layers.51.block_sparse_moe.experts.75.w2", "model.layers.51.block_sparse_moe.experts.76.w2", "model.layers.51.block_sparse_moe.experts.77.w2", "model.layers.51.block_sparse_moe.experts.78.w2", "model.layers.51.block_sparse_moe.experts.79.w2", "model.layers.51.block_sparse_moe.experts.80.w2", "model.layers.51.block_sparse_moe.experts.81.w2", "model.layers.51.block_sparse_moe.experts.82.w2", "model.layers.51.block_sparse_moe.experts.83.w2", "model.layers.51.block_sparse_moe.experts.84.w2", "model.layers.51.block_sparse_moe.experts.85.w2", "model.layers.51.block_sparse_moe.experts.86.w2", "model.layers.51.block_sparse_moe.experts.87.w2", "model.layers.51.block_sparse_moe.experts.88.w2", "model.layers.51.block_sparse_moe.experts.89.w2", "model.layers.51.block_sparse_moe.experts.90.w2", "model.layers.51.block_sparse_moe.experts.91.w2", "model.layers.51.block_sparse_moe.experts.92.w2", "model.layers.51.block_sparse_moe.experts.93.w2", "model.layers.51.block_sparse_moe.experts.94.w2", "model.layers.51.block_sparse_moe.experts.95.w2", "model.layers.51.block_sparse_moe.experts.96.w2", "model.layers.51.block_sparse_moe.experts.97.w2", "model.layers.51.block_sparse_moe.experts.98.w2", "model.layers.51.block_sparse_moe.experts.99.w2", "model.layers.51.block_sparse_moe.experts.100.w2", "model.layers.51.block_sparse_moe.experts.101.w2", "model.layers.51.block_sparse_moe.experts.102.w2", "model.layers.51.block_sparse_moe.experts.103.w2", "model.layers.51.block_sparse_moe.experts.104.w2", "model.layers.51.block_sparse_moe.experts.105.w2", "model.layers.51.block_sparse_moe.experts.106.w2", "model.layers.51.block_sparse_moe.experts.107.w2", "model.layers.51.block_sparse_moe.experts.108.w2", "model.layers.51.block_sparse_moe.experts.109.w2", "model.layers.51.block_sparse_moe.experts.110.w2", "model.layers.51.block_sparse_moe.experts.111.w2", "model.layers.51.block_sparse_moe.experts.112.w2", "model.layers.51.block_sparse_moe.experts.113.w2", "model.layers.51.block_sparse_moe.experts.114.w2", "model.layers.51.block_sparse_moe.experts.115.w2", "model.layers.51.block_sparse_moe.experts.116.w2", "model.layers.51.block_sparse_moe.experts.117.w2", "model.layers.51.block_sparse_moe.experts.118.w2", "model.layers.51.block_sparse_moe.experts.119.w2", "model.layers.51.block_sparse_moe.experts.120.w2", "model.layers.51.block_sparse_moe.experts.121.w2", "model.layers.51.block_sparse_moe.experts.122.w2", "model.layers.51.block_sparse_moe.experts.123.w2", "model.layers.51.block_sparse_moe.experts.124.w2", "model.layers.51.block_sparse_moe.experts.125.w2", "model.layers.51.block_sparse_moe.experts.126.w2", "model.layers.51.block_sparse_moe.experts.127.w2", "model.layers.51.block_sparse_moe.experts.128.w2", "model.layers.51.block_sparse_moe.experts.129.w2", "model.layers.51.block_sparse_moe.experts.130.w2", "model.layers.51.block_sparse_moe.experts.131.w2", "model.layers.51.block_sparse_moe.experts.132.w2", "model.layers.51.block_sparse_moe.experts.133.w2", "model.layers.51.block_sparse_moe.experts.134.w2", "model.layers.51.block_sparse_moe.experts.135.w2", "model.layers.51.block_sparse_moe.experts.136.w2", "model.layers.51.block_sparse_moe.experts.137.w2", "model.layers.51.block_sparse_moe.experts.138.w2", "model.layers.51.block_sparse_moe.experts.139.w2", "model.layers.51.block_sparse_moe.experts.140.w2", "model.layers.51.block_sparse_moe.experts.141.w2", "model.layers.51.block_sparse_moe.experts.142.w2", "model.layers.51.block_sparse_moe.experts.143.w2", "model.layers.51.block_sparse_moe.experts.144.w2", "model.layers.51.block_sparse_moe.experts.145.w2", "model.layers.51.block_sparse_moe.experts.146.w2", "model.layers.51.block_sparse_moe.experts.147.w2", "model.layers.51.block_sparse_moe.experts.148.w2", "model.layers.51.block_sparse_moe.experts.149.w2", "model.layers.51.block_sparse_moe.experts.150.w2", "model.layers.51.block_sparse_moe.experts.151.w2", "model.layers.51.block_sparse_moe.experts.152.w2", "model.layers.51.block_sparse_moe.experts.153.w2", "model.layers.51.block_sparse_moe.experts.154.w2", "model.layers.51.block_sparse_moe.experts.155.w2", "model.layers.51.block_sparse_moe.experts.156.w2", "model.layers.51.block_sparse_moe.experts.157.w2", "model.layers.51.block_sparse_moe.experts.158.w2", "model.layers.51.block_sparse_moe.experts.159.w2", "model.layers.51.block_sparse_moe.experts.160.w2", "model.layers.51.block_sparse_moe.experts.161.w2", "model.layers.51.block_sparse_moe.experts.162.w2", "model.layers.51.block_sparse_moe.experts.163.w2", "model.layers.51.block_sparse_moe.experts.164.w2", "model.layers.51.block_sparse_moe.experts.165.w2", "model.layers.51.block_sparse_moe.experts.166.w2", "model.layers.51.block_sparse_moe.experts.167.w2", "model.layers.51.block_sparse_moe.experts.168.w2", "model.layers.51.block_sparse_moe.experts.169.w2", "model.layers.51.block_sparse_moe.experts.170.w2", "model.layers.51.block_sparse_moe.experts.171.w2", "model.layers.51.block_sparse_moe.experts.172.w2", "model.layers.51.block_sparse_moe.experts.173.w2", "model.layers.51.block_sparse_moe.experts.174.w2", "model.layers.51.block_sparse_moe.experts.175.w2", "model.layers.51.block_sparse_moe.experts.176.w2", "model.layers.51.block_sparse_moe.experts.177.w2", "model.layers.51.block_sparse_moe.experts.178.w2", "model.layers.51.block_sparse_moe.experts.179.w2", "model.layers.51.block_sparse_moe.experts.180.w2", "model.layers.51.block_sparse_moe.experts.181.w2", "model.layers.51.block_sparse_moe.experts.182.w2", "model.layers.51.block_sparse_moe.experts.183.w2", "model.layers.51.block_sparse_moe.experts.184.w2", "model.layers.51.block_sparse_moe.experts.185.w2", "model.layers.51.block_sparse_moe.experts.186.w2", "model.layers.51.block_sparse_moe.experts.187.w2", "model.layers.51.block_sparse_moe.experts.188.w2", "model.layers.51.block_sparse_moe.experts.189.w2", "model.layers.51.block_sparse_moe.experts.190.w2", "model.layers.51.block_sparse_moe.experts.191.w2", "model.layers.51.block_sparse_moe.experts.192.w2", "model.layers.51.block_sparse_moe.experts.193.w2", "model.layers.51.block_sparse_moe.experts.194.w2", "model.layers.51.block_sparse_moe.experts.195.w2", "model.layers.51.block_sparse_moe.experts.196.w2", "model.layers.51.block_sparse_moe.experts.197.w2", "model.layers.51.block_sparse_moe.experts.198.w2", "model.layers.51.block_sparse_moe.experts.199.w2", "model.layers.51.block_sparse_moe.experts.200.w2", "model.layers.51.block_sparse_moe.experts.201.w2", "model.layers.51.block_sparse_moe.experts.202.w2", "model.layers.51.block_sparse_moe.experts.203.w2", "model.layers.51.block_sparse_moe.experts.204.w2", "model.layers.51.block_sparse_moe.experts.205.w2", "model.layers.51.block_sparse_moe.experts.206.w2", "model.layers.51.block_sparse_moe.experts.207.w2", "model.layers.51.block_sparse_moe.experts.208.w2", "model.layers.51.block_sparse_moe.experts.209.w2", "model.layers.51.block_sparse_moe.experts.210.w2", "model.layers.51.block_sparse_moe.experts.211.w2", "model.layers.51.block_sparse_moe.experts.212.w2", "model.layers.51.block_sparse_moe.experts.213.w2", "model.layers.51.block_sparse_moe.experts.214.w2", "model.layers.51.block_sparse_moe.experts.215.w2", "model.layers.51.block_sparse_moe.experts.216.w2", "model.layers.51.block_sparse_moe.experts.217.w2", "model.layers.51.block_sparse_moe.experts.218.w2", "model.layers.51.block_sparse_moe.experts.219.w2", "model.layers.51.block_sparse_moe.experts.220.w2", "model.layers.51.block_sparse_moe.experts.221.w2", "model.layers.51.block_sparse_moe.experts.222.w2", "model.layers.51.block_sparse_moe.experts.223.w2", "model.layers.51.block_sparse_moe.experts.224.w2", "model.layers.51.block_sparse_moe.experts.225.w2", "model.layers.51.block_sparse_moe.experts.226.w2", "model.layers.51.block_sparse_moe.experts.227.w2", "model.layers.51.block_sparse_moe.experts.228.w2", "model.layers.51.block_sparse_moe.experts.229.w2", "model.layers.51.block_sparse_moe.experts.230.w2", "model.layers.51.block_sparse_moe.experts.231.w2", "model.layers.51.block_sparse_moe.experts.232.w2", "model.layers.51.block_sparse_moe.experts.233.w2", "model.layers.51.block_sparse_moe.experts.234.w2", "model.layers.51.block_sparse_moe.experts.235.w2", "model.layers.51.block_sparse_moe.experts.236.w2", "model.layers.51.block_sparse_moe.experts.237.w2", "model.layers.51.block_sparse_moe.experts.238.w2", "model.layers.51.block_sparse_moe.experts.239.w2", "model.layers.51.block_sparse_moe.experts.240.w2", "model.layers.51.block_sparse_moe.experts.241.w2", "model.layers.51.block_sparse_moe.experts.242.w2", "model.layers.51.block_sparse_moe.experts.243.w2", "model.layers.51.block_sparse_moe.experts.244.w2", "model.layers.51.block_sparse_moe.experts.245.w2", "model.layers.51.block_sparse_moe.experts.246.w2", "model.layers.51.block_sparse_moe.experts.247.w2", "model.layers.51.block_sparse_moe.experts.248.w2", "model.layers.51.block_sparse_moe.experts.249.w2", "model.layers.51.block_sparse_moe.experts.250.w2", "model.layers.51.block_sparse_moe.experts.251.w2", "model.layers.51.block_sparse_moe.experts.252.w2", "model.layers.51.block_sparse_moe.experts.253.w2", "model.layers.51.block_sparse_moe.experts.254.w2", "model.layers.51.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00022536888718605042, "dbits": 3623878656 } ] }, { "idx": 104, "layers": [ "model.layers.52.self_attn.q_proj", "model.layers.52.self_attn.k_proj", "model.layers.52.self_attn.v_proj", "model.layers.52.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00150315072387458, "dbits": 44040192 } ] }, { "idx": 105, "layers": [ "model.layers.52.block_sparse_moe.experts.0.w1", "model.layers.52.block_sparse_moe.experts.1.w1", "model.layers.52.block_sparse_moe.experts.2.w1", "model.layers.52.block_sparse_moe.experts.3.w1", "model.layers.52.block_sparse_moe.experts.4.w1", "model.layers.52.block_sparse_moe.experts.5.w1", "model.layers.52.block_sparse_moe.experts.6.w1", "model.layers.52.block_sparse_moe.experts.7.w1", "model.layers.52.block_sparse_moe.experts.8.w1", "model.layers.52.block_sparse_moe.experts.9.w1", "model.layers.52.block_sparse_moe.experts.10.w1", "model.layers.52.block_sparse_moe.experts.11.w1", "model.layers.52.block_sparse_moe.experts.12.w1", "model.layers.52.block_sparse_moe.experts.13.w1", "model.layers.52.block_sparse_moe.experts.14.w1", "model.layers.52.block_sparse_moe.experts.15.w1", "model.layers.52.block_sparse_moe.experts.16.w1", "model.layers.52.block_sparse_moe.experts.17.w1", "model.layers.52.block_sparse_moe.experts.18.w1", "model.layers.52.block_sparse_moe.experts.19.w1", "model.layers.52.block_sparse_moe.experts.20.w1", "model.layers.52.block_sparse_moe.experts.21.w1", "model.layers.52.block_sparse_moe.experts.22.w1", "model.layers.52.block_sparse_moe.experts.23.w1", "model.layers.52.block_sparse_moe.experts.24.w1", "model.layers.52.block_sparse_moe.experts.25.w1", "model.layers.52.block_sparse_moe.experts.26.w1", "model.layers.52.block_sparse_moe.experts.27.w1", "model.layers.52.block_sparse_moe.experts.28.w1", "model.layers.52.block_sparse_moe.experts.29.w1", "model.layers.52.block_sparse_moe.experts.30.w1", "model.layers.52.block_sparse_moe.experts.31.w1", "model.layers.52.block_sparse_moe.experts.32.w1", "model.layers.52.block_sparse_moe.experts.33.w1", "model.layers.52.block_sparse_moe.experts.34.w1", "model.layers.52.block_sparse_moe.experts.35.w1", "model.layers.52.block_sparse_moe.experts.36.w1", "model.layers.52.block_sparse_moe.experts.37.w1", "model.layers.52.block_sparse_moe.experts.38.w1", "model.layers.52.block_sparse_moe.experts.39.w1", "model.layers.52.block_sparse_moe.experts.40.w1", "model.layers.52.block_sparse_moe.experts.41.w1", "model.layers.52.block_sparse_moe.experts.42.w1", "model.layers.52.block_sparse_moe.experts.43.w1", "model.layers.52.block_sparse_moe.experts.44.w1", "model.layers.52.block_sparse_moe.experts.45.w1", "model.layers.52.block_sparse_moe.experts.46.w1", "model.layers.52.block_sparse_moe.experts.47.w1", "model.layers.52.block_sparse_moe.experts.48.w1", "model.layers.52.block_sparse_moe.experts.49.w1", "model.layers.52.block_sparse_moe.experts.50.w1", "model.layers.52.block_sparse_moe.experts.51.w1", "model.layers.52.block_sparse_moe.experts.52.w1", "model.layers.52.block_sparse_moe.experts.53.w1", "model.layers.52.block_sparse_moe.experts.54.w1", "model.layers.52.block_sparse_moe.experts.55.w1", "model.layers.52.block_sparse_moe.experts.56.w1", "model.layers.52.block_sparse_moe.experts.57.w1", "model.layers.52.block_sparse_moe.experts.58.w1", "model.layers.52.block_sparse_moe.experts.59.w1", "model.layers.52.block_sparse_moe.experts.60.w1", "model.layers.52.block_sparse_moe.experts.61.w1", "model.layers.52.block_sparse_moe.experts.62.w1", "model.layers.52.block_sparse_moe.experts.63.w1", "model.layers.52.block_sparse_moe.experts.64.w1", "model.layers.52.block_sparse_moe.experts.65.w1", "model.layers.52.block_sparse_moe.experts.66.w1", "model.layers.52.block_sparse_moe.experts.67.w1", "model.layers.52.block_sparse_moe.experts.68.w1", "model.layers.52.block_sparse_moe.experts.69.w1", "model.layers.52.block_sparse_moe.experts.70.w1", "model.layers.52.block_sparse_moe.experts.71.w1", "model.layers.52.block_sparse_moe.experts.72.w1", "model.layers.52.block_sparse_moe.experts.73.w1", "model.layers.52.block_sparse_moe.experts.74.w1", "model.layers.52.block_sparse_moe.experts.75.w1", "model.layers.52.block_sparse_moe.experts.76.w1", "model.layers.52.block_sparse_moe.experts.77.w1", "model.layers.52.block_sparse_moe.experts.78.w1", "model.layers.52.block_sparse_moe.experts.79.w1", "model.layers.52.block_sparse_moe.experts.80.w1", "model.layers.52.block_sparse_moe.experts.81.w1", "model.layers.52.block_sparse_moe.experts.82.w1", "model.layers.52.block_sparse_moe.experts.83.w1", "model.layers.52.block_sparse_moe.experts.84.w1", "model.layers.52.block_sparse_moe.experts.85.w1", "model.layers.52.block_sparse_moe.experts.86.w1", "model.layers.52.block_sparse_moe.experts.87.w1", "model.layers.52.block_sparse_moe.experts.88.w1", "model.layers.52.block_sparse_moe.experts.89.w1", "model.layers.52.block_sparse_moe.experts.90.w1", "model.layers.52.block_sparse_moe.experts.91.w1", "model.layers.52.block_sparse_moe.experts.92.w1", "model.layers.52.block_sparse_moe.experts.93.w1", "model.layers.52.block_sparse_moe.experts.94.w1", "model.layers.52.block_sparse_moe.experts.95.w1", "model.layers.52.block_sparse_moe.experts.96.w1", "model.layers.52.block_sparse_moe.experts.97.w1", "model.layers.52.block_sparse_moe.experts.98.w1", "model.layers.52.block_sparse_moe.experts.99.w1", "model.layers.52.block_sparse_moe.experts.100.w1", "model.layers.52.block_sparse_moe.experts.101.w1", "model.layers.52.block_sparse_moe.experts.102.w1", "model.layers.52.block_sparse_moe.experts.103.w1", "model.layers.52.block_sparse_moe.experts.104.w1", "model.layers.52.block_sparse_moe.experts.105.w1", "model.layers.52.block_sparse_moe.experts.106.w1", "model.layers.52.block_sparse_moe.experts.107.w1", "model.layers.52.block_sparse_moe.experts.108.w1", "model.layers.52.block_sparse_moe.experts.109.w1", "model.layers.52.block_sparse_moe.experts.110.w1", "model.layers.52.block_sparse_moe.experts.111.w1", "model.layers.52.block_sparse_moe.experts.112.w1", "model.layers.52.block_sparse_moe.experts.113.w1", "model.layers.52.block_sparse_moe.experts.114.w1", "model.layers.52.block_sparse_moe.experts.115.w1", "model.layers.52.block_sparse_moe.experts.116.w1", "model.layers.52.block_sparse_moe.experts.117.w1", "model.layers.52.block_sparse_moe.experts.118.w1", "model.layers.52.block_sparse_moe.experts.119.w1", "model.layers.52.block_sparse_moe.experts.120.w1", "model.layers.52.block_sparse_moe.experts.121.w1", "model.layers.52.block_sparse_moe.experts.122.w1", "model.layers.52.block_sparse_moe.experts.123.w1", "model.layers.52.block_sparse_moe.experts.124.w1", "model.layers.52.block_sparse_moe.experts.125.w1", "model.layers.52.block_sparse_moe.experts.126.w1", "model.layers.52.block_sparse_moe.experts.127.w1", "model.layers.52.block_sparse_moe.experts.128.w1", "model.layers.52.block_sparse_moe.experts.129.w1", "model.layers.52.block_sparse_moe.experts.130.w1", "model.layers.52.block_sparse_moe.experts.131.w1", "model.layers.52.block_sparse_moe.experts.132.w1", "model.layers.52.block_sparse_moe.experts.133.w1", "model.layers.52.block_sparse_moe.experts.134.w1", "model.layers.52.block_sparse_moe.experts.135.w1", "model.layers.52.block_sparse_moe.experts.136.w1", "model.layers.52.block_sparse_moe.experts.137.w1", "model.layers.52.block_sparse_moe.experts.138.w1", "model.layers.52.block_sparse_moe.experts.139.w1", "model.layers.52.block_sparse_moe.experts.140.w1", "model.layers.52.block_sparse_moe.experts.141.w1", "model.layers.52.block_sparse_moe.experts.142.w1", "model.layers.52.block_sparse_moe.experts.143.w1", "model.layers.52.block_sparse_moe.experts.144.w1", "model.layers.52.block_sparse_moe.experts.145.w1", "model.layers.52.block_sparse_moe.experts.146.w1", "model.layers.52.block_sparse_moe.experts.147.w1", "model.layers.52.block_sparse_moe.experts.148.w1", "model.layers.52.block_sparse_moe.experts.149.w1", "model.layers.52.block_sparse_moe.experts.150.w1", "model.layers.52.block_sparse_moe.experts.151.w1", "model.layers.52.block_sparse_moe.experts.152.w1", "model.layers.52.block_sparse_moe.experts.153.w1", "model.layers.52.block_sparse_moe.experts.154.w1", "model.layers.52.block_sparse_moe.experts.155.w1", "model.layers.52.block_sparse_moe.experts.156.w1", "model.layers.52.block_sparse_moe.experts.157.w1", "model.layers.52.block_sparse_moe.experts.158.w1", "model.layers.52.block_sparse_moe.experts.159.w1", "model.layers.52.block_sparse_moe.experts.160.w1", "model.layers.52.block_sparse_moe.experts.161.w1", "model.layers.52.block_sparse_moe.experts.162.w1", "model.layers.52.block_sparse_moe.experts.163.w1", "model.layers.52.block_sparse_moe.experts.164.w1", "model.layers.52.block_sparse_moe.experts.165.w1", "model.layers.52.block_sparse_moe.experts.166.w1", "model.layers.52.block_sparse_moe.experts.167.w1", "model.layers.52.block_sparse_moe.experts.168.w1", "model.layers.52.block_sparse_moe.experts.169.w1", "model.layers.52.block_sparse_moe.experts.170.w1", "model.layers.52.block_sparse_moe.experts.171.w1", "model.layers.52.block_sparse_moe.experts.172.w1", "model.layers.52.block_sparse_moe.experts.173.w1", "model.layers.52.block_sparse_moe.experts.174.w1", "model.layers.52.block_sparse_moe.experts.175.w1", "model.layers.52.block_sparse_moe.experts.176.w1", "model.layers.52.block_sparse_moe.experts.177.w1", "model.layers.52.block_sparse_moe.experts.178.w1", "model.layers.52.block_sparse_moe.experts.179.w1", "model.layers.52.block_sparse_moe.experts.180.w1", "model.layers.52.block_sparse_moe.experts.181.w1", "model.layers.52.block_sparse_moe.experts.182.w1", "model.layers.52.block_sparse_moe.experts.183.w1", "model.layers.52.block_sparse_moe.experts.184.w1", "model.layers.52.block_sparse_moe.experts.185.w1", "model.layers.52.block_sparse_moe.experts.186.w1", "model.layers.52.block_sparse_moe.experts.187.w1", "model.layers.52.block_sparse_moe.experts.188.w1", "model.layers.52.block_sparse_moe.experts.189.w1", "model.layers.52.block_sparse_moe.experts.190.w1", "model.layers.52.block_sparse_moe.experts.191.w1", "model.layers.52.block_sparse_moe.experts.192.w1", "model.layers.52.block_sparse_moe.experts.193.w1", "model.layers.52.block_sparse_moe.experts.194.w1", "model.layers.52.block_sparse_moe.experts.195.w1", "model.layers.52.block_sparse_moe.experts.196.w1", "model.layers.52.block_sparse_moe.experts.197.w1", "model.layers.52.block_sparse_moe.experts.198.w1", "model.layers.52.block_sparse_moe.experts.199.w1", "model.layers.52.block_sparse_moe.experts.200.w1", "model.layers.52.block_sparse_moe.experts.201.w1", "model.layers.52.block_sparse_moe.experts.202.w1", "model.layers.52.block_sparse_moe.experts.203.w1", "model.layers.52.block_sparse_moe.experts.204.w1", "model.layers.52.block_sparse_moe.experts.205.w1", "model.layers.52.block_sparse_moe.experts.206.w1", "model.layers.52.block_sparse_moe.experts.207.w1", "model.layers.52.block_sparse_moe.experts.208.w1", "model.layers.52.block_sparse_moe.experts.209.w1", "model.layers.52.block_sparse_moe.experts.210.w1", "model.layers.52.block_sparse_moe.experts.211.w1", "model.layers.52.block_sparse_moe.experts.212.w1", "model.layers.52.block_sparse_moe.experts.213.w1", "model.layers.52.block_sparse_moe.experts.214.w1", "model.layers.52.block_sparse_moe.experts.215.w1", "model.layers.52.block_sparse_moe.experts.216.w1", "model.layers.52.block_sparse_moe.experts.217.w1", "model.layers.52.block_sparse_moe.experts.218.w1", "model.layers.52.block_sparse_moe.experts.219.w1", "model.layers.52.block_sparse_moe.experts.220.w1", "model.layers.52.block_sparse_moe.experts.221.w1", "model.layers.52.block_sparse_moe.experts.222.w1", "model.layers.52.block_sparse_moe.experts.223.w1", "model.layers.52.block_sparse_moe.experts.224.w1", "model.layers.52.block_sparse_moe.experts.225.w1", "model.layers.52.block_sparse_moe.experts.226.w1", "model.layers.52.block_sparse_moe.experts.227.w1", "model.layers.52.block_sparse_moe.experts.228.w1", "model.layers.52.block_sparse_moe.experts.229.w1", "model.layers.52.block_sparse_moe.experts.230.w1", "model.layers.52.block_sparse_moe.experts.231.w1", "model.layers.52.block_sparse_moe.experts.232.w1", "model.layers.52.block_sparse_moe.experts.233.w1", "model.layers.52.block_sparse_moe.experts.234.w1", "model.layers.52.block_sparse_moe.experts.235.w1", "model.layers.52.block_sparse_moe.experts.236.w1", "model.layers.52.block_sparse_moe.experts.237.w1", "model.layers.52.block_sparse_moe.experts.238.w1", "model.layers.52.block_sparse_moe.experts.239.w1", "model.layers.52.block_sparse_moe.experts.240.w1", "model.layers.52.block_sparse_moe.experts.241.w1", "model.layers.52.block_sparse_moe.experts.242.w1", "model.layers.52.block_sparse_moe.experts.243.w1", "model.layers.52.block_sparse_moe.experts.244.w1", "model.layers.52.block_sparse_moe.experts.245.w1", "model.layers.52.block_sparse_moe.experts.246.w1", "model.layers.52.block_sparse_moe.experts.247.w1", "model.layers.52.block_sparse_moe.experts.248.w1", "model.layers.52.block_sparse_moe.experts.249.w1", "model.layers.52.block_sparse_moe.experts.250.w1", "model.layers.52.block_sparse_moe.experts.251.w1", "model.layers.52.block_sparse_moe.experts.252.w1", "model.layers.52.block_sparse_moe.experts.253.w1", "model.layers.52.block_sparse_moe.experts.254.w1", "model.layers.52.block_sparse_moe.experts.255.w1", "model.layers.52.block_sparse_moe.experts.0.w3", "model.layers.52.block_sparse_moe.experts.1.w3", "model.layers.52.block_sparse_moe.experts.2.w3", "model.layers.52.block_sparse_moe.experts.3.w3", "model.layers.52.block_sparse_moe.experts.4.w3", "model.layers.52.block_sparse_moe.experts.5.w3", "model.layers.52.block_sparse_moe.experts.6.w3", "model.layers.52.block_sparse_moe.experts.7.w3", "model.layers.52.block_sparse_moe.experts.8.w3", "model.layers.52.block_sparse_moe.experts.9.w3", "model.layers.52.block_sparse_moe.experts.10.w3", "model.layers.52.block_sparse_moe.experts.11.w3", "model.layers.52.block_sparse_moe.experts.12.w3", "model.layers.52.block_sparse_moe.experts.13.w3", "model.layers.52.block_sparse_moe.experts.14.w3", "model.layers.52.block_sparse_moe.experts.15.w3", "model.layers.52.block_sparse_moe.experts.16.w3", "model.layers.52.block_sparse_moe.experts.17.w3", "model.layers.52.block_sparse_moe.experts.18.w3", "model.layers.52.block_sparse_moe.experts.19.w3", "model.layers.52.block_sparse_moe.experts.20.w3", "model.layers.52.block_sparse_moe.experts.21.w3", "model.layers.52.block_sparse_moe.experts.22.w3", "model.layers.52.block_sparse_moe.experts.23.w3", "model.layers.52.block_sparse_moe.experts.24.w3", "model.layers.52.block_sparse_moe.experts.25.w3", "model.layers.52.block_sparse_moe.experts.26.w3", "model.layers.52.block_sparse_moe.experts.27.w3", "model.layers.52.block_sparse_moe.experts.28.w3", "model.layers.52.block_sparse_moe.experts.29.w3", "model.layers.52.block_sparse_moe.experts.30.w3", "model.layers.52.block_sparse_moe.experts.31.w3", "model.layers.52.block_sparse_moe.experts.32.w3", "model.layers.52.block_sparse_moe.experts.33.w3", "model.layers.52.block_sparse_moe.experts.34.w3", "model.layers.52.block_sparse_moe.experts.35.w3", "model.layers.52.block_sparse_moe.experts.36.w3", "model.layers.52.block_sparse_moe.experts.37.w3", "model.layers.52.block_sparse_moe.experts.38.w3", "model.layers.52.block_sparse_moe.experts.39.w3", "model.layers.52.block_sparse_moe.experts.40.w3", "model.layers.52.block_sparse_moe.experts.41.w3", "model.layers.52.block_sparse_moe.experts.42.w3", "model.layers.52.block_sparse_moe.experts.43.w3", "model.layers.52.block_sparse_moe.experts.44.w3", "model.layers.52.block_sparse_moe.experts.45.w3", "model.layers.52.block_sparse_moe.experts.46.w3", "model.layers.52.block_sparse_moe.experts.47.w3", "model.layers.52.block_sparse_moe.experts.48.w3", "model.layers.52.block_sparse_moe.experts.49.w3", "model.layers.52.block_sparse_moe.experts.50.w3", "model.layers.52.block_sparse_moe.experts.51.w3", "model.layers.52.block_sparse_moe.experts.52.w3", "model.layers.52.block_sparse_moe.experts.53.w3", "model.layers.52.block_sparse_moe.experts.54.w3", "model.layers.52.block_sparse_moe.experts.55.w3", "model.layers.52.block_sparse_moe.experts.56.w3", "model.layers.52.block_sparse_moe.experts.57.w3", "model.layers.52.block_sparse_moe.experts.58.w3", "model.layers.52.block_sparse_moe.experts.59.w3", "model.layers.52.block_sparse_moe.experts.60.w3", "model.layers.52.block_sparse_moe.experts.61.w3", "model.layers.52.block_sparse_moe.experts.62.w3", "model.layers.52.block_sparse_moe.experts.63.w3", "model.layers.52.block_sparse_moe.experts.64.w3", "model.layers.52.block_sparse_moe.experts.65.w3", "model.layers.52.block_sparse_moe.experts.66.w3", "model.layers.52.block_sparse_moe.experts.67.w3", "model.layers.52.block_sparse_moe.experts.68.w3", "model.layers.52.block_sparse_moe.experts.69.w3", "model.layers.52.block_sparse_moe.experts.70.w3", "model.layers.52.block_sparse_moe.experts.71.w3", "model.layers.52.block_sparse_moe.experts.72.w3", "model.layers.52.block_sparse_moe.experts.73.w3", "model.layers.52.block_sparse_moe.experts.74.w3", "model.layers.52.block_sparse_moe.experts.75.w3", "model.layers.52.block_sparse_moe.experts.76.w3", "model.layers.52.block_sparse_moe.experts.77.w3", "model.layers.52.block_sparse_moe.experts.78.w3", "model.layers.52.block_sparse_moe.experts.79.w3", "model.layers.52.block_sparse_moe.experts.80.w3", "model.layers.52.block_sparse_moe.experts.81.w3", "model.layers.52.block_sparse_moe.experts.82.w3", "model.layers.52.block_sparse_moe.experts.83.w3", "model.layers.52.block_sparse_moe.experts.84.w3", "model.layers.52.block_sparse_moe.experts.85.w3", "model.layers.52.block_sparse_moe.experts.86.w3", "model.layers.52.block_sparse_moe.experts.87.w3", "model.layers.52.block_sparse_moe.experts.88.w3", "model.layers.52.block_sparse_moe.experts.89.w3", "model.layers.52.block_sparse_moe.experts.90.w3", "model.layers.52.block_sparse_moe.experts.91.w3", "model.layers.52.block_sparse_moe.experts.92.w3", "model.layers.52.block_sparse_moe.experts.93.w3", "model.layers.52.block_sparse_moe.experts.94.w3", "model.layers.52.block_sparse_moe.experts.95.w3", "model.layers.52.block_sparse_moe.experts.96.w3", "model.layers.52.block_sparse_moe.experts.97.w3", "model.layers.52.block_sparse_moe.experts.98.w3", "model.layers.52.block_sparse_moe.experts.99.w3", "model.layers.52.block_sparse_moe.experts.100.w3", "model.layers.52.block_sparse_moe.experts.101.w3", "model.layers.52.block_sparse_moe.experts.102.w3", "model.layers.52.block_sparse_moe.experts.103.w3", "model.layers.52.block_sparse_moe.experts.104.w3", "model.layers.52.block_sparse_moe.experts.105.w3", "model.layers.52.block_sparse_moe.experts.106.w3", "model.layers.52.block_sparse_moe.experts.107.w3", "model.layers.52.block_sparse_moe.experts.108.w3", "model.layers.52.block_sparse_moe.experts.109.w3", "model.layers.52.block_sparse_moe.experts.110.w3", "model.layers.52.block_sparse_moe.experts.111.w3", "model.layers.52.block_sparse_moe.experts.112.w3", "model.layers.52.block_sparse_moe.experts.113.w3", "model.layers.52.block_sparse_moe.experts.114.w3", "model.layers.52.block_sparse_moe.experts.115.w3", "model.layers.52.block_sparse_moe.experts.116.w3", "model.layers.52.block_sparse_moe.experts.117.w3", "model.layers.52.block_sparse_moe.experts.118.w3", "model.layers.52.block_sparse_moe.experts.119.w3", "model.layers.52.block_sparse_moe.experts.120.w3", "model.layers.52.block_sparse_moe.experts.121.w3", "model.layers.52.block_sparse_moe.experts.122.w3", "model.layers.52.block_sparse_moe.experts.123.w3", "model.layers.52.block_sparse_moe.experts.124.w3", "model.layers.52.block_sparse_moe.experts.125.w3", "model.layers.52.block_sparse_moe.experts.126.w3", "model.layers.52.block_sparse_moe.experts.127.w3", "model.layers.52.block_sparse_moe.experts.128.w3", "model.layers.52.block_sparse_moe.experts.129.w3", "model.layers.52.block_sparse_moe.experts.130.w3", "model.layers.52.block_sparse_moe.experts.131.w3", "model.layers.52.block_sparse_moe.experts.132.w3", "model.layers.52.block_sparse_moe.experts.133.w3", "model.layers.52.block_sparse_moe.experts.134.w3", "model.layers.52.block_sparse_moe.experts.135.w3", "model.layers.52.block_sparse_moe.experts.136.w3", "model.layers.52.block_sparse_moe.experts.137.w3", "model.layers.52.block_sparse_moe.experts.138.w3", "model.layers.52.block_sparse_moe.experts.139.w3", "model.layers.52.block_sparse_moe.experts.140.w3", "model.layers.52.block_sparse_moe.experts.141.w3", "model.layers.52.block_sparse_moe.experts.142.w3", "model.layers.52.block_sparse_moe.experts.143.w3", "model.layers.52.block_sparse_moe.experts.144.w3", "model.layers.52.block_sparse_moe.experts.145.w3", "model.layers.52.block_sparse_moe.experts.146.w3", "model.layers.52.block_sparse_moe.experts.147.w3", "model.layers.52.block_sparse_moe.experts.148.w3", "model.layers.52.block_sparse_moe.experts.149.w3", "model.layers.52.block_sparse_moe.experts.150.w3", "model.layers.52.block_sparse_moe.experts.151.w3", "model.layers.52.block_sparse_moe.experts.152.w3", "model.layers.52.block_sparse_moe.experts.153.w3", "model.layers.52.block_sparse_moe.experts.154.w3", "model.layers.52.block_sparse_moe.experts.155.w3", "model.layers.52.block_sparse_moe.experts.156.w3", "model.layers.52.block_sparse_moe.experts.157.w3", "model.layers.52.block_sparse_moe.experts.158.w3", "model.layers.52.block_sparse_moe.experts.159.w3", "model.layers.52.block_sparse_moe.experts.160.w3", "model.layers.52.block_sparse_moe.experts.161.w3", "model.layers.52.block_sparse_moe.experts.162.w3", "model.layers.52.block_sparse_moe.experts.163.w3", "model.layers.52.block_sparse_moe.experts.164.w3", "model.layers.52.block_sparse_moe.experts.165.w3", "model.layers.52.block_sparse_moe.experts.166.w3", "model.layers.52.block_sparse_moe.experts.167.w3", "model.layers.52.block_sparse_moe.experts.168.w3", "model.layers.52.block_sparse_moe.experts.169.w3", "model.layers.52.block_sparse_moe.experts.170.w3", "model.layers.52.block_sparse_moe.experts.171.w3", "model.layers.52.block_sparse_moe.experts.172.w3", "model.layers.52.block_sparse_moe.experts.173.w3", "model.layers.52.block_sparse_moe.experts.174.w3", "model.layers.52.block_sparse_moe.experts.175.w3", "model.layers.52.block_sparse_moe.experts.176.w3", "model.layers.52.block_sparse_moe.experts.177.w3", "model.layers.52.block_sparse_moe.experts.178.w3", "model.layers.52.block_sparse_moe.experts.179.w3", "model.layers.52.block_sparse_moe.experts.180.w3", "model.layers.52.block_sparse_moe.experts.181.w3", "model.layers.52.block_sparse_moe.experts.182.w3", "model.layers.52.block_sparse_moe.experts.183.w3", "model.layers.52.block_sparse_moe.experts.184.w3", "model.layers.52.block_sparse_moe.experts.185.w3", "model.layers.52.block_sparse_moe.experts.186.w3", "model.layers.52.block_sparse_moe.experts.187.w3", "model.layers.52.block_sparse_moe.experts.188.w3", "model.layers.52.block_sparse_moe.experts.189.w3", "model.layers.52.block_sparse_moe.experts.190.w3", "model.layers.52.block_sparse_moe.experts.191.w3", "model.layers.52.block_sparse_moe.experts.192.w3", "model.layers.52.block_sparse_moe.experts.193.w3", "model.layers.52.block_sparse_moe.experts.194.w3", "model.layers.52.block_sparse_moe.experts.195.w3", "model.layers.52.block_sparse_moe.experts.196.w3", "model.layers.52.block_sparse_moe.experts.197.w3", "model.layers.52.block_sparse_moe.experts.198.w3", "model.layers.52.block_sparse_moe.experts.199.w3", "model.layers.52.block_sparse_moe.experts.200.w3", "model.layers.52.block_sparse_moe.experts.201.w3", "model.layers.52.block_sparse_moe.experts.202.w3", "model.layers.52.block_sparse_moe.experts.203.w3", "model.layers.52.block_sparse_moe.experts.204.w3", "model.layers.52.block_sparse_moe.experts.205.w3", "model.layers.52.block_sparse_moe.experts.206.w3", "model.layers.52.block_sparse_moe.experts.207.w3", "model.layers.52.block_sparse_moe.experts.208.w3", "model.layers.52.block_sparse_moe.experts.209.w3", "model.layers.52.block_sparse_moe.experts.210.w3", "model.layers.52.block_sparse_moe.experts.211.w3", "model.layers.52.block_sparse_moe.experts.212.w3", "model.layers.52.block_sparse_moe.experts.213.w3", "model.layers.52.block_sparse_moe.experts.214.w3", "model.layers.52.block_sparse_moe.experts.215.w3", "model.layers.52.block_sparse_moe.experts.216.w3", "model.layers.52.block_sparse_moe.experts.217.w3", "model.layers.52.block_sparse_moe.experts.218.w3", "model.layers.52.block_sparse_moe.experts.219.w3", "model.layers.52.block_sparse_moe.experts.220.w3", "model.layers.52.block_sparse_moe.experts.221.w3", "model.layers.52.block_sparse_moe.experts.222.w3", "model.layers.52.block_sparse_moe.experts.223.w3", "model.layers.52.block_sparse_moe.experts.224.w3", "model.layers.52.block_sparse_moe.experts.225.w3", "model.layers.52.block_sparse_moe.experts.226.w3", "model.layers.52.block_sparse_moe.experts.227.w3", "model.layers.52.block_sparse_moe.experts.228.w3", "model.layers.52.block_sparse_moe.experts.229.w3", "model.layers.52.block_sparse_moe.experts.230.w3", "model.layers.52.block_sparse_moe.experts.231.w3", "model.layers.52.block_sparse_moe.experts.232.w3", "model.layers.52.block_sparse_moe.experts.233.w3", "model.layers.52.block_sparse_moe.experts.234.w3", "model.layers.52.block_sparse_moe.experts.235.w3", "model.layers.52.block_sparse_moe.experts.236.w3", "model.layers.52.block_sparse_moe.experts.237.w3", "model.layers.52.block_sparse_moe.experts.238.w3", "model.layers.52.block_sparse_moe.experts.239.w3", "model.layers.52.block_sparse_moe.experts.240.w3", "model.layers.52.block_sparse_moe.experts.241.w3", "model.layers.52.block_sparse_moe.experts.242.w3", "model.layers.52.block_sparse_moe.experts.243.w3", "model.layers.52.block_sparse_moe.experts.244.w3", "model.layers.52.block_sparse_moe.experts.245.w3", "model.layers.52.block_sparse_moe.experts.246.w3", "model.layers.52.block_sparse_moe.experts.247.w3", "model.layers.52.block_sparse_moe.experts.248.w3", "model.layers.52.block_sparse_moe.experts.249.w3", "model.layers.52.block_sparse_moe.experts.250.w3", "model.layers.52.block_sparse_moe.experts.251.w3", "model.layers.52.block_sparse_moe.experts.252.w3", "model.layers.52.block_sparse_moe.experts.253.w3", "model.layers.52.block_sparse_moe.experts.254.w3", "model.layers.52.block_sparse_moe.experts.255.w3", "model.layers.52.block_sparse_moe.experts.0.w2", "model.layers.52.block_sparse_moe.experts.1.w2", "model.layers.52.block_sparse_moe.experts.2.w2", "model.layers.52.block_sparse_moe.experts.3.w2", "model.layers.52.block_sparse_moe.experts.4.w2", "model.layers.52.block_sparse_moe.experts.5.w2", "model.layers.52.block_sparse_moe.experts.6.w2", "model.layers.52.block_sparse_moe.experts.7.w2", "model.layers.52.block_sparse_moe.experts.8.w2", "model.layers.52.block_sparse_moe.experts.9.w2", "model.layers.52.block_sparse_moe.experts.10.w2", "model.layers.52.block_sparse_moe.experts.11.w2", "model.layers.52.block_sparse_moe.experts.12.w2", "model.layers.52.block_sparse_moe.experts.13.w2", "model.layers.52.block_sparse_moe.experts.14.w2", "model.layers.52.block_sparse_moe.experts.15.w2", "model.layers.52.block_sparse_moe.experts.16.w2", "model.layers.52.block_sparse_moe.experts.17.w2", "model.layers.52.block_sparse_moe.experts.18.w2", "model.layers.52.block_sparse_moe.experts.19.w2", "model.layers.52.block_sparse_moe.experts.20.w2", "model.layers.52.block_sparse_moe.experts.21.w2", "model.layers.52.block_sparse_moe.experts.22.w2", "model.layers.52.block_sparse_moe.experts.23.w2", "model.layers.52.block_sparse_moe.experts.24.w2", "model.layers.52.block_sparse_moe.experts.25.w2", "model.layers.52.block_sparse_moe.experts.26.w2", "model.layers.52.block_sparse_moe.experts.27.w2", "model.layers.52.block_sparse_moe.experts.28.w2", "model.layers.52.block_sparse_moe.experts.29.w2", "model.layers.52.block_sparse_moe.experts.30.w2", "model.layers.52.block_sparse_moe.experts.31.w2", "model.layers.52.block_sparse_moe.experts.32.w2", "model.layers.52.block_sparse_moe.experts.33.w2", "model.layers.52.block_sparse_moe.experts.34.w2", "model.layers.52.block_sparse_moe.experts.35.w2", "model.layers.52.block_sparse_moe.experts.36.w2", "model.layers.52.block_sparse_moe.experts.37.w2", "model.layers.52.block_sparse_moe.experts.38.w2", "model.layers.52.block_sparse_moe.experts.39.w2", "model.layers.52.block_sparse_moe.experts.40.w2", "model.layers.52.block_sparse_moe.experts.41.w2", "model.layers.52.block_sparse_moe.experts.42.w2", "model.layers.52.block_sparse_moe.experts.43.w2", "model.layers.52.block_sparse_moe.experts.44.w2", "model.layers.52.block_sparse_moe.experts.45.w2", "model.layers.52.block_sparse_moe.experts.46.w2", "model.layers.52.block_sparse_moe.experts.47.w2", "model.layers.52.block_sparse_moe.experts.48.w2", "model.layers.52.block_sparse_moe.experts.49.w2", "model.layers.52.block_sparse_moe.experts.50.w2", "model.layers.52.block_sparse_moe.experts.51.w2", "model.layers.52.block_sparse_moe.experts.52.w2", "model.layers.52.block_sparse_moe.experts.53.w2", "model.layers.52.block_sparse_moe.experts.54.w2", "model.layers.52.block_sparse_moe.experts.55.w2", "model.layers.52.block_sparse_moe.experts.56.w2", "model.layers.52.block_sparse_moe.experts.57.w2", "model.layers.52.block_sparse_moe.experts.58.w2", "model.layers.52.block_sparse_moe.experts.59.w2", "model.layers.52.block_sparse_moe.experts.60.w2", "model.layers.52.block_sparse_moe.experts.61.w2", "model.layers.52.block_sparse_moe.experts.62.w2", "model.layers.52.block_sparse_moe.experts.63.w2", "model.layers.52.block_sparse_moe.experts.64.w2", "model.layers.52.block_sparse_moe.experts.65.w2", "model.layers.52.block_sparse_moe.experts.66.w2", "model.layers.52.block_sparse_moe.experts.67.w2", "model.layers.52.block_sparse_moe.experts.68.w2", "model.layers.52.block_sparse_moe.experts.69.w2", "model.layers.52.block_sparse_moe.experts.70.w2", "model.layers.52.block_sparse_moe.experts.71.w2", "model.layers.52.block_sparse_moe.experts.72.w2", "model.layers.52.block_sparse_moe.experts.73.w2", "model.layers.52.block_sparse_moe.experts.74.w2", "model.layers.52.block_sparse_moe.experts.75.w2", "model.layers.52.block_sparse_moe.experts.76.w2", "model.layers.52.block_sparse_moe.experts.77.w2", "model.layers.52.block_sparse_moe.experts.78.w2", "model.layers.52.block_sparse_moe.experts.79.w2", "model.layers.52.block_sparse_moe.experts.80.w2", "model.layers.52.block_sparse_moe.experts.81.w2", "model.layers.52.block_sparse_moe.experts.82.w2", "model.layers.52.block_sparse_moe.experts.83.w2", "model.layers.52.block_sparse_moe.experts.84.w2", "model.layers.52.block_sparse_moe.experts.85.w2", "model.layers.52.block_sparse_moe.experts.86.w2", "model.layers.52.block_sparse_moe.experts.87.w2", "model.layers.52.block_sparse_moe.experts.88.w2", "model.layers.52.block_sparse_moe.experts.89.w2", "model.layers.52.block_sparse_moe.experts.90.w2", "model.layers.52.block_sparse_moe.experts.91.w2", "model.layers.52.block_sparse_moe.experts.92.w2", "model.layers.52.block_sparse_moe.experts.93.w2", "model.layers.52.block_sparse_moe.experts.94.w2", "model.layers.52.block_sparse_moe.experts.95.w2", "model.layers.52.block_sparse_moe.experts.96.w2", "model.layers.52.block_sparse_moe.experts.97.w2", "model.layers.52.block_sparse_moe.experts.98.w2", "model.layers.52.block_sparse_moe.experts.99.w2", "model.layers.52.block_sparse_moe.experts.100.w2", "model.layers.52.block_sparse_moe.experts.101.w2", "model.layers.52.block_sparse_moe.experts.102.w2", "model.layers.52.block_sparse_moe.experts.103.w2", "model.layers.52.block_sparse_moe.experts.104.w2", "model.layers.52.block_sparse_moe.experts.105.w2", "model.layers.52.block_sparse_moe.experts.106.w2", "model.layers.52.block_sparse_moe.experts.107.w2", "model.layers.52.block_sparse_moe.experts.108.w2", "model.layers.52.block_sparse_moe.experts.109.w2", "model.layers.52.block_sparse_moe.experts.110.w2", "model.layers.52.block_sparse_moe.experts.111.w2", "model.layers.52.block_sparse_moe.experts.112.w2", "model.layers.52.block_sparse_moe.experts.113.w2", "model.layers.52.block_sparse_moe.experts.114.w2", "model.layers.52.block_sparse_moe.experts.115.w2", "model.layers.52.block_sparse_moe.experts.116.w2", "model.layers.52.block_sparse_moe.experts.117.w2", "model.layers.52.block_sparse_moe.experts.118.w2", "model.layers.52.block_sparse_moe.experts.119.w2", "model.layers.52.block_sparse_moe.experts.120.w2", "model.layers.52.block_sparse_moe.experts.121.w2", "model.layers.52.block_sparse_moe.experts.122.w2", "model.layers.52.block_sparse_moe.experts.123.w2", "model.layers.52.block_sparse_moe.experts.124.w2", "model.layers.52.block_sparse_moe.experts.125.w2", "model.layers.52.block_sparse_moe.experts.126.w2", "model.layers.52.block_sparse_moe.experts.127.w2", "model.layers.52.block_sparse_moe.experts.128.w2", "model.layers.52.block_sparse_moe.experts.129.w2", "model.layers.52.block_sparse_moe.experts.130.w2", "model.layers.52.block_sparse_moe.experts.131.w2", "model.layers.52.block_sparse_moe.experts.132.w2", "model.layers.52.block_sparse_moe.experts.133.w2", "model.layers.52.block_sparse_moe.experts.134.w2", "model.layers.52.block_sparse_moe.experts.135.w2", "model.layers.52.block_sparse_moe.experts.136.w2", "model.layers.52.block_sparse_moe.experts.137.w2", "model.layers.52.block_sparse_moe.experts.138.w2", "model.layers.52.block_sparse_moe.experts.139.w2", "model.layers.52.block_sparse_moe.experts.140.w2", "model.layers.52.block_sparse_moe.experts.141.w2", "model.layers.52.block_sparse_moe.experts.142.w2", "model.layers.52.block_sparse_moe.experts.143.w2", "model.layers.52.block_sparse_moe.experts.144.w2", "model.layers.52.block_sparse_moe.experts.145.w2", "model.layers.52.block_sparse_moe.experts.146.w2", "model.layers.52.block_sparse_moe.experts.147.w2", "model.layers.52.block_sparse_moe.experts.148.w2", "model.layers.52.block_sparse_moe.experts.149.w2", "model.layers.52.block_sparse_moe.experts.150.w2", "model.layers.52.block_sparse_moe.experts.151.w2", "model.layers.52.block_sparse_moe.experts.152.w2", "model.layers.52.block_sparse_moe.experts.153.w2", "model.layers.52.block_sparse_moe.experts.154.w2", "model.layers.52.block_sparse_moe.experts.155.w2", "model.layers.52.block_sparse_moe.experts.156.w2", "model.layers.52.block_sparse_moe.experts.157.w2", "model.layers.52.block_sparse_moe.experts.158.w2", "model.layers.52.block_sparse_moe.experts.159.w2", "model.layers.52.block_sparse_moe.experts.160.w2", "model.layers.52.block_sparse_moe.experts.161.w2", "model.layers.52.block_sparse_moe.experts.162.w2", "model.layers.52.block_sparse_moe.experts.163.w2", "model.layers.52.block_sparse_moe.experts.164.w2", "model.layers.52.block_sparse_moe.experts.165.w2", "model.layers.52.block_sparse_moe.experts.166.w2", "model.layers.52.block_sparse_moe.experts.167.w2", "model.layers.52.block_sparse_moe.experts.168.w2", "model.layers.52.block_sparse_moe.experts.169.w2", "model.layers.52.block_sparse_moe.experts.170.w2", "model.layers.52.block_sparse_moe.experts.171.w2", "model.layers.52.block_sparse_moe.experts.172.w2", "model.layers.52.block_sparse_moe.experts.173.w2", "model.layers.52.block_sparse_moe.experts.174.w2", "model.layers.52.block_sparse_moe.experts.175.w2", "model.layers.52.block_sparse_moe.experts.176.w2", "model.layers.52.block_sparse_moe.experts.177.w2", "model.layers.52.block_sparse_moe.experts.178.w2", "model.layers.52.block_sparse_moe.experts.179.w2", "model.layers.52.block_sparse_moe.experts.180.w2", "model.layers.52.block_sparse_moe.experts.181.w2", "model.layers.52.block_sparse_moe.experts.182.w2", "model.layers.52.block_sparse_moe.experts.183.w2", "model.layers.52.block_sparse_moe.experts.184.w2", "model.layers.52.block_sparse_moe.experts.185.w2", "model.layers.52.block_sparse_moe.experts.186.w2", "model.layers.52.block_sparse_moe.experts.187.w2", "model.layers.52.block_sparse_moe.experts.188.w2", "model.layers.52.block_sparse_moe.experts.189.w2", "model.layers.52.block_sparse_moe.experts.190.w2", "model.layers.52.block_sparse_moe.experts.191.w2", "model.layers.52.block_sparse_moe.experts.192.w2", "model.layers.52.block_sparse_moe.experts.193.w2", "model.layers.52.block_sparse_moe.experts.194.w2", "model.layers.52.block_sparse_moe.experts.195.w2", "model.layers.52.block_sparse_moe.experts.196.w2", "model.layers.52.block_sparse_moe.experts.197.w2", "model.layers.52.block_sparse_moe.experts.198.w2", "model.layers.52.block_sparse_moe.experts.199.w2", "model.layers.52.block_sparse_moe.experts.200.w2", "model.layers.52.block_sparse_moe.experts.201.w2", "model.layers.52.block_sparse_moe.experts.202.w2", "model.layers.52.block_sparse_moe.experts.203.w2", "model.layers.52.block_sparse_moe.experts.204.w2", "model.layers.52.block_sparse_moe.experts.205.w2", "model.layers.52.block_sparse_moe.experts.206.w2", "model.layers.52.block_sparse_moe.experts.207.w2", "model.layers.52.block_sparse_moe.experts.208.w2", "model.layers.52.block_sparse_moe.experts.209.w2", "model.layers.52.block_sparse_moe.experts.210.w2", "model.layers.52.block_sparse_moe.experts.211.w2", "model.layers.52.block_sparse_moe.experts.212.w2", "model.layers.52.block_sparse_moe.experts.213.w2", "model.layers.52.block_sparse_moe.experts.214.w2", "model.layers.52.block_sparse_moe.experts.215.w2", "model.layers.52.block_sparse_moe.experts.216.w2", "model.layers.52.block_sparse_moe.experts.217.w2", "model.layers.52.block_sparse_moe.experts.218.w2", "model.layers.52.block_sparse_moe.experts.219.w2", "model.layers.52.block_sparse_moe.experts.220.w2", "model.layers.52.block_sparse_moe.experts.221.w2", "model.layers.52.block_sparse_moe.experts.222.w2", "model.layers.52.block_sparse_moe.experts.223.w2", "model.layers.52.block_sparse_moe.experts.224.w2", "model.layers.52.block_sparse_moe.experts.225.w2", "model.layers.52.block_sparse_moe.experts.226.w2", "model.layers.52.block_sparse_moe.experts.227.w2", "model.layers.52.block_sparse_moe.experts.228.w2", "model.layers.52.block_sparse_moe.experts.229.w2", "model.layers.52.block_sparse_moe.experts.230.w2", "model.layers.52.block_sparse_moe.experts.231.w2", "model.layers.52.block_sparse_moe.experts.232.w2", "model.layers.52.block_sparse_moe.experts.233.w2", "model.layers.52.block_sparse_moe.experts.234.w2", "model.layers.52.block_sparse_moe.experts.235.w2", "model.layers.52.block_sparse_moe.experts.236.w2", "model.layers.52.block_sparse_moe.experts.237.w2", "model.layers.52.block_sparse_moe.experts.238.w2", "model.layers.52.block_sparse_moe.experts.239.w2", "model.layers.52.block_sparse_moe.experts.240.w2", "model.layers.52.block_sparse_moe.experts.241.w2", "model.layers.52.block_sparse_moe.experts.242.w2", "model.layers.52.block_sparse_moe.experts.243.w2", "model.layers.52.block_sparse_moe.experts.244.w2", "model.layers.52.block_sparse_moe.experts.245.w2", "model.layers.52.block_sparse_moe.experts.246.w2", "model.layers.52.block_sparse_moe.experts.247.w2", "model.layers.52.block_sparse_moe.experts.248.w2", "model.layers.52.block_sparse_moe.experts.249.w2", "model.layers.52.block_sparse_moe.experts.250.w2", "model.layers.52.block_sparse_moe.experts.251.w2", "model.layers.52.block_sparse_moe.experts.252.w2", "model.layers.52.block_sparse_moe.experts.253.w2", "model.layers.52.block_sparse_moe.experts.254.w2", "model.layers.52.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006549403071403614, "dbits": 3623878656 } ] }, { "idx": 106, "layers": [ "model.layers.53.self_attn.q_proj", "model.layers.53.self_attn.k_proj", "model.layers.53.self_attn.v_proj", "model.layers.53.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0005904521793126949, "dbits": 44040192 } ] }, { "idx": 107, "layers": [ "model.layers.53.block_sparse_moe.experts.0.w1", "model.layers.53.block_sparse_moe.experts.1.w1", "model.layers.53.block_sparse_moe.experts.2.w1", "model.layers.53.block_sparse_moe.experts.3.w1", "model.layers.53.block_sparse_moe.experts.4.w1", "model.layers.53.block_sparse_moe.experts.5.w1", "model.layers.53.block_sparse_moe.experts.6.w1", "model.layers.53.block_sparse_moe.experts.7.w1", "model.layers.53.block_sparse_moe.experts.8.w1", "model.layers.53.block_sparse_moe.experts.9.w1", "model.layers.53.block_sparse_moe.experts.10.w1", "model.layers.53.block_sparse_moe.experts.11.w1", "model.layers.53.block_sparse_moe.experts.12.w1", "model.layers.53.block_sparse_moe.experts.13.w1", "model.layers.53.block_sparse_moe.experts.14.w1", "model.layers.53.block_sparse_moe.experts.15.w1", "model.layers.53.block_sparse_moe.experts.16.w1", "model.layers.53.block_sparse_moe.experts.17.w1", "model.layers.53.block_sparse_moe.experts.18.w1", "model.layers.53.block_sparse_moe.experts.19.w1", "model.layers.53.block_sparse_moe.experts.20.w1", "model.layers.53.block_sparse_moe.experts.21.w1", "model.layers.53.block_sparse_moe.experts.22.w1", "model.layers.53.block_sparse_moe.experts.23.w1", "model.layers.53.block_sparse_moe.experts.24.w1", "model.layers.53.block_sparse_moe.experts.25.w1", "model.layers.53.block_sparse_moe.experts.26.w1", "model.layers.53.block_sparse_moe.experts.27.w1", "model.layers.53.block_sparse_moe.experts.28.w1", "model.layers.53.block_sparse_moe.experts.29.w1", "model.layers.53.block_sparse_moe.experts.30.w1", "model.layers.53.block_sparse_moe.experts.31.w1", "model.layers.53.block_sparse_moe.experts.32.w1", "model.layers.53.block_sparse_moe.experts.33.w1", "model.layers.53.block_sparse_moe.experts.34.w1", "model.layers.53.block_sparse_moe.experts.35.w1", "model.layers.53.block_sparse_moe.experts.36.w1", "model.layers.53.block_sparse_moe.experts.37.w1", "model.layers.53.block_sparse_moe.experts.38.w1", "model.layers.53.block_sparse_moe.experts.39.w1", "model.layers.53.block_sparse_moe.experts.40.w1", "model.layers.53.block_sparse_moe.experts.41.w1", "model.layers.53.block_sparse_moe.experts.42.w1", "model.layers.53.block_sparse_moe.experts.43.w1", "model.layers.53.block_sparse_moe.experts.44.w1", "model.layers.53.block_sparse_moe.experts.45.w1", "model.layers.53.block_sparse_moe.experts.46.w1", "model.layers.53.block_sparse_moe.experts.47.w1", "model.layers.53.block_sparse_moe.experts.48.w1", "model.layers.53.block_sparse_moe.experts.49.w1", "model.layers.53.block_sparse_moe.experts.50.w1", "model.layers.53.block_sparse_moe.experts.51.w1", "model.layers.53.block_sparse_moe.experts.52.w1", "model.layers.53.block_sparse_moe.experts.53.w1", "model.layers.53.block_sparse_moe.experts.54.w1", "model.layers.53.block_sparse_moe.experts.55.w1", "model.layers.53.block_sparse_moe.experts.56.w1", "model.layers.53.block_sparse_moe.experts.57.w1", "model.layers.53.block_sparse_moe.experts.58.w1", "model.layers.53.block_sparse_moe.experts.59.w1", "model.layers.53.block_sparse_moe.experts.60.w1", "model.layers.53.block_sparse_moe.experts.61.w1", "model.layers.53.block_sparse_moe.experts.62.w1", "model.layers.53.block_sparse_moe.experts.63.w1", "model.layers.53.block_sparse_moe.experts.64.w1", "model.layers.53.block_sparse_moe.experts.65.w1", "model.layers.53.block_sparse_moe.experts.66.w1", "model.layers.53.block_sparse_moe.experts.67.w1", "model.layers.53.block_sparse_moe.experts.68.w1", "model.layers.53.block_sparse_moe.experts.69.w1", "model.layers.53.block_sparse_moe.experts.70.w1", "model.layers.53.block_sparse_moe.experts.71.w1", "model.layers.53.block_sparse_moe.experts.72.w1", "model.layers.53.block_sparse_moe.experts.73.w1", "model.layers.53.block_sparse_moe.experts.74.w1", "model.layers.53.block_sparse_moe.experts.75.w1", "model.layers.53.block_sparse_moe.experts.76.w1", "model.layers.53.block_sparse_moe.experts.77.w1", "model.layers.53.block_sparse_moe.experts.78.w1", "model.layers.53.block_sparse_moe.experts.79.w1", "model.layers.53.block_sparse_moe.experts.80.w1", "model.layers.53.block_sparse_moe.experts.81.w1", "model.layers.53.block_sparse_moe.experts.82.w1", "model.layers.53.block_sparse_moe.experts.83.w1", "model.layers.53.block_sparse_moe.experts.84.w1", "model.layers.53.block_sparse_moe.experts.85.w1", "model.layers.53.block_sparse_moe.experts.86.w1", "model.layers.53.block_sparse_moe.experts.87.w1", "model.layers.53.block_sparse_moe.experts.88.w1", "model.layers.53.block_sparse_moe.experts.89.w1", "model.layers.53.block_sparse_moe.experts.90.w1", "model.layers.53.block_sparse_moe.experts.91.w1", "model.layers.53.block_sparse_moe.experts.92.w1", "model.layers.53.block_sparse_moe.experts.93.w1", "model.layers.53.block_sparse_moe.experts.94.w1", "model.layers.53.block_sparse_moe.experts.95.w1", "model.layers.53.block_sparse_moe.experts.96.w1", "model.layers.53.block_sparse_moe.experts.97.w1", "model.layers.53.block_sparse_moe.experts.98.w1", "model.layers.53.block_sparse_moe.experts.99.w1", "model.layers.53.block_sparse_moe.experts.100.w1", "model.layers.53.block_sparse_moe.experts.101.w1", "model.layers.53.block_sparse_moe.experts.102.w1", "model.layers.53.block_sparse_moe.experts.103.w1", "model.layers.53.block_sparse_moe.experts.104.w1", "model.layers.53.block_sparse_moe.experts.105.w1", "model.layers.53.block_sparse_moe.experts.106.w1", "model.layers.53.block_sparse_moe.experts.107.w1", "model.layers.53.block_sparse_moe.experts.108.w1", "model.layers.53.block_sparse_moe.experts.109.w1", "model.layers.53.block_sparse_moe.experts.110.w1", "model.layers.53.block_sparse_moe.experts.111.w1", "model.layers.53.block_sparse_moe.experts.112.w1", "model.layers.53.block_sparse_moe.experts.113.w1", "model.layers.53.block_sparse_moe.experts.114.w1", "model.layers.53.block_sparse_moe.experts.115.w1", "model.layers.53.block_sparse_moe.experts.116.w1", "model.layers.53.block_sparse_moe.experts.117.w1", "model.layers.53.block_sparse_moe.experts.118.w1", "model.layers.53.block_sparse_moe.experts.119.w1", "model.layers.53.block_sparse_moe.experts.120.w1", "model.layers.53.block_sparse_moe.experts.121.w1", "model.layers.53.block_sparse_moe.experts.122.w1", "model.layers.53.block_sparse_moe.experts.123.w1", "model.layers.53.block_sparse_moe.experts.124.w1", "model.layers.53.block_sparse_moe.experts.125.w1", "model.layers.53.block_sparse_moe.experts.126.w1", "model.layers.53.block_sparse_moe.experts.127.w1", "model.layers.53.block_sparse_moe.experts.128.w1", "model.layers.53.block_sparse_moe.experts.129.w1", "model.layers.53.block_sparse_moe.experts.130.w1", "model.layers.53.block_sparse_moe.experts.131.w1", "model.layers.53.block_sparse_moe.experts.132.w1", "model.layers.53.block_sparse_moe.experts.133.w1", "model.layers.53.block_sparse_moe.experts.134.w1", "model.layers.53.block_sparse_moe.experts.135.w1", "model.layers.53.block_sparse_moe.experts.136.w1", "model.layers.53.block_sparse_moe.experts.137.w1", "model.layers.53.block_sparse_moe.experts.138.w1", "model.layers.53.block_sparse_moe.experts.139.w1", "model.layers.53.block_sparse_moe.experts.140.w1", "model.layers.53.block_sparse_moe.experts.141.w1", "model.layers.53.block_sparse_moe.experts.142.w1", "model.layers.53.block_sparse_moe.experts.143.w1", "model.layers.53.block_sparse_moe.experts.144.w1", "model.layers.53.block_sparse_moe.experts.145.w1", "model.layers.53.block_sparse_moe.experts.146.w1", "model.layers.53.block_sparse_moe.experts.147.w1", "model.layers.53.block_sparse_moe.experts.148.w1", "model.layers.53.block_sparse_moe.experts.149.w1", "model.layers.53.block_sparse_moe.experts.150.w1", "model.layers.53.block_sparse_moe.experts.151.w1", "model.layers.53.block_sparse_moe.experts.152.w1", "model.layers.53.block_sparse_moe.experts.153.w1", "model.layers.53.block_sparse_moe.experts.154.w1", "model.layers.53.block_sparse_moe.experts.155.w1", "model.layers.53.block_sparse_moe.experts.156.w1", "model.layers.53.block_sparse_moe.experts.157.w1", "model.layers.53.block_sparse_moe.experts.158.w1", "model.layers.53.block_sparse_moe.experts.159.w1", "model.layers.53.block_sparse_moe.experts.160.w1", "model.layers.53.block_sparse_moe.experts.161.w1", "model.layers.53.block_sparse_moe.experts.162.w1", "model.layers.53.block_sparse_moe.experts.163.w1", "model.layers.53.block_sparse_moe.experts.164.w1", "model.layers.53.block_sparse_moe.experts.165.w1", "model.layers.53.block_sparse_moe.experts.166.w1", "model.layers.53.block_sparse_moe.experts.167.w1", "model.layers.53.block_sparse_moe.experts.168.w1", "model.layers.53.block_sparse_moe.experts.169.w1", "model.layers.53.block_sparse_moe.experts.170.w1", "model.layers.53.block_sparse_moe.experts.171.w1", "model.layers.53.block_sparse_moe.experts.172.w1", "model.layers.53.block_sparse_moe.experts.173.w1", "model.layers.53.block_sparse_moe.experts.174.w1", "model.layers.53.block_sparse_moe.experts.175.w1", "model.layers.53.block_sparse_moe.experts.176.w1", "model.layers.53.block_sparse_moe.experts.177.w1", "model.layers.53.block_sparse_moe.experts.178.w1", "model.layers.53.block_sparse_moe.experts.179.w1", "model.layers.53.block_sparse_moe.experts.180.w1", "model.layers.53.block_sparse_moe.experts.181.w1", "model.layers.53.block_sparse_moe.experts.182.w1", "model.layers.53.block_sparse_moe.experts.183.w1", "model.layers.53.block_sparse_moe.experts.184.w1", "model.layers.53.block_sparse_moe.experts.185.w1", "model.layers.53.block_sparse_moe.experts.186.w1", "model.layers.53.block_sparse_moe.experts.187.w1", "model.layers.53.block_sparse_moe.experts.188.w1", "model.layers.53.block_sparse_moe.experts.189.w1", "model.layers.53.block_sparse_moe.experts.190.w1", "model.layers.53.block_sparse_moe.experts.191.w1", "model.layers.53.block_sparse_moe.experts.192.w1", "model.layers.53.block_sparse_moe.experts.193.w1", "model.layers.53.block_sparse_moe.experts.194.w1", "model.layers.53.block_sparse_moe.experts.195.w1", "model.layers.53.block_sparse_moe.experts.196.w1", "model.layers.53.block_sparse_moe.experts.197.w1", "model.layers.53.block_sparse_moe.experts.198.w1", "model.layers.53.block_sparse_moe.experts.199.w1", "model.layers.53.block_sparse_moe.experts.200.w1", "model.layers.53.block_sparse_moe.experts.201.w1", "model.layers.53.block_sparse_moe.experts.202.w1", "model.layers.53.block_sparse_moe.experts.203.w1", "model.layers.53.block_sparse_moe.experts.204.w1", "model.layers.53.block_sparse_moe.experts.205.w1", "model.layers.53.block_sparse_moe.experts.206.w1", "model.layers.53.block_sparse_moe.experts.207.w1", "model.layers.53.block_sparse_moe.experts.208.w1", "model.layers.53.block_sparse_moe.experts.209.w1", "model.layers.53.block_sparse_moe.experts.210.w1", "model.layers.53.block_sparse_moe.experts.211.w1", "model.layers.53.block_sparse_moe.experts.212.w1", "model.layers.53.block_sparse_moe.experts.213.w1", "model.layers.53.block_sparse_moe.experts.214.w1", "model.layers.53.block_sparse_moe.experts.215.w1", "model.layers.53.block_sparse_moe.experts.216.w1", "model.layers.53.block_sparse_moe.experts.217.w1", "model.layers.53.block_sparse_moe.experts.218.w1", "model.layers.53.block_sparse_moe.experts.219.w1", "model.layers.53.block_sparse_moe.experts.220.w1", "model.layers.53.block_sparse_moe.experts.221.w1", "model.layers.53.block_sparse_moe.experts.222.w1", "model.layers.53.block_sparse_moe.experts.223.w1", "model.layers.53.block_sparse_moe.experts.224.w1", "model.layers.53.block_sparse_moe.experts.225.w1", "model.layers.53.block_sparse_moe.experts.226.w1", "model.layers.53.block_sparse_moe.experts.227.w1", "model.layers.53.block_sparse_moe.experts.228.w1", "model.layers.53.block_sparse_moe.experts.229.w1", "model.layers.53.block_sparse_moe.experts.230.w1", "model.layers.53.block_sparse_moe.experts.231.w1", "model.layers.53.block_sparse_moe.experts.232.w1", "model.layers.53.block_sparse_moe.experts.233.w1", "model.layers.53.block_sparse_moe.experts.234.w1", "model.layers.53.block_sparse_moe.experts.235.w1", "model.layers.53.block_sparse_moe.experts.236.w1", "model.layers.53.block_sparse_moe.experts.237.w1", "model.layers.53.block_sparse_moe.experts.238.w1", "model.layers.53.block_sparse_moe.experts.239.w1", "model.layers.53.block_sparse_moe.experts.240.w1", "model.layers.53.block_sparse_moe.experts.241.w1", "model.layers.53.block_sparse_moe.experts.242.w1", "model.layers.53.block_sparse_moe.experts.243.w1", "model.layers.53.block_sparse_moe.experts.244.w1", "model.layers.53.block_sparse_moe.experts.245.w1", "model.layers.53.block_sparse_moe.experts.246.w1", "model.layers.53.block_sparse_moe.experts.247.w1", "model.layers.53.block_sparse_moe.experts.248.w1", "model.layers.53.block_sparse_moe.experts.249.w1", "model.layers.53.block_sparse_moe.experts.250.w1", "model.layers.53.block_sparse_moe.experts.251.w1", "model.layers.53.block_sparse_moe.experts.252.w1", "model.layers.53.block_sparse_moe.experts.253.w1", "model.layers.53.block_sparse_moe.experts.254.w1", "model.layers.53.block_sparse_moe.experts.255.w1", "model.layers.53.block_sparse_moe.experts.0.w3", "model.layers.53.block_sparse_moe.experts.1.w3", "model.layers.53.block_sparse_moe.experts.2.w3", "model.layers.53.block_sparse_moe.experts.3.w3", "model.layers.53.block_sparse_moe.experts.4.w3", "model.layers.53.block_sparse_moe.experts.5.w3", "model.layers.53.block_sparse_moe.experts.6.w3", "model.layers.53.block_sparse_moe.experts.7.w3", "model.layers.53.block_sparse_moe.experts.8.w3", "model.layers.53.block_sparse_moe.experts.9.w3", "model.layers.53.block_sparse_moe.experts.10.w3", "model.layers.53.block_sparse_moe.experts.11.w3", "model.layers.53.block_sparse_moe.experts.12.w3", "model.layers.53.block_sparse_moe.experts.13.w3", "model.layers.53.block_sparse_moe.experts.14.w3", "model.layers.53.block_sparse_moe.experts.15.w3", "model.layers.53.block_sparse_moe.experts.16.w3", "model.layers.53.block_sparse_moe.experts.17.w3", "model.layers.53.block_sparse_moe.experts.18.w3", "model.layers.53.block_sparse_moe.experts.19.w3", "model.layers.53.block_sparse_moe.experts.20.w3", "model.layers.53.block_sparse_moe.experts.21.w3", "model.layers.53.block_sparse_moe.experts.22.w3", "model.layers.53.block_sparse_moe.experts.23.w3", "model.layers.53.block_sparse_moe.experts.24.w3", "model.layers.53.block_sparse_moe.experts.25.w3", "model.layers.53.block_sparse_moe.experts.26.w3", "model.layers.53.block_sparse_moe.experts.27.w3", "model.layers.53.block_sparse_moe.experts.28.w3", "model.layers.53.block_sparse_moe.experts.29.w3", "model.layers.53.block_sparse_moe.experts.30.w3", "model.layers.53.block_sparse_moe.experts.31.w3", "model.layers.53.block_sparse_moe.experts.32.w3", "model.layers.53.block_sparse_moe.experts.33.w3", "model.layers.53.block_sparse_moe.experts.34.w3", "model.layers.53.block_sparse_moe.experts.35.w3", "model.layers.53.block_sparse_moe.experts.36.w3", "model.layers.53.block_sparse_moe.experts.37.w3", "model.layers.53.block_sparse_moe.experts.38.w3", "model.layers.53.block_sparse_moe.experts.39.w3", "model.layers.53.block_sparse_moe.experts.40.w3", "model.layers.53.block_sparse_moe.experts.41.w3", "model.layers.53.block_sparse_moe.experts.42.w3", "model.layers.53.block_sparse_moe.experts.43.w3", "model.layers.53.block_sparse_moe.experts.44.w3", "model.layers.53.block_sparse_moe.experts.45.w3", "model.layers.53.block_sparse_moe.experts.46.w3", "model.layers.53.block_sparse_moe.experts.47.w3", "model.layers.53.block_sparse_moe.experts.48.w3", "model.layers.53.block_sparse_moe.experts.49.w3", "model.layers.53.block_sparse_moe.experts.50.w3", "model.layers.53.block_sparse_moe.experts.51.w3", "model.layers.53.block_sparse_moe.experts.52.w3", "model.layers.53.block_sparse_moe.experts.53.w3", "model.layers.53.block_sparse_moe.experts.54.w3", "model.layers.53.block_sparse_moe.experts.55.w3", "model.layers.53.block_sparse_moe.experts.56.w3", "model.layers.53.block_sparse_moe.experts.57.w3", "model.layers.53.block_sparse_moe.experts.58.w3", "model.layers.53.block_sparse_moe.experts.59.w3", "model.layers.53.block_sparse_moe.experts.60.w3", "model.layers.53.block_sparse_moe.experts.61.w3", "model.layers.53.block_sparse_moe.experts.62.w3", "model.layers.53.block_sparse_moe.experts.63.w3", "model.layers.53.block_sparse_moe.experts.64.w3", "model.layers.53.block_sparse_moe.experts.65.w3", "model.layers.53.block_sparse_moe.experts.66.w3", "model.layers.53.block_sparse_moe.experts.67.w3", "model.layers.53.block_sparse_moe.experts.68.w3", "model.layers.53.block_sparse_moe.experts.69.w3", "model.layers.53.block_sparse_moe.experts.70.w3", "model.layers.53.block_sparse_moe.experts.71.w3", "model.layers.53.block_sparse_moe.experts.72.w3", "model.layers.53.block_sparse_moe.experts.73.w3", "model.layers.53.block_sparse_moe.experts.74.w3", "model.layers.53.block_sparse_moe.experts.75.w3", "model.layers.53.block_sparse_moe.experts.76.w3", "model.layers.53.block_sparse_moe.experts.77.w3", "model.layers.53.block_sparse_moe.experts.78.w3", "model.layers.53.block_sparse_moe.experts.79.w3", "model.layers.53.block_sparse_moe.experts.80.w3", "model.layers.53.block_sparse_moe.experts.81.w3", "model.layers.53.block_sparse_moe.experts.82.w3", "model.layers.53.block_sparse_moe.experts.83.w3", "model.layers.53.block_sparse_moe.experts.84.w3", "model.layers.53.block_sparse_moe.experts.85.w3", "model.layers.53.block_sparse_moe.experts.86.w3", "model.layers.53.block_sparse_moe.experts.87.w3", "model.layers.53.block_sparse_moe.experts.88.w3", "model.layers.53.block_sparse_moe.experts.89.w3", "model.layers.53.block_sparse_moe.experts.90.w3", "model.layers.53.block_sparse_moe.experts.91.w3", "model.layers.53.block_sparse_moe.experts.92.w3", "model.layers.53.block_sparse_moe.experts.93.w3", "model.layers.53.block_sparse_moe.experts.94.w3", "model.layers.53.block_sparse_moe.experts.95.w3", "model.layers.53.block_sparse_moe.experts.96.w3", "model.layers.53.block_sparse_moe.experts.97.w3", "model.layers.53.block_sparse_moe.experts.98.w3", "model.layers.53.block_sparse_moe.experts.99.w3", "model.layers.53.block_sparse_moe.experts.100.w3", "model.layers.53.block_sparse_moe.experts.101.w3", "model.layers.53.block_sparse_moe.experts.102.w3", "model.layers.53.block_sparse_moe.experts.103.w3", "model.layers.53.block_sparse_moe.experts.104.w3", "model.layers.53.block_sparse_moe.experts.105.w3", "model.layers.53.block_sparse_moe.experts.106.w3", "model.layers.53.block_sparse_moe.experts.107.w3", "model.layers.53.block_sparse_moe.experts.108.w3", "model.layers.53.block_sparse_moe.experts.109.w3", "model.layers.53.block_sparse_moe.experts.110.w3", "model.layers.53.block_sparse_moe.experts.111.w3", "model.layers.53.block_sparse_moe.experts.112.w3", "model.layers.53.block_sparse_moe.experts.113.w3", "model.layers.53.block_sparse_moe.experts.114.w3", "model.layers.53.block_sparse_moe.experts.115.w3", "model.layers.53.block_sparse_moe.experts.116.w3", "model.layers.53.block_sparse_moe.experts.117.w3", "model.layers.53.block_sparse_moe.experts.118.w3", "model.layers.53.block_sparse_moe.experts.119.w3", "model.layers.53.block_sparse_moe.experts.120.w3", "model.layers.53.block_sparse_moe.experts.121.w3", "model.layers.53.block_sparse_moe.experts.122.w3", "model.layers.53.block_sparse_moe.experts.123.w3", "model.layers.53.block_sparse_moe.experts.124.w3", "model.layers.53.block_sparse_moe.experts.125.w3", "model.layers.53.block_sparse_moe.experts.126.w3", "model.layers.53.block_sparse_moe.experts.127.w3", "model.layers.53.block_sparse_moe.experts.128.w3", "model.layers.53.block_sparse_moe.experts.129.w3", "model.layers.53.block_sparse_moe.experts.130.w3", "model.layers.53.block_sparse_moe.experts.131.w3", "model.layers.53.block_sparse_moe.experts.132.w3", "model.layers.53.block_sparse_moe.experts.133.w3", "model.layers.53.block_sparse_moe.experts.134.w3", "model.layers.53.block_sparse_moe.experts.135.w3", "model.layers.53.block_sparse_moe.experts.136.w3", "model.layers.53.block_sparse_moe.experts.137.w3", "model.layers.53.block_sparse_moe.experts.138.w3", "model.layers.53.block_sparse_moe.experts.139.w3", "model.layers.53.block_sparse_moe.experts.140.w3", "model.layers.53.block_sparse_moe.experts.141.w3", "model.layers.53.block_sparse_moe.experts.142.w3", "model.layers.53.block_sparse_moe.experts.143.w3", "model.layers.53.block_sparse_moe.experts.144.w3", "model.layers.53.block_sparse_moe.experts.145.w3", "model.layers.53.block_sparse_moe.experts.146.w3", "model.layers.53.block_sparse_moe.experts.147.w3", "model.layers.53.block_sparse_moe.experts.148.w3", "model.layers.53.block_sparse_moe.experts.149.w3", "model.layers.53.block_sparse_moe.experts.150.w3", "model.layers.53.block_sparse_moe.experts.151.w3", "model.layers.53.block_sparse_moe.experts.152.w3", "model.layers.53.block_sparse_moe.experts.153.w3", "model.layers.53.block_sparse_moe.experts.154.w3", "model.layers.53.block_sparse_moe.experts.155.w3", "model.layers.53.block_sparse_moe.experts.156.w3", "model.layers.53.block_sparse_moe.experts.157.w3", "model.layers.53.block_sparse_moe.experts.158.w3", "model.layers.53.block_sparse_moe.experts.159.w3", "model.layers.53.block_sparse_moe.experts.160.w3", "model.layers.53.block_sparse_moe.experts.161.w3", "model.layers.53.block_sparse_moe.experts.162.w3", "model.layers.53.block_sparse_moe.experts.163.w3", "model.layers.53.block_sparse_moe.experts.164.w3", "model.layers.53.block_sparse_moe.experts.165.w3", "model.layers.53.block_sparse_moe.experts.166.w3", "model.layers.53.block_sparse_moe.experts.167.w3", "model.layers.53.block_sparse_moe.experts.168.w3", "model.layers.53.block_sparse_moe.experts.169.w3", "model.layers.53.block_sparse_moe.experts.170.w3", "model.layers.53.block_sparse_moe.experts.171.w3", "model.layers.53.block_sparse_moe.experts.172.w3", "model.layers.53.block_sparse_moe.experts.173.w3", "model.layers.53.block_sparse_moe.experts.174.w3", "model.layers.53.block_sparse_moe.experts.175.w3", "model.layers.53.block_sparse_moe.experts.176.w3", "model.layers.53.block_sparse_moe.experts.177.w3", "model.layers.53.block_sparse_moe.experts.178.w3", "model.layers.53.block_sparse_moe.experts.179.w3", "model.layers.53.block_sparse_moe.experts.180.w3", "model.layers.53.block_sparse_moe.experts.181.w3", "model.layers.53.block_sparse_moe.experts.182.w3", "model.layers.53.block_sparse_moe.experts.183.w3", "model.layers.53.block_sparse_moe.experts.184.w3", "model.layers.53.block_sparse_moe.experts.185.w3", "model.layers.53.block_sparse_moe.experts.186.w3", "model.layers.53.block_sparse_moe.experts.187.w3", "model.layers.53.block_sparse_moe.experts.188.w3", "model.layers.53.block_sparse_moe.experts.189.w3", "model.layers.53.block_sparse_moe.experts.190.w3", "model.layers.53.block_sparse_moe.experts.191.w3", "model.layers.53.block_sparse_moe.experts.192.w3", "model.layers.53.block_sparse_moe.experts.193.w3", "model.layers.53.block_sparse_moe.experts.194.w3", "model.layers.53.block_sparse_moe.experts.195.w3", "model.layers.53.block_sparse_moe.experts.196.w3", "model.layers.53.block_sparse_moe.experts.197.w3", "model.layers.53.block_sparse_moe.experts.198.w3", "model.layers.53.block_sparse_moe.experts.199.w3", "model.layers.53.block_sparse_moe.experts.200.w3", "model.layers.53.block_sparse_moe.experts.201.w3", "model.layers.53.block_sparse_moe.experts.202.w3", "model.layers.53.block_sparse_moe.experts.203.w3", "model.layers.53.block_sparse_moe.experts.204.w3", "model.layers.53.block_sparse_moe.experts.205.w3", "model.layers.53.block_sparse_moe.experts.206.w3", "model.layers.53.block_sparse_moe.experts.207.w3", "model.layers.53.block_sparse_moe.experts.208.w3", "model.layers.53.block_sparse_moe.experts.209.w3", "model.layers.53.block_sparse_moe.experts.210.w3", "model.layers.53.block_sparse_moe.experts.211.w3", "model.layers.53.block_sparse_moe.experts.212.w3", "model.layers.53.block_sparse_moe.experts.213.w3", "model.layers.53.block_sparse_moe.experts.214.w3", "model.layers.53.block_sparse_moe.experts.215.w3", "model.layers.53.block_sparse_moe.experts.216.w3", "model.layers.53.block_sparse_moe.experts.217.w3", "model.layers.53.block_sparse_moe.experts.218.w3", "model.layers.53.block_sparse_moe.experts.219.w3", "model.layers.53.block_sparse_moe.experts.220.w3", "model.layers.53.block_sparse_moe.experts.221.w3", "model.layers.53.block_sparse_moe.experts.222.w3", "model.layers.53.block_sparse_moe.experts.223.w3", "model.layers.53.block_sparse_moe.experts.224.w3", "model.layers.53.block_sparse_moe.experts.225.w3", "model.layers.53.block_sparse_moe.experts.226.w3", "model.layers.53.block_sparse_moe.experts.227.w3", "model.layers.53.block_sparse_moe.experts.228.w3", "model.layers.53.block_sparse_moe.experts.229.w3", "model.layers.53.block_sparse_moe.experts.230.w3", "model.layers.53.block_sparse_moe.experts.231.w3", "model.layers.53.block_sparse_moe.experts.232.w3", "model.layers.53.block_sparse_moe.experts.233.w3", "model.layers.53.block_sparse_moe.experts.234.w3", "model.layers.53.block_sparse_moe.experts.235.w3", "model.layers.53.block_sparse_moe.experts.236.w3", "model.layers.53.block_sparse_moe.experts.237.w3", "model.layers.53.block_sparse_moe.experts.238.w3", "model.layers.53.block_sparse_moe.experts.239.w3", "model.layers.53.block_sparse_moe.experts.240.w3", "model.layers.53.block_sparse_moe.experts.241.w3", "model.layers.53.block_sparse_moe.experts.242.w3", "model.layers.53.block_sparse_moe.experts.243.w3", "model.layers.53.block_sparse_moe.experts.244.w3", "model.layers.53.block_sparse_moe.experts.245.w3", "model.layers.53.block_sparse_moe.experts.246.w3", "model.layers.53.block_sparse_moe.experts.247.w3", "model.layers.53.block_sparse_moe.experts.248.w3", "model.layers.53.block_sparse_moe.experts.249.w3", "model.layers.53.block_sparse_moe.experts.250.w3", "model.layers.53.block_sparse_moe.experts.251.w3", "model.layers.53.block_sparse_moe.experts.252.w3", "model.layers.53.block_sparse_moe.experts.253.w3", "model.layers.53.block_sparse_moe.experts.254.w3", "model.layers.53.block_sparse_moe.experts.255.w3", "model.layers.53.block_sparse_moe.experts.0.w2", "model.layers.53.block_sparse_moe.experts.1.w2", "model.layers.53.block_sparse_moe.experts.2.w2", "model.layers.53.block_sparse_moe.experts.3.w2", "model.layers.53.block_sparse_moe.experts.4.w2", "model.layers.53.block_sparse_moe.experts.5.w2", "model.layers.53.block_sparse_moe.experts.6.w2", "model.layers.53.block_sparse_moe.experts.7.w2", "model.layers.53.block_sparse_moe.experts.8.w2", "model.layers.53.block_sparse_moe.experts.9.w2", "model.layers.53.block_sparse_moe.experts.10.w2", "model.layers.53.block_sparse_moe.experts.11.w2", "model.layers.53.block_sparse_moe.experts.12.w2", "model.layers.53.block_sparse_moe.experts.13.w2", "model.layers.53.block_sparse_moe.experts.14.w2", "model.layers.53.block_sparse_moe.experts.15.w2", "model.layers.53.block_sparse_moe.experts.16.w2", "model.layers.53.block_sparse_moe.experts.17.w2", "model.layers.53.block_sparse_moe.experts.18.w2", "model.layers.53.block_sparse_moe.experts.19.w2", "model.layers.53.block_sparse_moe.experts.20.w2", "model.layers.53.block_sparse_moe.experts.21.w2", "model.layers.53.block_sparse_moe.experts.22.w2", "model.layers.53.block_sparse_moe.experts.23.w2", "model.layers.53.block_sparse_moe.experts.24.w2", "model.layers.53.block_sparse_moe.experts.25.w2", "model.layers.53.block_sparse_moe.experts.26.w2", "model.layers.53.block_sparse_moe.experts.27.w2", "model.layers.53.block_sparse_moe.experts.28.w2", "model.layers.53.block_sparse_moe.experts.29.w2", "model.layers.53.block_sparse_moe.experts.30.w2", "model.layers.53.block_sparse_moe.experts.31.w2", "model.layers.53.block_sparse_moe.experts.32.w2", "model.layers.53.block_sparse_moe.experts.33.w2", "model.layers.53.block_sparse_moe.experts.34.w2", "model.layers.53.block_sparse_moe.experts.35.w2", "model.layers.53.block_sparse_moe.experts.36.w2", "model.layers.53.block_sparse_moe.experts.37.w2", "model.layers.53.block_sparse_moe.experts.38.w2", "model.layers.53.block_sparse_moe.experts.39.w2", "model.layers.53.block_sparse_moe.experts.40.w2", "model.layers.53.block_sparse_moe.experts.41.w2", "model.layers.53.block_sparse_moe.experts.42.w2", "model.layers.53.block_sparse_moe.experts.43.w2", "model.layers.53.block_sparse_moe.experts.44.w2", "model.layers.53.block_sparse_moe.experts.45.w2", "model.layers.53.block_sparse_moe.experts.46.w2", "model.layers.53.block_sparse_moe.experts.47.w2", "model.layers.53.block_sparse_moe.experts.48.w2", "model.layers.53.block_sparse_moe.experts.49.w2", "model.layers.53.block_sparse_moe.experts.50.w2", "model.layers.53.block_sparse_moe.experts.51.w2", "model.layers.53.block_sparse_moe.experts.52.w2", "model.layers.53.block_sparse_moe.experts.53.w2", "model.layers.53.block_sparse_moe.experts.54.w2", "model.layers.53.block_sparse_moe.experts.55.w2", "model.layers.53.block_sparse_moe.experts.56.w2", "model.layers.53.block_sparse_moe.experts.57.w2", "model.layers.53.block_sparse_moe.experts.58.w2", "model.layers.53.block_sparse_moe.experts.59.w2", "model.layers.53.block_sparse_moe.experts.60.w2", "model.layers.53.block_sparse_moe.experts.61.w2", "model.layers.53.block_sparse_moe.experts.62.w2", "model.layers.53.block_sparse_moe.experts.63.w2", "model.layers.53.block_sparse_moe.experts.64.w2", "model.layers.53.block_sparse_moe.experts.65.w2", "model.layers.53.block_sparse_moe.experts.66.w2", "model.layers.53.block_sparse_moe.experts.67.w2", "model.layers.53.block_sparse_moe.experts.68.w2", "model.layers.53.block_sparse_moe.experts.69.w2", "model.layers.53.block_sparse_moe.experts.70.w2", "model.layers.53.block_sparse_moe.experts.71.w2", "model.layers.53.block_sparse_moe.experts.72.w2", "model.layers.53.block_sparse_moe.experts.73.w2", "model.layers.53.block_sparse_moe.experts.74.w2", "model.layers.53.block_sparse_moe.experts.75.w2", "model.layers.53.block_sparse_moe.experts.76.w2", "model.layers.53.block_sparse_moe.experts.77.w2", "model.layers.53.block_sparse_moe.experts.78.w2", "model.layers.53.block_sparse_moe.experts.79.w2", "model.layers.53.block_sparse_moe.experts.80.w2", "model.layers.53.block_sparse_moe.experts.81.w2", "model.layers.53.block_sparse_moe.experts.82.w2", "model.layers.53.block_sparse_moe.experts.83.w2", "model.layers.53.block_sparse_moe.experts.84.w2", "model.layers.53.block_sparse_moe.experts.85.w2", "model.layers.53.block_sparse_moe.experts.86.w2", "model.layers.53.block_sparse_moe.experts.87.w2", "model.layers.53.block_sparse_moe.experts.88.w2", "model.layers.53.block_sparse_moe.experts.89.w2", "model.layers.53.block_sparse_moe.experts.90.w2", "model.layers.53.block_sparse_moe.experts.91.w2", "model.layers.53.block_sparse_moe.experts.92.w2", "model.layers.53.block_sparse_moe.experts.93.w2", "model.layers.53.block_sparse_moe.experts.94.w2", "model.layers.53.block_sparse_moe.experts.95.w2", "model.layers.53.block_sparse_moe.experts.96.w2", "model.layers.53.block_sparse_moe.experts.97.w2", "model.layers.53.block_sparse_moe.experts.98.w2", "model.layers.53.block_sparse_moe.experts.99.w2", "model.layers.53.block_sparse_moe.experts.100.w2", "model.layers.53.block_sparse_moe.experts.101.w2", "model.layers.53.block_sparse_moe.experts.102.w2", "model.layers.53.block_sparse_moe.experts.103.w2", "model.layers.53.block_sparse_moe.experts.104.w2", "model.layers.53.block_sparse_moe.experts.105.w2", "model.layers.53.block_sparse_moe.experts.106.w2", "model.layers.53.block_sparse_moe.experts.107.w2", "model.layers.53.block_sparse_moe.experts.108.w2", "model.layers.53.block_sparse_moe.experts.109.w2", "model.layers.53.block_sparse_moe.experts.110.w2", "model.layers.53.block_sparse_moe.experts.111.w2", "model.layers.53.block_sparse_moe.experts.112.w2", "model.layers.53.block_sparse_moe.experts.113.w2", "model.layers.53.block_sparse_moe.experts.114.w2", "model.layers.53.block_sparse_moe.experts.115.w2", "model.layers.53.block_sparse_moe.experts.116.w2", "model.layers.53.block_sparse_moe.experts.117.w2", "model.layers.53.block_sparse_moe.experts.118.w2", "model.layers.53.block_sparse_moe.experts.119.w2", "model.layers.53.block_sparse_moe.experts.120.w2", "model.layers.53.block_sparse_moe.experts.121.w2", "model.layers.53.block_sparse_moe.experts.122.w2", "model.layers.53.block_sparse_moe.experts.123.w2", "model.layers.53.block_sparse_moe.experts.124.w2", "model.layers.53.block_sparse_moe.experts.125.w2", "model.layers.53.block_sparse_moe.experts.126.w2", "model.layers.53.block_sparse_moe.experts.127.w2", "model.layers.53.block_sparse_moe.experts.128.w2", "model.layers.53.block_sparse_moe.experts.129.w2", "model.layers.53.block_sparse_moe.experts.130.w2", "model.layers.53.block_sparse_moe.experts.131.w2", "model.layers.53.block_sparse_moe.experts.132.w2", "model.layers.53.block_sparse_moe.experts.133.w2", "model.layers.53.block_sparse_moe.experts.134.w2", "model.layers.53.block_sparse_moe.experts.135.w2", "model.layers.53.block_sparse_moe.experts.136.w2", "model.layers.53.block_sparse_moe.experts.137.w2", "model.layers.53.block_sparse_moe.experts.138.w2", "model.layers.53.block_sparse_moe.experts.139.w2", "model.layers.53.block_sparse_moe.experts.140.w2", "model.layers.53.block_sparse_moe.experts.141.w2", "model.layers.53.block_sparse_moe.experts.142.w2", "model.layers.53.block_sparse_moe.experts.143.w2", "model.layers.53.block_sparse_moe.experts.144.w2", "model.layers.53.block_sparse_moe.experts.145.w2", "model.layers.53.block_sparse_moe.experts.146.w2", "model.layers.53.block_sparse_moe.experts.147.w2", "model.layers.53.block_sparse_moe.experts.148.w2", "model.layers.53.block_sparse_moe.experts.149.w2", "model.layers.53.block_sparse_moe.experts.150.w2", "model.layers.53.block_sparse_moe.experts.151.w2", "model.layers.53.block_sparse_moe.experts.152.w2", "model.layers.53.block_sparse_moe.experts.153.w2", "model.layers.53.block_sparse_moe.experts.154.w2", "model.layers.53.block_sparse_moe.experts.155.w2", "model.layers.53.block_sparse_moe.experts.156.w2", "model.layers.53.block_sparse_moe.experts.157.w2", "model.layers.53.block_sparse_moe.experts.158.w2", "model.layers.53.block_sparse_moe.experts.159.w2", "model.layers.53.block_sparse_moe.experts.160.w2", "model.layers.53.block_sparse_moe.experts.161.w2", "model.layers.53.block_sparse_moe.experts.162.w2", "model.layers.53.block_sparse_moe.experts.163.w2", "model.layers.53.block_sparse_moe.experts.164.w2", "model.layers.53.block_sparse_moe.experts.165.w2", "model.layers.53.block_sparse_moe.experts.166.w2", "model.layers.53.block_sparse_moe.experts.167.w2", "model.layers.53.block_sparse_moe.experts.168.w2", "model.layers.53.block_sparse_moe.experts.169.w2", "model.layers.53.block_sparse_moe.experts.170.w2", "model.layers.53.block_sparse_moe.experts.171.w2", "model.layers.53.block_sparse_moe.experts.172.w2", "model.layers.53.block_sparse_moe.experts.173.w2", "model.layers.53.block_sparse_moe.experts.174.w2", "model.layers.53.block_sparse_moe.experts.175.w2", "model.layers.53.block_sparse_moe.experts.176.w2", "model.layers.53.block_sparse_moe.experts.177.w2", "model.layers.53.block_sparse_moe.experts.178.w2", "model.layers.53.block_sparse_moe.experts.179.w2", "model.layers.53.block_sparse_moe.experts.180.w2", "model.layers.53.block_sparse_moe.experts.181.w2", "model.layers.53.block_sparse_moe.experts.182.w2", "model.layers.53.block_sparse_moe.experts.183.w2", "model.layers.53.block_sparse_moe.experts.184.w2", "model.layers.53.block_sparse_moe.experts.185.w2", "model.layers.53.block_sparse_moe.experts.186.w2", "model.layers.53.block_sparse_moe.experts.187.w2", "model.layers.53.block_sparse_moe.experts.188.w2", "model.layers.53.block_sparse_moe.experts.189.w2", "model.layers.53.block_sparse_moe.experts.190.w2", "model.layers.53.block_sparse_moe.experts.191.w2", "model.layers.53.block_sparse_moe.experts.192.w2", "model.layers.53.block_sparse_moe.experts.193.w2", "model.layers.53.block_sparse_moe.experts.194.w2", "model.layers.53.block_sparse_moe.experts.195.w2", "model.layers.53.block_sparse_moe.experts.196.w2", "model.layers.53.block_sparse_moe.experts.197.w2", "model.layers.53.block_sparse_moe.experts.198.w2", "model.layers.53.block_sparse_moe.experts.199.w2", "model.layers.53.block_sparse_moe.experts.200.w2", "model.layers.53.block_sparse_moe.experts.201.w2", "model.layers.53.block_sparse_moe.experts.202.w2", "model.layers.53.block_sparse_moe.experts.203.w2", "model.layers.53.block_sparse_moe.experts.204.w2", "model.layers.53.block_sparse_moe.experts.205.w2", "model.layers.53.block_sparse_moe.experts.206.w2", "model.layers.53.block_sparse_moe.experts.207.w2", "model.layers.53.block_sparse_moe.experts.208.w2", "model.layers.53.block_sparse_moe.experts.209.w2", "model.layers.53.block_sparse_moe.experts.210.w2", "model.layers.53.block_sparse_moe.experts.211.w2", "model.layers.53.block_sparse_moe.experts.212.w2", "model.layers.53.block_sparse_moe.experts.213.w2", "model.layers.53.block_sparse_moe.experts.214.w2", "model.layers.53.block_sparse_moe.experts.215.w2", "model.layers.53.block_sparse_moe.experts.216.w2", "model.layers.53.block_sparse_moe.experts.217.w2", "model.layers.53.block_sparse_moe.experts.218.w2", "model.layers.53.block_sparse_moe.experts.219.w2", "model.layers.53.block_sparse_moe.experts.220.w2", "model.layers.53.block_sparse_moe.experts.221.w2", "model.layers.53.block_sparse_moe.experts.222.w2", "model.layers.53.block_sparse_moe.experts.223.w2", "model.layers.53.block_sparse_moe.experts.224.w2", "model.layers.53.block_sparse_moe.experts.225.w2", "model.layers.53.block_sparse_moe.experts.226.w2", "model.layers.53.block_sparse_moe.experts.227.w2", "model.layers.53.block_sparse_moe.experts.228.w2", "model.layers.53.block_sparse_moe.experts.229.w2", "model.layers.53.block_sparse_moe.experts.230.w2", "model.layers.53.block_sparse_moe.experts.231.w2", "model.layers.53.block_sparse_moe.experts.232.w2", "model.layers.53.block_sparse_moe.experts.233.w2", "model.layers.53.block_sparse_moe.experts.234.w2", "model.layers.53.block_sparse_moe.experts.235.w2", "model.layers.53.block_sparse_moe.experts.236.w2", "model.layers.53.block_sparse_moe.experts.237.w2", "model.layers.53.block_sparse_moe.experts.238.w2", "model.layers.53.block_sparse_moe.experts.239.w2", "model.layers.53.block_sparse_moe.experts.240.w2", "model.layers.53.block_sparse_moe.experts.241.w2", "model.layers.53.block_sparse_moe.experts.242.w2", "model.layers.53.block_sparse_moe.experts.243.w2", "model.layers.53.block_sparse_moe.experts.244.w2", "model.layers.53.block_sparse_moe.experts.245.w2", "model.layers.53.block_sparse_moe.experts.246.w2", "model.layers.53.block_sparse_moe.experts.247.w2", "model.layers.53.block_sparse_moe.experts.248.w2", "model.layers.53.block_sparse_moe.experts.249.w2", "model.layers.53.block_sparse_moe.experts.250.w2", "model.layers.53.block_sparse_moe.experts.251.w2", "model.layers.53.block_sparse_moe.experts.252.w2", "model.layers.53.block_sparse_moe.experts.253.w2", "model.layers.53.block_sparse_moe.experts.254.w2", "model.layers.53.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00040992163121700287, "dbits": 3623878656 } ] }, { "idx": 108, "layers": [ "model.layers.54.self_attn.q_proj", "model.layers.54.self_attn.k_proj", "model.layers.54.self_attn.v_proj", "model.layers.54.self_attn.o_proj" ], "candidates": [ { "dkld": 0.001507522910833392, "dbits": 44040192 } ] }, { "idx": 109, "layers": [ "model.layers.54.block_sparse_moe.experts.0.w1", "model.layers.54.block_sparse_moe.experts.1.w1", "model.layers.54.block_sparse_moe.experts.2.w1", "model.layers.54.block_sparse_moe.experts.3.w1", "model.layers.54.block_sparse_moe.experts.4.w1", "model.layers.54.block_sparse_moe.experts.5.w1", "model.layers.54.block_sparse_moe.experts.6.w1", "model.layers.54.block_sparse_moe.experts.7.w1", "model.layers.54.block_sparse_moe.experts.8.w1", "model.layers.54.block_sparse_moe.experts.9.w1", "model.layers.54.block_sparse_moe.experts.10.w1", "model.layers.54.block_sparse_moe.experts.11.w1", "model.layers.54.block_sparse_moe.experts.12.w1", "model.layers.54.block_sparse_moe.experts.13.w1", "model.layers.54.block_sparse_moe.experts.14.w1", "model.layers.54.block_sparse_moe.experts.15.w1", "model.layers.54.block_sparse_moe.experts.16.w1", "model.layers.54.block_sparse_moe.experts.17.w1", "model.layers.54.block_sparse_moe.experts.18.w1", "model.layers.54.block_sparse_moe.experts.19.w1", "model.layers.54.block_sparse_moe.experts.20.w1", "model.layers.54.block_sparse_moe.experts.21.w1", "model.layers.54.block_sparse_moe.experts.22.w1", "model.layers.54.block_sparse_moe.experts.23.w1", "model.layers.54.block_sparse_moe.experts.24.w1", "model.layers.54.block_sparse_moe.experts.25.w1", "model.layers.54.block_sparse_moe.experts.26.w1", "model.layers.54.block_sparse_moe.experts.27.w1", "model.layers.54.block_sparse_moe.experts.28.w1", "model.layers.54.block_sparse_moe.experts.29.w1", "model.layers.54.block_sparse_moe.experts.30.w1", "model.layers.54.block_sparse_moe.experts.31.w1", "model.layers.54.block_sparse_moe.experts.32.w1", "model.layers.54.block_sparse_moe.experts.33.w1", "model.layers.54.block_sparse_moe.experts.34.w1", "model.layers.54.block_sparse_moe.experts.35.w1", "model.layers.54.block_sparse_moe.experts.36.w1", "model.layers.54.block_sparse_moe.experts.37.w1", "model.layers.54.block_sparse_moe.experts.38.w1", "model.layers.54.block_sparse_moe.experts.39.w1", "model.layers.54.block_sparse_moe.experts.40.w1", "model.layers.54.block_sparse_moe.experts.41.w1", "model.layers.54.block_sparse_moe.experts.42.w1", "model.layers.54.block_sparse_moe.experts.43.w1", "model.layers.54.block_sparse_moe.experts.44.w1", "model.layers.54.block_sparse_moe.experts.45.w1", "model.layers.54.block_sparse_moe.experts.46.w1", "model.layers.54.block_sparse_moe.experts.47.w1", "model.layers.54.block_sparse_moe.experts.48.w1", "model.layers.54.block_sparse_moe.experts.49.w1", "model.layers.54.block_sparse_moe.experts.50.w1", "model.layers.54.block_sparse_moe.experts.51.w1", "model.layers.54.block_sparse_moe.experts.52.w1", "model.layers.54.block_sparse_moe.experts.53.w1", "model.layers.54.block_sparse_moe.experts.54.w1", "model.layers.54.block_sparse_moe.experts.55.w1", "model.layers.54.block_sparse_moe.experts.56.w1", "model.layers.54.block_sparse_moe.experts.57.w1", "model.layers.54.block_sparse_moe.experts.58.w1", "model.layers.54.block_sparse_moe.experts.59.w1", "model.layers.54.block_sparse_moe.experts.60.w1", "model.layers.54.block_sparse_moe.experts.61.w1", "model.layers.54.block_sparse_moe.experts.62.w1", "model.layers.54.block_sparse_moe.experts.63.w1", "model.layers.54.block_sparse_moe.experts.64.w1", "model.layers.54.block_sparse_moe.experts.65.w1", "model.layers.54.block_sparse_moe.experts.66.w1", "model.layers.54.block_sparse_moe.experts.67.w1", "model.layers.54.block_sparse_moe.experts.68.w1", "model.layers.54.block_sparse_moe.experts.69.w1", "model.layers.54.block_sparse_moe.experts.70.w1", "model.layers.54.block_sparse_moe.experts.71.w1", "model.layers.54.block_sparse_moe.experts.72.w1", "model.layers.54.block_sparse_moe.experts.73.w1", "model.layers.54.block_sparse_moe.experts.74.w1", "model.layers.54.block_sparse_moe.experts.75.w1", "model.layers.54.block_sparse_moe.experts.76.w1", "model.layers.54.block_sparse_moe.experts.77.w1", "model.layers.54.block_sparse_moe.experts.78.w1", "model.layers.54.block_sparse_moe.experts.79.w1", "model.layers.54.block_sparse_moe.experts.80.w1", "model.layers.54.block_sparse_moe.experts.81.w1", "model.layers.54.block_sparse_moe.experts.82.w1", "model.layers.54.block_sparse_moe.experts.83.w1", "model.layers.54.block_sparse_moe.experts.84.w1", "model.layers.54.block_sparse_moe.experts.85.w1", "model.layers.54.block_sparse_moe.experts.86.w1", "model.layers.54.block_sparse_moe.experts.87.w1", "model.layers.54.block_sparse_moe.experts.88.w1", "model.layers.54.block_sparse_moe.experts.89.w1", "model.layers.54.block_sparse_moe.experts.90.w1", "model.layers.54.block_sparse_moe.experts.91.w1", "model.layers.54.block_sparse_moe.experts.92.w1", "model.layers.54.block_sparse_moe.experts.93.w1", "model.layers.54.block_sparse_moe.experts.94.w1", "model.layers.54.block_sparse_moe.experts.95.w1", "model.layers.54.block_sparse_moe.experts.96.w1", "model.layers.54.block_sparse_moe.experts.97.w1", "model.layers.54.block_sparse_moe.experts.98.w1", "model.layers.54.block_sparse_moe.experts.99.w1", "model.layers.54.block_sparse_moe.experts.100.w1", "model.layers.54.block_sparse_moe.experts.101.w1", "model.layers.54.block_sparse_moe.experts.102.w1", "model.layers.54.block_sparse_moe.experts.103.w1", "model.layers.54.block_sparse_moe.experts.104.w1", "model.layers.54.block_sparse_moe.experts.105.w1", "model.layers.54.block_sparse_moe.experts.106.w1", "model.layers.54.block_sparse_moe.experts.107.w1", "model.layers.54.block_sparse_moe.experts.108.w1", "model.layers.54.block_sparse_moe.experts.109.w1", "model.layers.54.block_sparse_moe.experts.110.w1", "model.layers.54.block_sparse_moe.experts.111.w1", "model.layers.54.block_sparse_moe.experts.112.w1", "model.layers.54.block_sparse_moe.experts.113.w1", "model.layers.54.block_sparse_moe.experts.114.w1", "model.layers.54.block_sparse_moe.experts.115.w1", "model.layers.54.block_sparse_moe.experts.116.w1", "model.layers.54.block_sparse_moe.experts.117.w1", "model.layers.54.block_sparse_moe.experts.118.w1", "model.layers.54.block_sparse_moe.experts.119.w1", "model.layers.54.block_sparse_moe.experts.120.w1", "model.layers.54.block_sparse_moe.experts.121.w1", "model.layers.54.block_sparse_moe.experts.122.w1", "model.layers.54.block_sparse_moe.experts.123.w1", "model.layers.54.block_sparse_moe.experts.124.w1", "model.layers.54.block_sparse_moe.experts.125.w1", "model.layers.54.block_sparse_moe.experts.126.w1", "model.layers.54.block_sparse_moe.experts.127.w1", "model.layers.54.block_sparse_moe.experts.128.w1", "model.layers.54.block_sparse_moe.experts.129.w1", "model.layers.54.block_sparse_moe.experts.130.w1", "model.layers.54.block_sparse_moe.experts.131.w1", "model.layers.54.block_sparse_moe.experts.132.w1", "model.layers.54.block_sparse_moe.experts.133.w1", "model.layers.54.block_sparse_moe.experts.134.w1", "model.layers.54.block_sparse_moe.experts.135.w1", "model.layers.54.block_sparse_moe.experts.136.w1", "model.layers.54.block_sparse_moe.experts.137.w1", "model.layers.54.block_sparse_moe.experts.138.w1", "model.layers.54.block_sparse_moe.experts.139.w1", "model.layers.54.block_sparse_moe.experts.140.w1", "model.layers.54.block_sparse_moe.experts.141.w1", "model.layers.54.block_sparse_moe.experts.142.w1", "model.layers.54.block_sparse_moe.experts.143.w1", "model.layers.54.block_sparse_moe.experts.144.w1", "model.layers.54.block_sparse_moe.experts.145.w1", "model.layers.54.block_sparse_moe.experts.146.w1", "model.layers.54.block_sparse_moe.experts.147.w1", "model.layers.54.block_sparse_moe.experts.148.w1", "model.layers.54.block_sparse_moe.experts.149.w1", "model.layers.54.block_sparse_moe.experts.150.w1", "model.layers.54.block_sparse_moe.experts.151.w1", "model.layers.54.block_sparse_moe.experts.152.w1", "model.layers.54.block_sparse_moe.experts.153.w1", "model.layers.54.block_sparse_moe.experts.154.w1", "model.layers.54.block_sparse_moe.experts.155.w1", "model.layers.54.block_sparse_moe.experts.156.w1", "model.layers.54.block_sparse_moe.experts.157.w1", "model.layers.54.block_sparse_moe.experts.158.w1", "model.layers.54.block_sparse_moe.experts.159.w1", "model.layers.54.block_sparse_moe.experts.160.w1", "model.layers.54.block_sparse_moe.experts.161.w1", "model.layers.54.block_sparse_moe.experts.162.w1", "model.layers.54.block_sparse_moe.experts.163.w1", "model.layers.54.block_sparse_moe.experts.164.w1", "model.layers.54.block_sparse_moe.experts.165.w1", "model.layers.54.block_sparse_moe.experts.166.w1", "model.layers.54.block_sparse_moe.experts.167.w1", "model.layers.54.block_sparse_moe.experts.168.w1", "model.layers.54.block_sparse_moe.experts.169.w1", "model.layers.54.block_sparse_moe.experts.170.w1", "model.layers.54.block_sparse_moe.experts.171.w1", "model.layers.54.block_sparse_moe.experts.172.w1", "model.layers.54.block_sparse_moe.experts.173.w1", "model.layers.54.block_sparse_moe.experts.174.w1", "model.layers.54.block_sparse_moe.experts.175.w1", "model.layers.54.block_sparse_moe.experts.176.w1", "model.layers.54.block_sparse_moe.experts.177.w1", "model.layers.54.block_sparse_moe.experts.178.w1", "model.layers.54.block_sparse_moe.experts.179.w1", "model.layers.54.block_sparse_moe.experts.180.w1", "model.layers.54.block_sparse_moe.experts.181.w1", "model.layers.54.block_sparse_moe.experts.182.w1", "model.layers.54.block_sparse_moe.experts.183.w1", "model.layers.54.block_sparse_moe.experts.184.w1", "model.layers.54.block_sparse_moe.experts.185.w1", "model.layers.54.block_sparse_moe.experts.186.w1", "model.layers.54.block_sparse_moe.experts.187.w1", "model.layers.54.block_sparse_moe.experts.188.w1", "model.layers.54.block_sparse_moe.experts.189.w1", "model.layers.54.block_sparse_moe.experts.190.w1", "model.layers.54.block_sparse_moe.experts.191.w1", "model.layers.54.block_sparse_moe.experts.192.w1", "model.layers.54.block_sparse_moe.experts.193.w1", "model.layers.54.block_sparse_moe.experts.194.w1", "model.layers.54.block_sparse_moe.experts.195.w1", "model.layers.54.block_sparse_moe.experts.196.w1", "model.layers.54.block_sparse_moe.experts.197.w1", "model.layers.54.block_sparse_moe.experts.198.w1", "model.layers.54.block_sparse_moe.experts.199.w1", "model.layers.54.block_sparse_moe.experts.200.w1", "model.layers.54.block_sparse_moe.experts.201.w1", "model.layers.54.block_sparse_moe.experts.202.w1", "model.layers.54.block_sparse_moe.experts.203.w1", "model.layers.54.block_sparse_moe.experts.204.w1", "model.layers.54.block_sparse_moe.experts.205.w1", "model.layers.54.block_sparse_moe.experts.206.w1", "model.layers.54.block_sparse_moe.experts.207.w1", "model.layers.54.block_sparse_moe.experts.208.w1", "model.layers.54.block_sparse_moe.experts.209.w1", "model.layers.54.block_sparse_moe.experts.210.w1", "model.layers.54.block_sparse_moe.experts.211.w1", "model.layers.54.block_sparse_moe.experts.212.w1", "model.layers.54.block_sparse_moe.experts.213.w1", "model.layers.54.block_sparse_moe.experts.214.w1", "model.layers.54.block_sparse_moe.experts.215.w1", "model.layers.54.block_sparse_moe.experts.216.w1", "model.layers.54.block_sparse_moe.experts.217.w1", "model.layers.54.block_sparse_moe.experts.218.w1", "model.layers.54.block_sparse_moe.experts.219.w1", "model.layers.54.block_sparse_moe.experts.220.w1", "model.layers.54.block_sparse_moe.experts.221.w1", "model.layers.54.block_sparse_moe.experts.222.w1", "model.layers.54.block_sparse_moe.experts.223.w1", "model.layers.54.block_sparse_moe.experts.224.w1", "model.layers.54.block_sparse_moe.experts.225.w1", "model.layers.54.block_sparse_moe.experts.226.w1", "model.layers.54.block_sparse_moe.experts.227.w1", "model.layers.54.block_sparse_moe.experts.228.w1", "model.layers.54.block_sparse_moe.experts.229.w1", "model.layers.54.block_sparse_moe.experts.230.w1", "model.layers.54.block_sparse_moe.experts.231.w1", "model.layers.54.block_sparse_moe.experts.232.w1", "model.layers.54.block_sparse_moe.experts.233.w1", "model.layers.54.block_sparse_moe.experts.234.w1", "model.layers.54.block_sparse_moe.experts.235.w1", "model.layers.54.block_sparse_moe.experts.236.w1", "model.layers.54.block_sparse_moe.experts.237.w1", "model.layers.54.block_sparse_moe.experts.238.w1", "model.layers.54.block_sparse_moe.experts.239.w1", "model.layers.54.block_sparse_moe.experts.240.w1", "model.layers.54.block_sparse_moe.experts.241.w1", "model.layers.54.block_sparse_moe.experts.242.w1", "model.layers.54.block_sparse_moe.experts.243.w1", "model.layers.54.block_sparse_moe.experts.244.w1", "model.layers.54.block_sparse_moe.experts.245.w1", "model.layers.54.block_sparse_moe.experts.246.w1", "model.layers.54.block_sparse_moe.experts.247.w1", "model.layers.54.block_sparse_moe.experts.248.w1", "model.layers.54.block_sparse_moe.experts.249.w1", "model.layers.54.block_sparse_moe.experts.250.w1", "model.layers.54.block_sparse_moe.experts.251.w1", "model.layers.54.block_sparse_moe.experts.252.w1", "model.layers.54.block_sparse_moe.experts.253.w1", "model.layers.54.block_sparse_moe.experts.254.w1", "model.layers.54.block_sparse_moe.experts.255.w1", "model.layers.54.block_sparse_moe.experts.0.w3", "model.layers.54.block_sparse_moe.experts.1.w3", "model.layers.54.block_sparse_moe.experts.2.w3", "model.layers.54.block_sparse_moe.experts.3.w3", "model.layers.54.block_sparse_moe.experts.4.w3", "model.layers.54.block_sparse_moe.experts.5.w3", "model.layers.54.block_sparse_moe.experts.6.w3", "model.layers.54.block_sparse_moe.experts.7.w3", "model.layers.54.block_sparse_moe.experts.8.w3", "model.layers.54.block_sparse_moe.experts.9.w3", "model.layers.54.block_sparse_moe.experts.10.w3", "model.layers.54.block_sparse_moe.experts.11.w3", "model.layers.54.block_sparse_moe.experts.12.w3", "model.layers.54.block_sparse_moe.experts.13.w3", "model.layers.54.block_sparse_moe.experts.14.w3", "model.layers.54.block_sparse_moe.experts.15.w3", "model.layers.54.block_sparse_moe.experts.16.w3", "model.layers.54.block_sparse_moe.experts.17.w3", "model.layers.54.block_sparse_moe.experts.18.w3", "model.layers.54.block_sparse_moe.experts.19.w3", "model.layers.54.block_sparse_moe.experts.20.w3", "model.layers.54.block_sparse_moe.experts.21.w3", "model.layers.54.block_sparse_moe.experts.22.w3", "model.layers.54.block_sparse_moe.experts.23.w3", "model.layers.54.block_sparse_moe.experts.24.w3", "model.layers.54.block_sparse_moe.experts.25.w3", "model.layers.54.block_sparse_moe.experts.26.w3", "model.layers.54.block_sparse_moe.experts.27.w3", "model.layers.54.block_sparse_moe.experts.28.w3", "model.layers.54.block_sparse_moe.experts.29.w3", "model.layers.54.block_sparse_moe.experts.30.w3", "model.layers.54.block_sparse_moe.experts.31.w3", "model.layers.54.block_sparse_moe.experts.32.w3", "model.layers.54.block_sparse_moe.experts.33.w3", "model.layers.54.block_sparse_moe.experts.34.w3", "model.layers.54.block_sparse_moe.experts.35.w3", "model.layers.54.block_sparse_moe.experts.36.w3", "model.layers.54.block_sparse_moe.experts.37.w3", "model.layers.54.block_sparse_moe.experts.38.w3", "model.layers.54.block_sparse_moe.experts.39.w3", "model.layers.54.block_sparse_moe.experts.40.w3", "model.layers.54.block_sparse_moe.experts.41.w3", "model.layers.54.block_sparse_moe.experts.42.w3", "model.layers.54.block_sparse_moe.experts.43.w3", "model.layers.54.block_sparse_moe.experts.44.w3", "model.layers.54.block_sparse_moe.experts.45.w3", "model.layers.54.block_sparse_moe.experts.46.w3", "model.layers.54.block_sparse_moe.experts.47.w3", "model.layers.54.block_sparse_moe.experts.48.w3", "model.layers.54.block_sparse_moe.experts.49.w3", "model.layers.54.block_sparse_moe.experts.50.w3", "model.layers.54.block_sparse_moe.experts.51.w3", "model.layers.54.block_sparse_moe.experts.52.w3", "model.layers.54.block_sparse_moe.experts.53.w3", "model.layers.54.block_sparse_moe.experts.54.w3", "model.layers.54.block_sparse_moe.experts.55.w3", "model.layers.54.block_sparse_moe.experts.56.w3", "model.layers.54.block_sparse_moe.experts.57.w3", "model.layers.54.block_sparse_moe.experts.58.w3", "model.layers.54.block_sparse_moe.experts.59.w3", "model.layers.54.block_sparse_moe.experts.60.w3", "model.layers.54.block_sparse_moe.experts.61.w3", "model.layers.54.block_sparse_moe.experts.62.w3", "model.layers.54.block_sparse_moe.experts.63.w3", "model.layers.54.block_sparse_moe.experts.64.w3", "model.layers.54.block_sparse_moe.experts.65.w3", "model.layers.54.block_sparse_moe.experts.66.w3", "model.layers.54.block_sparse_moe.experts.67.w3", "model.layers.54.block_sparse_moe.experts.68.w3", "model.layers.54.block_sparse_moe.experts.69.w3", "model.layers.54.block_sparse_moe.experts.70.w3", "model.layers.54.block_sparse_moe.experts.71.w3", "model.layers.54.block_sparse_moe.experts.72.w3", "model.layers.54.block_sparse_moe.experts.73.w3", "model.layers.54.block_sparse_moe.experts.74.w3", "model.layers.54.block_sparse_moe.experts.75.w3", "model.layers.54.block_sparse_moe.experts.76.w3", "model.layers.54.block_sparse_moe.experts.77.w3", "model.layers.54.block_sparse_moe.experts.78.w3", "model.layers.54.block_sparse_moe.experts.79.w3", "model.layers.54.block_sparse_moe.experts.80.w3", "model.layers.54.block_sparse_moe.experts.81.w3", "model.layers.54.block_sparse_moe.experts.82.w3", "model.layers.54.block_sparse_moe.experts.83.w3", "model.layers.54.block_sparse_moe.experts.84.w3", "model.layers.54.block_sparse_moe.experts.85.w3", "model.layers.54.block_sparse_moe.experts.86.w3", "model.layers.54.block_sparse_moe.experts.87.w3", "model.layers.54.block_sparse_moe.experts.88.w3", "model.layers.54.block_sparse_moe.experts.89.w3", "model.layers.54.block_sparse_moe.experts.90.w3", "model.layers.54.block_sparse_moe.experts.91.w3", "model.layers.54.block_sparse_moe.experts.92.w3", "model.layers.54.block_sparse_moe.experts.93.w3", "model.layers.54.block_sparse_moe.experts.94.w3", "model.layers.54.block_sparse_moe.experts.95.w3", "model.layers.54.block_sparse_moe.experts.96.w3", "model.layers.54.block_sparse_moe.experts.97.w3", "model.layers.54.block_sparse_moe.experts.98.w3", "model.layers.54.block_sparse_moe.experts.99.w3", "model.layers.54.block_sparse_moe.experts.100.w3", "model.layers.54.block_sparse_moe.experts.101.w3", "model.layers.54.block_sparse_moe.experts.102.w3", "model.layers.54.block_sparse_moe.experts.103.w3", "model.layers.54.block_sparse_moe.experts.104.w3", "model.layers.54.block_sparse_moe.experts.105.w3", "model.layers.54.block_sparse_moe.experts.106.w3", "model.layers.54.block_sparse_moe.experts.107.w3", "model.layers.54.block_sparse_moe.experts.108.w3", "model.layers.54.block_sparse_moe.experts.109.w3", "model.layers.54.block_sparse_moe.experts.110.w3", "model.layers.54.block_sparse_moe.experts.111.w3", "model.layers.54.block_sparse_moe.experts.112.w3", "model.layers.54.block_sparse_moe.experts.113.w3", "model.layers.54.block_sparse_moe.experts.114.w3", "model.layers.54.block_sparse_moe.experts.115.w3", "model.layers.54.block_sparse_moe.experts.116.w3", "model.layers.54.block_sparse_moe.experts.117.w3", "model.layers.54.block_sparse_moe.experts.118.w3", "model.layers.54.block_sparse_moe.experts.119.w3", "model.layers.54.block_sparse_moe.experts.120.w3", "model.layers.54.block_sparse_moe.experts.121.w3", "model.layers.54.block_sparse_moe.experts.122.w3", "model.layers.54.block_sparse_moe.experts.123.w3", "model.layers.54.block_sparse_moe.experts.124.w3", "model.layers.54.block_sparse_moe.experts.125.w3", "model.layers.54.block_sparse_moe.experts.126.w3", "model.layers.54.block_sparse_moe.experts.127.w3", "model.layers.54.block_sparse_moe.experts.128.w3", "model.layers.54.block_sparse_moe.experts.129.w3", "model.layers.54.block_sparse_moe.experts.130.w3", "model.layers.54.block_sparse_moe.experts.131.w3", "model.layers.54.block_sparse_moe.experts.132.w3", "model.layers.54.block_sparse_moe.experts.133.w3", "model.layers.54.block_sparse_moe.experts.134.w3", "model.layers.54.block_sparse_moe.experts.135.w3", "model.layers.54.block_sparse_moe.experts.136.w3", "model.layers.54.block_sparse_moe.experts.137.w3", "model.layers.54.block_sparse_moe.experts.138.w3", "model.layers.54.block_sparse_moe.experts.139.w3", "model.layers.54.block_sparse_moe.experts.140.w3", "model.layers.54.block_sparse_moe.experts.141.w3", "model.layers.54.block_sparse_moe.experts.142.w3", "model.layers.54.block_sparse_moe.experts.143.w3", "model.layers.54.block_sparse_moe.experts.144.w3", "model.layers.54.block_sparse_moe.experts.145.w3", "model.layers.54.block_sparse_moe.experts.146.w3", "model.layers.54.block_sparse_moe.experts.147.w3", "model.layers.54.block_sparse_moe.experts.148.w3", "model.layers.54.block_sparse_moe.experts.149.w3", "model.layers.54.block_sparse_moe.experts.150.w3", "model.layers.54.block_sparse_moe.experts.151.w3", "model.layers.54.block_sparse_moe.experts.152.w3", "model.layers.54.block_sparse_moe.experts.153.w3", "model.layers.54.block_sparse_moe.experts.154.w3", "model.layers.54.block_sparse_moe.experts.155.w3", "model.layers.54.block_sparse_moe.experts.156.w3", "model.layers.54.block_sparse_moe.experts.157.w3", "model.layers.54.block_sparse_moe.experts.158.w3", "model.layers.54.block_sparse_moe.experts.159.w3", "model.layers.54.block_sparse_moe.experts.160.w3", "model.layers.54.block_sparse_moe.experts.161.w3", "model.layers.54.block_sparse_moe.experts.162.w3", "model.layers.54.block_sparse_moe.experts.163.w3", "model.layers.54.block_sparse_moe.experts.164.w3", "model.layers.54.block_sparse_moe.experts.165.w3", "model.layers.54.block_sparse_moe.experts.166.w3", "model.layers.54.block_sparse_moe.experts.167.w3", "model.layers.54.block_sparse_moe.experts.168.w3", "model.layers.54.block_sparse_moe.experts.169.w3", "model.layers.54.block_sparse_moe.experts.170.w3", "model.layers.54.block_sparse_moe.experts.171.w3", "model.layers.54.block_sparse_moe.experts.172.w3", "model.layers.54.block_sparse_moe.experts.173.w3", "model.layers.54.block_sparse_moe.experts.174.w3", "model.layers.54.block_sparse_moe.experts.175.w3", "model.layers.54.block_sparse_moe.experts.176.w3", "model.layers.54.block_sparse_moe.experts.177.w3", "model.layers.54.block_sparse_moe.experts.178.w3", "model.layers.54.block_sparse_moe.experts.179.w3", "model.layers.54.block_sparse_moe.experts.180.w3", "model.layers.54.block_sparse_moe.experts.181.w3", "model.layers.54.block_sparse_moe.experts.182.w3", "model.layers.54.block_sparse_moe.experts.183.w3", "model.layers.54.block_sparse_moe.experts.184.w3", "model.layers.54.block_sparse_moe.experts.185.w3", "model.layers.54.block_sparse_moe.experts.186.w3", "model.layers.54.block_sparse_moe.experts.187.w3", "model.layers.54.block_sparse_moe.experts.188.w3", "model.layers.54.block_sparse_moe.experts.189.w3", "model.layers.54.block_sparse_moe.experts.190.w3", "model.layers.54.block_sparse_moe.experts.191.w3", "model.layers.54.block_sparse_moe.experts.192.w3", "model.layers.54.block_sparse_moe.experts.193.w3", "model.layers.54.block_sparse_moe.experts.194.w3", "model.layers.54.block_sparse_moe.experts.195.w3", "model.layers.54.block_sparse_moe.experts.196.w3", "model.layers.54.block_sparse_moe.experts.197.w3", "model.layers.54.block_sparse_moe.experts.198.w3", "model.layers.54.block_sparse_moe.experts.199.w3", "model.layers.54.block_sparse_moe.experts.200.w3", "model.layers.54.block_sparse_moe.experts.201.w3", "model.layers.54.block_sparse_moe.experts.202.w3", "model.layers.54.block_sparse_moe.experts.203.w3", "model.layers.54.block_sparse_moe.experts.204.w3", "model.layers.54.block_sparse_moe.experts.205.w3", "model.layers.54.block_sparse_moe.experts.206.w3", "model.layers.54.block_sparse_moe.experts.207.w3", "model.layers.54.block_sparse_moe.experts.208.w3", "model.layers.54.block_sparse_moe.experts.209.w3", "model.layers.54.block_sparse_moe.experts.210.w3", "model.layers.54.block_sparse_moe.experts.211.w3", "model.layers.54.block_sparse_moe.experts.212.w3", "model.layers.54.block_sparse_moe.experts.213.w3", "model.layers.54.block_sparse_moe.experts.214.w3", "model.layers.54.block_sparse_moe.experts.215.w3", "model.layers.54.block_sparse_moe.experts.216.w3", "model.layers.54.block_sparse_moe.experts.217.w3", "model.layers.54.block_sparse_moe.experts.218.w3", "model.layers.54.block_sparse_moe.experts.219.w3", "model.layers.54.block_sparse_moe.experts.220.w3", "model.layers.54.block_sparse_moe.experts.221.w3", "model.layers.54.block_sparse_moe.experts.222.w3", "model.layers.54.block_sparse_moe.experts.223.w3", "model.layers.54.block_sparse_moe.experts.224.w3", "model.layers.54.block_sparse_moe.experts.225.w3", "model.layers.54.block_sparse_moe.experts.226.w3", "model.layers.54.block_sparse_moe.experts.227.w3", "model.layers.54.block_sparse_moe.experts.228.w3", "model.layers.54.block_sparse_moe.experts.229.w3", "model.layers.54.block_sparse_moe.experts.230.w3", "model.layers.54.block_sparse_moe.experts.231.w3", "model.layers.54.block_sparse_moe.experts.232.w3", "model.layers.54.block_sparse_moe.experts.233.w3", "model.layers.54.block_sparse_moe.experts.234.w3", "model.layers.54.block_sparse_moe.experts.235.w3", "model.layers.54.block_sparse_moe.experts.236.w3", "model.layers.54.block_sparse_moe.experts.237.w3", "model.layers.54.block_sparse_moe.experts.238.w3", "model.layers.54.block_sparse_moe.experts.239.w3", "model.layers.54.block_sparse_moe.experts.240.w3", "model.layers.54.block_sparse_moe.experts.241.w3", "model.layers.54.block_sparse_moe.experts.242.w3", "model.layers.54.block_sparse_moe.experts.243.w3", "model.layers.54.block_sparse_moe.experts.244.w3", "model.layers.54.block_sparse_moe.experts.245.w3", "model.layers.54.block_sparse_moe.experts.246.w3", "model.layers.54.block_sparse_moe.experts.247.w3", "model.layers.54.block_sparse_moe.experts.248.w3", "model.layers.54.block_sparse_moe.experts.249.w3", "model.layers.54.block_sparse_moe.experts.250.w3", "model.layers.54.block_sparse_moe.experts.251.w3", "model.layers.54.block_sparse_moe.experts.252.w3", "model.layers.54.block_sparse_moe.experts.253.w3", "model.layers.54.block_sparse_moe.experts.254.w3", "model.layers.54.block_sparse_moe.experts.255.w3", "model.layers.54.block_sparse_moe.experts.0.w2", "model.layers.54.block_sparse_moe.experts.1.w2", "model.layers.54.block_sparse_moe.experts.2.w2", "model.layers.54.block_sparse_moe.experts.3.w2", "model.layers.54.block_sparse_moe.experts.4.w2", "model.layers.54.block_sparse_moe.experts.5.w2", "model.layers.54.block_sparse_moe.experts.6.w2", "model.layers.54.block_sparse_moe.experts.7.w2", "model.layers.54.block_sparse_moe.experts.8.w2", "model.layers.54.block_sparse_moe.experts.9.w2", "model.layers.54.block_sparse_moe.experts.10.w2", "model.layers.54.block_sparse_moe.experts.11.w2", "model.layers.54.block_sparse_moe.experts.12.w2", "model.layers.54.block_sparse_moe.experts.13.w2", "model.layers.54.block_sparse_moe.experts.14.w2", "model.layers.54.block_sparse_moe.experts.15.w2", "model.layers.54.block_sparse_moe.experts.16.w2", "model.layers.54.block_sparse_moe.experts.17.w2", "model.layers.54.block_sparse_moe.experts.18.w2", "model.layers.54.block_sparse_moe.experts.19.w2", "model.layers.54.block_sparse_moe.experts.20.w2", "model.layers.54.block_sparse_moe.experts.21.w2", "model.layers.54.block_sparse_moe.experts.22.w2", "model.layers.54.block_sparse_moe.experts.23.w2", "model.layers.54.block_sparse_moe.experts.24.w2", "model.layers.54.block_sparse_moe.experts.25.w2", "model.layers.54.block_sparse_moe.experts.26.w2", "model.layers.54.block_sparse_moe.experts.27.w2", "model.layers.54.block_sparse_moe.experts.28.w2", "model.layers.54.block_sparse_moe.experts.29.w2", "model.layers.54.block_sparse_moe.experts.30.w2", "model.layers.54.block_sparse_moe.experts.31.w2", "model.layers.54.block_sparse_moe.experts.32.w2", "model.layers.54.block_sparse_moe.experts.33.w2", "model.layers.54.block_sparse_moe.experts.34.w2", "model.layers.54.block_sparse_moe.experts.35.w2", "model.layers.54.block_sparse_moe.experts.36.w2", "model.layers.54.block_sparse_moe.experts.37.w2", "model.layers.54.block_sparse_moe.experts.38.w2", "model.layers.54.block_sparse_moe.experts.39.w2", "model.layers.54.block_sparse_moe.experts.40.w2", "model.layers.54.block_sparse_moe.experts.41.w2", "model.layers.54.block_sparse_moe.experts.42.w2", "model.layers.54.block_sparse_moe.experts.43.w2", "model.layers.54.block_sparse_moe.experts.44.w2", "model.layers.54.block_sparse_moe.experts.45.w2", "model.layers.54.block_sparse_moe.experts.46.w2", "model.layers.54.block_sparse_moe.experts.47.w2", "model.layers.54.block_sparse_moe.experts.48.w2", "model.layers.54.block_sparse_moe.experts.49.w2", "model.layers.54.block_sparse_moe.experts.50.w2", "model.layers.54.block_sparse_moe.experts.51.w2", "model.layers.54.block_sparse_moe.experts.52.w2", "model.layers.54.block_sparse_moe.experts.53.w2", "model.layers.54.block_sparse_moe.experts.54.w2", "model.layers.54.block_sparse_moe.experts.55.w2", "model.layers.54.block_sparse_moe.experts.56.w2", "model.layers.54.block_sparse_moe.experts.57.w2", "model.layers.54.block_sparse_moe.experts.58.w2", "model.layers.54.block_sparse_moe.experts.59.w2", "model.layers.54.block_sparse_moe.experts.60.w2", "model.layers.54.block_sparse_moe.experts.61.w2", "model.layers.54.block_sparse_moe.experts.62.w2", "model.layers.54.block_sparse_moe.experts.63.w2", "model.layers.54.block_sparse_moe.experts.64.w2", "model.layers.54.block_sparse_moe.experts.65.w2", "model.layers.54.block_sparse_moe.experts.66.w2", "model.layers.54.block_sparse_moe.experts.67.w2", "model.layers.54.block_sparse_moe.experts.68.w2", "model.layers.54.block_sparse_moe.experts.69.w2", "model.layers.54.block_sparse_moe.experts.70.w2", "model.layers.54.block_sparse_moe.experts.71.w2", "model.layers.54.block_sparse_moe.experts.72.w2", "model.layers.54.block_sparse_moe.experts.73.w2", "model.layers.54.block_sparse_moe.experts.74.w2", "model.layers.54.block_sparse_moe.experts.75.w2", "model.layers.54.block_sparse_moe.experts.76.w2", "model.layers.54.block_sparse_moe.experts.77.w2", "model.layers.54.block_sparse_moe.experts.78.w2", "model.layers.54.block_sparse_moe.experts.79.w2", "model.layers.54.block_sparse_moe.experts.80.w2", "model.layers.54.block_sparse_moe.experts.81.w2", "model.layers.54.block_sparse_moe.experts.82.w2", "model.layers.54.block_sparse_moe.experts.83.w2", "model.layers.54.block_sparse_moe.experts.84.w2", "model.layers.54.block_sparse_moe.experts.85.w2", "model.layers.54.block_sparse_moe.experts.86.w2", "model.layers.54.block_sparse_moe.experts.87.w2", "model.layers.54.block_sparse_moe.experts.88.w2", "model.layers.54.block_sparse_moe.experts.89.w2", "model.layers.54.block_sparse_moe.experts.90.w2", "model.layers.54.block_sparse_moe.experts.91.w2", "model.layers.54.block_sparse_moe.experts.92.w2", "model.layers.54.block_sparse_moe.experts.93.w2", "model.layers.54.block_sparse_moe.experts.94.w2", "model.layers.54.block_sparse_moe.experts.95.w2", "model.layers.54.block_sparse_moe.experts.96.w2", "model.layers.54.block_sparse_moe.experts.97.w2", "model.layers.54.block_sparse_moe.experts.98.w2", "model.layers.54.block_sparse_moe.experts.99.w2", "model.layers.54.block_sparse_moe.experts.100.w2", "model.layers.54.block_sparse_moe.experts.101.w2", "model.layers.54.block_sparse_moe.experts.102.w2", "model.layers.54.block_sparse_moe.experts.103.w2", "model.layers.54.block_sparse_moe.experts.104.w2", "model.layers.54.block_sparse_moe.experts.105.w2", "model.layers.54.block_sparse_moe.experts.106.w2", "model.layers.54.block_sparse_moe.experts.107.w2", "model.layers.54.block_sparse_moe.experts.108.w2", "model.layers.54.block_sparse_moe.experts.109.w2", "model.layers.54.block_sparse_moe.experts.110.w2", "model.layers.54.block_sparse_moe.experts.111.w2", "model.layers.54.block_sparse_moe.experts.112.w2", "model.layers.54.block_sparse_moe.experts.113.w2", "model.layers.54.block_sparse_moe.experts.114.w2", "model.layers.54.block_sparse_moe.experts.115.w2", "model.layers.54.block_sparse_moe.experts.116.w2", "model.layers.54.block_sparse_moe.experts.117.w2", "model.layers.54.block_sparse_moe.experts.118.w2", "model.layers.54.block_sparse_moe.experts.119.w2", "model.layers.54.block_sparse_moe.experts.120.w2", "model.layers.54.block_sparse_moe.experts.121.w2", "model.layers.54.block_sparse_moe.experts.122.w2", "model.layers.54.block_sparse_moe.experts.123.w2", "model.layers.54.block_sparse_moe.experts.124.w2", "model.layers.54.block_sparse_moe.experts.125.w2", "model.layers.54.block_sparse_moe.experts.126.w2", "model.layers.54.block_sparse_moe.experts.127.w2", "model.layers.54.block_sparse_moe.experts.128.w2", "model.layers.54.block_sparse_moe.experts.129.w2", "model.layers.54.block_sparse_moe.experts.130.w2", "model.layers.54.block_sparse_moe.experts.131.w2", "model.layers.54.block_sparse_moe.experts.132.w2", "model.layers.54.block_sparse_moe.experts.133.w2", "model.layers.54.block_sparse_moe.experts.134.w2", "model.layers.54.block_sparse_moe.experts.135.w2", "model.layers.54.block_sparse_moe.experts.136.w2", "model.layers.54.block_sparse_moe.experts.137.w2", "model.layers.54.block_sparse_moe.experts.138.w2", "model.layers.54.block_sparse_moe.experts.139.w2", "model.layers.54.block_sparse_moe.experts.140.w2", "model.layers.54.block_sparse_moe.experts.141.w2", "model.layers.54.block_sparse_moe.experts.142.w2", "model.layers.54.block_sparse_moe.experts.143.w2", "model.layers.54.block_sparse_moe.experts.144.w2", "model.layers.54.block_sparse_moe.experts.145.w2", "model.layers.54.block_sparse_moe.experts.146.w2", "model.layers.54.block_sparse_moe.experts.147.w2", "model.layers.54.block_sparse_moe.experts.148.w2", "model.layers.54.block_sparse_moe.experts.149.w2", "model.layers.54.block_sparse_moe.experts.150.w2", "model.layers.54.block_sparse_moe.experts.151.w2", "model.layers.54.block_sparse_moe.experts.152.w2", "model.layers.54.block_sparse_moe.experts.153.w2", "model.layers.54.block_sparse_moe.experts.154.w2", "model.layers.54.block_sparse_moe.experts.155.w2", "model.layers.54.block_sparse_moe.experts.156.w2", "model.layers.54.block_sparse_moe.experts.157.w2", "model.layers.54.block_sparse_moe.experts.158.w2", "model.layers.54.block_sparse_moe.experts.159.w2", "model.layers.54.block_sparse_moe.experts.160.w2", "model.layers.54.block_sparse_moe.experts.161.w2", "model.layers.54.block_sparse_moe.experts.162.w2", "model.layers.54.block_sparse_moe.experts.163.w2", "model.layers.54.block_sparse_moe.experts.164.w2", "model.layers.54.block_sparse_moe.experts.165.w2", "model.layers.54.block_sparse_moe.experts.166.w2", "model.layers.54.block_sparse_moe.experts.167.w2", "model.layers.54.block_sparse_moe.experts.168.w2", "model.layers.54.block_sparse_moe.experts.169.w2", "model.layers.54.block_sparse_moe.experts.170.w2", "model.layers.54.block_sparse_moe.experts.171.w2", "model.layers.54.block_sparse_moe.experts.172.w2", "model.layers.54.block_sparse_moe.experts.173.w2", "model.layers.54.block_sparse_moe.experts.174.w2", "model.layers.54.block_sparse_moe.experts.175.w2", "model.layers.54.block_sparse_moe.experts.176.w2", "model.layers.54.block_sparse_moe.experts.177.w2", "model.layers.54.block_sparse_moe.experts.178.w2", "model.layers.54.block_sparse_moe.experts.179.w2", "model.layers.54.block_sparse_moe.experts.180.w2", "model.layers.54.block_sparse_moe.experts.181.w2", "model.layers.54.block_sparse_moe.experts.182.w2", "model.layers.54.block_sparse_moe.experts.183.w2", "model.layers.54.block_sparse_moe.experts.184.w2", "model.layers.54.block_sparse_moe.experts.185.w2", "model.layers.54.block_sparse_moe.experts.186.w2", "model.layers.54.block_sparse_moe.experts.187.w2", "model.layers.54.block_sparse_moe.experts.188.w2", "model.layers.54.block_sparse_moe.experts.189.w2", "model.layers.54.block_sparse_moe.experts.190.w2", "model.layers.54.block_sparse_moe.experts.191.w2", "model.layers.54.block_sparse_moe.experts.192.w2", "model.layers.54.block_sparse_moe.experts.193.w2", "model.layers.54.block_sparse_moe.experts.194.w2", "model.layers.54.block_sparse_moe.experts.195.w2", "model.layers.54.block_sparse_moe.experts.196.w2", "model.layers.54.block_sparse_moe.experts.197.w2", "model.layers.54.block_sparse_moe.experts.198.w2", "model.layers.54.block_sparse_moe.experts.199.w2", "model.layers.54.block_sparse_moe.experts.200.w2", "model.layers.54.block_sparse_moe.experts.201.w2", "model.layers.54.block_sparse_moe.experts.202.w2", "model.layers.54.block_sparse_moe.experts.203.w2", "model.layers.54.block_sparse_moe.experts.204.w2", "model.layers.54.block_sparse_moe.experts.205.w2", "model.layers.54.block_sparse_moe.experts.206.w2", "model.layers.54.block_sparse_moe.experts.207.w2", "model.layers.54.block_sparse_moe.experts.208.w2", "model.layers.54.block_sparse_moe.experts.209.w2", "model.layers.54.block_sparse_moe.experts.210.w2", "model.layers.54.block_sparse_moe.experts.211.w2", "model.layers.54.block_sparse_moe.experts.212.w2", "model.layers.54.block_sparse_moe.experts.213.w2", "model.layers.54.block_sparse_moe.experts.214.w2", "model.layers.54.block_sparse_moe.experts.215.w2", "model.layers.54.block_sparse_moe.experts.216.w2", "model.layers.54.block_sparse_moe.experts.217.w2", "model.layers.54.block_sparse_moe.experts.218.w2", "model.layers.54.block_sparse_moe.experts.219.w2", "model.layers.54.block_sparse_moe.experts.220.w2", "model.layers.54.block_sparse_moe.experts.221.w2", "model.layers.54.block_sparse_moe.experts.222.w2", "model.layers.54.block_sparse_moe.experts.223.w2", "model.layers.54.block_sparse_moe.experts.224.w2", "model.layers.54.block_sparse_moe.experts.225.w2", "model.layers.54.block_sparse_moe.experts.226.w2", "model.layers.54.block_sparse_moe.experts.227.w2", "model.layers.54.block_sparse_moe.experts.228.w2", "model.layers.54.block_sparse_moe.experts.229.w2", "model.layers.54.block_sparse_moe.experts.230.w2", "model.layers.54.block_sparse_moe.experts.231.w2", "model.layers.54.block_sparse_moe.experts.232.w2", "model.layers.54.block_sparse_moe.experts.233.w2", "model.layers.54.block_sparse_moe.experts.234.w2", "model.layers.54.block_sparse_moe.experts.235.w2", "model.layers.54.block_sparse_moe.experts.236.w2", "model.layers.54.block_sparse_moe.experts.237.w2", "model.layers.54.block_sparse_moe.experts.238.w2", "model.layers.54.block_sparse_moe.experts.239.w2", "model.layers.54.block_sparse_moe.experts.240.w2", "model.layers.54.block_sparse_moe.experts.241.w2", "model.layers.54.block_sparse_moe.experts.242.w2", "model.layers.54.block_sparse_moe.experts.243.w2", "model.layers.54.block_sparse_moe.experts.244.w2", "model.layers.54.block_sparse_moe.experts.245.w2", "model.layers.54.block_sparse_moe.experts.246.w2", "model.layers.54.block_sparse_moe.experts.247.w2", "model.layers.54.block_sparse_moe.experts.248.w2", "model.layers.54.block_sparse_moe.experts.249.w2", "model.layers.54.block_sparse_moe.experts.250.w2", "model.layers.54.block_sparse_moe.experts.251.w2", "model.layers.54.block_sparse_moe.experts.252.w2", "model.layers.54.block_sparse_moe.experts.253.w2", "model.layers.54.block_sparse_moe.experts.254.w2", "model.layers.54.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00021186918020249523, "dbits": 3623878656 } ] }, { "idx": 110, "layers": [ "model.layers.55.self_attn.q_proj", "model.layers.55.self_attn.k_proj", "model.layers.55.self_attn.v_proj", "model.layers.55.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0012974200770258792, "dbits": 44040192 } ] }, { "idx": 111, "layers": [ "model.layers.55.block_sparse_moe.experts.0.w1", "model.layers.55.block_sparse_moe.experts.1.w1", "model.layers.55.block_sparse_moe.experts.2.w1", "model.layers.55.block_sparse_moe.experts.3.w1", "model.layers.55.block_sparse_moe.experts.4.w1", "model.layers.55.block_sparse_moe.experts.5.w1", "model.layers.55.block_sparse_moe.experts.6.w1", "model.layers.55.block_sparse_moe.experts.7.w1", "model.layers.55.block_sparse_moe.experts.8.w1", "model.layers.55.block_sparse_moe.experts.9.w1", "model.layers.55.block_sparse_moe.experts.10.w1", "model.layers.55.block_sparse_moe.experts.11.w1", "model.layers.55.block_sparse_moe.experts.12.w1", "model.layers.55.block_sparse_moe.experts.13.w1", "model.layers.55.block_sparse_moe.experts.14.w1", "model.layers.55.block_sparse_moe.experts.15.w1", "model.layers.55.block_sparse_moe.experts.16.w1", "model.layers.55.block_sparse_moe.experts.17.w1", "model.layers.55.block_sparse_moe.experts.18.w1", "model.layers.55.block_sparse_moe.experts.19.w1", "model.layers.55.block_sparse_moe.experts.20.w1", "model.layers.55.block_sparse_moe.experts.21.w1", "model.layers.55.block_sparse_moe.experts.22.w1", "model.layers.55.block_sparse_moe.experts.23.w1", "model.layers.55.block_sparse_moe.experts.24.w1", "model.layers.55.block_sparse_moe.experts.25.w1", "model.layers.55.block_sparse_moe.experts.26.w1", "model.layers.55.block_sparse_moe.experts.27.w1", "model.layers.55.block_sparse_moe.experts.28.w1", "model.layers.55.block_sparse_moe.experts.29.w1", "model.layers.55.block_sparse_moe.experts.30.w1", "model.layers.55.block_sparse_moe.experts.31.w1", "model.layers.55.block_sparse_moe.experts.32.w1", "model.layers.55.block_sparse_moe.experts.33.w1", "model.layers.55.block_sparse_moe.experts.34.w1", "model.layers.55.block_sparse_moe.experts.35.w1", "model.layers.55.block_sparse_moe.experts.36.w1", "model.layers.55.block_sparse_moe.experts.37.w1", "model.layers.55.block_sparse_moe.experts.38.w1", "model.layers.55.block_sparse_moe.experts.39.w1", "model.layers.55.block_sparse_moe.experts.40.w1", "model.layers.55.block_sparse_moe.experts.41.w1", "model.layers.55.block_sparse_moe.experts.42.w1", "model.layers.55.block_sparse_moe.experts.43.w1", "model.layers.55.block_sparse_moe.experts.44.w1", "model.layers.55.block_sparse_moe.experts.45.w1", "model.layers.55.block_sparse_moe.experts.46.w1", "model.layers.55.block_sparse_moe.experts.47.w1", "model.layers.55.block_sparse_moe.experts.48.w1", "model.layers.55.block_sparse_moe.experts.49.w1", "model.layers.55.block_sparse_moe.experts.50.w1", "model.layers.55.block_sparse_moe.experts.51.w1", "model.layers.55.block_sparse_moe.experts.52.w1", "model.layers.55.block_sparse_moe.experts.53.w1", "model.layers.55.block_sparse_moe.experts.54.w1", "model.layers.55.block_sparse_moe.experts.55.w1", "model.layers.55.block_sparse_moe.experts.56.w1", "model.layers.55.block_sparse_moe.experts.57.w1", "model.layers.55.block_sparse_moe.experts.58.w1", "model.layers.55.block_sparse_moe.experts.59.w1", "model.layers.55.block_sparse_moe.experts.60.w1", "model.layers.55.block_sparse_moe.experts.61.w1", "model.layers.55.block_sparse_moe.experts.62.w1", "model.layers.55.block_sparse_moe.experts.63.w1", "model.layers.55.block_sparse_moe.experts.64.w1", "model.layers.55.block_sparse_moe.experts.65.w1", "model.layers.55.block_sparse_moe.experts.66.w1", "model.layers.55.block_sparse_moe.experts.67.w1", "model.layers.55.block_sparse_moe.experts.68.w1", "model.layers.55.block_sparse_moe.experts.69.w1", "model.layers.55.block_sparse_moe.experts.70.w1", "model.layers.55.block_sparse_moe.experts.71.w1", "model.layers.55.block_sparse_moe.experts.72.w1", "model.layers.55.block_sparse_moe.experts.73.w1", "model.layers.55.block_sparse_moe.experts.74.w1", "model.layers.55.block_sparse_moe.experts.75.w1", "model.layers.55.block_sparse_moe.experts.76.w1", "model.layers.55.block_sparse_moe.experts.77.w1", "model.layers.55.block_sparse_moe.experts.78.w1", "model.layers.55.block_sparse_moe.experts.79.w1", "model.layers.55.block_sparse_moe.experts.80.w1", "model.layers.55.block_sparse_moe.experts.81.w1", "model.layers.55.block_sparse_moe.experts.82.w1", "model.layers.55.block_sparse_moe.experts.83.w1", "model.layers.55.block_sparse_moe.experts.84.w1", "model.layers.55.block_sparse_moe.experts.85.w1", "model.layers.55.block_sparse_moe.experts.86.w1", "model.layers.55.block_sparse_moe.experts.87.w1", "model.layers.55.block_sparse_moe.experts.88.w1", "model.layers.55.block_sparse_moe.experts.89.w1", "model.layers.55.block_sparse_moe.experts.90.w1", "model.layers.55.block_sparse_moe.experts.91.w1", "model.layers.55.block_sparse_moe.experts.92.w1", "model.layers.55.block_sparse_moe.experts.93.w1", "model.layers.55.block_sparse_moe.experts.94.w1", "model.layers.55.block_sparse_moe.experts.95.w1", "model.layers.55.block_sparse_moe.experts.96.w1", "model.layers.55.block_sparse_moe.experts.97.w1", "model.layers.55.block_sparse_moe.experts.98.w1", "model.layers.55.block_sparse_moe.experts.99.w1", "model.layers.55.block_sparse_moe.experts.100.w1", "model.layers.55.block_sparse_moe.experts.101.w1", "model.layers.55.block_sparse_moe.experts.102.w1", "model.layers.55.block_sparse_moe.experts.103.w1", "model.layers.55.block_sparse_moe.experts.104.w1", "model.layers.55.block_sparse_moe.experts.105.w1", "model.layers.55.block_sparse_moe.experts.106.w1", "model.layers.55.block_sparse_moe.experts.107.w1", "model.layers.55.block_sparse_moe.experts.108.w1", "model.layers.55.block_sparse_moe.experts.109.w1", "model.layers.55.block_sparse_moe.experts.110.w1", "model.layers.55.block_sparse_moe.experts.111.w1", "model.layers.55.block_sparse_moe.experts.112.w1", "model.layers.55.block_sparse_moe.experts.113.w1", "model.layers.55.block_sparse_moe.experts.114.w1", "model.layers.55.block_sparse_moe.experts.115.w1", "model.layers.55.block_sparse_moe.experts.116.w1", "model.layers.55.block_sparse_moe.experts.117.w1", "model.layers.55.block_sparse_moe.experts.118.w1", "model.layers.55.block_sparse_moe.experts.119.w1", "model.layers.55.block_sparse_moe.experts.120.w1", "model.layers.55.block_sparse_moe.experts.121.w1", "model.layers.55.block_sparse_moe.experts.122.w1", "model.layers.55.block_sparse_moe.experts.123.w1", "model.layers.55.block_sparse_moe.experts.124.w1", "model.layers.55.block_sparse_moe.experts.125.w1", "model.layers.55.block_sparse_moe.experts.126.w1", "model.layers.55.block_sparse_moe.experts.127.w1", "model.layers.55.block_sparse_moe.experts.128.w1", "model.layers.55.block_sparse_moe.experts.129.w1", "model.layers.55.block_sparse_moe.experts.130.w1", "model.layers.55.block_sparse_moe.experts.131.w1", "model.layers.55.block_sparse_moe.experts.132.w1", "model.layers.55.block_sparse_moe.experts.133.w1", "model.layers.55.block_sparse_moe.experts.134.w1", "model.layers.55.block_sparse_moe.experts.135.w1", "model.layers.55.block_sparse_moe.experts.136.w1", "model.layers.55.block_sparse_moe.experts.137.w1", "model.layers.55.block_sparse_moe.experts.138.w1", "model.layers.55.block_sparse_moe.experts.139.w1", "model.layers.55.block_sparse_moe.experts.140.w1", "model.layers.55.block_sparse_moe.experts.141.w1", "model.layers.55.block_sparse_moe.experts.142.w1", "model.layers.55.block_sparse_moe.experts.143.w1", "model.layers.55.block_sparse_moe.experts.144.w1", "model.layers.55.block_sparse_moe.experts.145.w1", "model.layers.55.block_sparse_moe.experts.146.w1", "model.layers.55.block_sparse_moe.experts.147.w1", "model.layers.55.block_sparse_moe.experts.148.w1", "model.layers.55.block_sparse_moe.experts.149.w1", "model.layers.55.block_sparse_moe.experts.150.w1", "model.layers.55.block_sparse_moe.experts.151.w1", "model.layers.55.block_sparse_moe.experts.152.w1", "model.layers.55.block_sparse_moe.experts.153.w1", "model.layers.55.block_sparse_moe.experts.154.w1", "model.layers.55.block_sparse_moe.experts.155.w1", "model.layers.55.block_sparse_moe.experts.156.w1", "model.layers.55.block_sparse_moe.experts.157.w1", "model.layers.55.block_sparse_moe.experts.158.w1", "model.layers.55.block_sparse_moe.experts.159.w1", "model.layers.55.block_sparse_moe.experts.160.w1", "model.layers.55.block_sparse_moe.experts.161.w1", "model.layers.55.block_sparse_moe.experts.162.w1", "model.layers.55.block_sparse_moe.experts.163.w1", "model.layers.55.block_sparse_moe.experts.164.w1", "model.layers.55.block_sparse_moe.experts.165.w1", "model.layers.55.block_sparse_moe.experts.166.w1", "model.layers.55.block_sparse_moe.experts.167.w1", "model.layers.55.block_sparse_moe.experts.168.w1", "model.layers.55.block_sparse_moe.experts.169.w1", "model.layers.55.block_sparse_moe.experts.170.w1", "model.layers.55.block_sparse_moe.experts.171.w1", "model.layers.55.block_sparse_moe.experts.172.w1", "model.layers.55.block_sparse_moe.experts.173.w1", "model.layers.55.block_sparse_moe.experts.174.w1", "model.layers.55.block_sparse_moe.experts.175.w1", "model.layers.55.block_sparse_moe.experts.176.w1", "model.layers.55.block_sparse_moe.experts.177.w1", "model.layers.55.block_sparse_moe.experts.178.w1", "model.layers.55.block_sparse_moe.experts.179.w1", "model.layers.55.block_sparse_moe.experts.180.w1", "model.layers.55.block_sparse_moe.experts.181.w1", "model.layers.55.block_sparse_moe.experts.182.w1", "model.layers.55.block_sparse_moe.experts.183.w1", "model.layers.55.block_sparse_moe.experts.184.w1", "model.layers.55.block_sparse_moe.experts.185.w1", "model.layers.55.block_sparse_moe.experts.186.w1", "model.layers.55.block_sparse_moe.experts.187.w1", "model.layers.55.block_sparse_moe.experts.188.w1", "model.layers.55.block_sparse_moe.experts.189.w1", "model.layers.55.block_sparse_moe.experts.190.w1", "model.layers.55.block_sparse_moe.experts.191.w1", "model.layers.55.block_sparse_moe.experts.192.w1", "model.layers.55.block_sparse_moe.experts.193.w1", "model.layers.55.block_sparse_moe.experts.194.w1", "model.layers.55.block_sparse_moe.experts.195.w1", "model.layers.55.block_sparse_moe.experts.196.w1", "model.layers.55.block_sparse_moe.experts.197.w1", "model.layers.55.block_sparse_moe.experts.198.w1", "model.layers.55.block_sparse_moe.experts.199.w1", "model.layers.55.block_sparse_moe.experts.200.w1", "model.layers.55.block_sparse_moe.experts.201.w1", "model.layers.55.block_sparse_moe.experts.202.w1", "model.layers.55.block_sparse_moe.experts.203.w1", "model.layers.55.block_sparse_moe.experts.204.w1", "model.layers.55.block_sparse_moe.experts.205.w1", "model.layers.55.block_sparse_moe.experts.206.w1", "model.layers.55.block_sparse_moe.experts.207.w1", "model.layers.55.block_sparse_moe.experts.208.w1", "model.layers.55.block_sparse_moe.experts.209.w1", "model.layers.55.block_sparse_moe.experts.210.w1", "model.layers.55.block_sparse_moe.experts.211.w1", "model.layers.55.block_sparse_moe.experts.212.w1", "model.layers.55.block_sparse_moe.experts.213.w1", "model.layers.55.block_sparse_moe.experts.214.w1", "model.layers.55.block_sparse_moe.experts.215.w1", "model.layers.55.block_sparse_moe.experts.216.w1", "model.layers.55.block_sparse_moe.experts.217.w1", "model.layers.55.block_sparse_moe.experts.218.w1", "model.layers.55.block_sparse_moe.experts.219.w1", "model.layers.55.block_sparse_moe.experts.220.w1", "model.layers.55.block_sparse_moe.experts.221.w1", "model.layers.55.block_sparse_moe.experts.222.w1", "model.layers.55.block_sparse_moe.experts.223.w1", "model.layers.55.block_sparse_moe.experts.224.w1", "model.layers.55.block_sparse_moe.experts.225.w1", "model.layers.55.block_sparse_moe.experts.226.w1", "model.layers.55.block_sparse_moe.experts.227.w1", "model.layers.55.block_sparse_moe.experts.228.w1", "model.layers.55.block_sparse_moe.experts.229.w1", "model.layers.55.block_sparse_moe.experts.230.w1", "model.layers.55.block_sparse_moe.experts.231.w1", "model.layers.55.block_sparse_moe.experts.232.w1", "model.layers.55.block_sparse_moe.experts.233.w1", "model.layers.55.block_sparse_moe.experts.234.w1", "model.layers.55.block_sparse_moe.experts.235.w1", "model.layers.55.block_sparse_moe.experts.236.w1", "model.layers.55.block_sparse_moe.experts.237.w1", "model.layers.55.block_sparse_moe.experts.238.w1", "model.layers.55.block_sparse_moe.experts.239.w1", "model.layers.55.block_sparse_moe.experts.240.w1", "model.layers.55.block_sparse_moe.experts.241.w1", "model.layers.55.block_sparse_moe.experts.242.w1", "model.layers.55.block_sparse_moe.experts.243.w1", "model.layers.55.block_sparse_moe.experts.244.w1", "model.layers.55.block_sparse_moe.experts.245.w1", "model.layers.55.block_sparse_moe.experts.246.w1", "model.layers.55.block_sparse_moe.experts.247.w1", "model.layers.55.block_sparse_moe.experts.248.w1", "model.layers.55.block_sparse_moe.experts.249.w1", "model.layers.55.block_sparse_moe.experts.250.w1", "model.layers.55.block_sparse_moe.experts.251.w1", "model.layers.55.block_sparse_moe.experts.252.w1", "model.layers.55.block_sparse_moe.experts.253.w1", "model.layers.55.block_sparse_moe.experts.254.w1", "model.layers.55.block_sparse_moe.experts.255.w1", "model.layers.55.block_sparse_moe.experts.0.w3", "model.layers.55.block_sparse_moe.experts.1.w3", "model.layers.55.block_sparse_moe.experts.2.w3", "model.layers.55.block_sparse_moe.experts.3.w3", "model.layers.55.block_sparse_moe.experts.4.w3", "model.layers.55.block_sparse_moe.experts.5.w3", "model.layers.55.block_sparse_moe.experts.6.w3", "model.layers.55.block_sparse_moe.experts.7.w3", "model.layers.55.block_sparse_moe.experts.8.w3", "model.layers.55.block_sparse_moe.experts.9.w3", "model.layers.55.block_sparse_moe.experts.10.w3", "model.layers.55.block_sparse_moe.experts.11.w3", "model.layers.55.block_sparse_moe.experts.12.w3", "model.layers.55.block_sparse_moe.experts.13.w3", "model.layers.55.block_sparse_moe.experts.14.w3", "model.layers.55.block_sparse_moe.experts.15.w3", "model.layers.55.block_sparse_moe.experts.16.w3", "model.layers.55.block_sparse_moe.experts.17.w3", "model.layers.55.block_sparse_moe.experts.18.w3", "model.layers.55.block_sparse_moe.experts.19.w3", "model.layers.55.block_sparse_moe.experts.20.w3", "model.layers.55.block_sparse_moe.experts.21.w3", "model.layers.55.block_sparse_moe.experts.22.w3", "model.layers.55.block_sparse_moe.experts.23.w3", "model.layers.55.block_sparse_moe.experts.24.w3", "model.layers.55.block_sparse_moe.experts.25.w3", "model.layers.55.block_sparse_moe.experts.26.w3", "model.layers.55.block_sparse_moe.experts.27.w3", "model.layers.55.block_sparse_moe.experts.28.w3", "model.layers.55.block_sparse_moe.experts.29.w3", "model.layers.55.block_sparse_moe.experts.30.w3", "model.layers.55.block_sparse_moe.experts.31.w3", "model.layers.55.block_sparse_moe.experts.32.w3", "model.layers.55.block_sparse_moe.experts.33.w3", "model.layers.55.block_sparse_moe.experts.34.w3", "model.layers.55.block_sparse_moe.experts.35.w3", "model.layers.55.block_sparse_moe.experts.36.w3", "model.layers.55.block_sparse_moe.experts.37.w3", "model.layers.55.block_sparse_moe.experts.38.w3", "model.layers.55.block_sparse_moe.experts.39.w3", "model.layers.55.block_sparse_moe.experts.40.w3", "model.layers.55.block_sparse_moe.experts.41.w3", "model.layers.55.block_sparse_moe.experts.42.w3", "model.layers.55.block_sparse_moe.experts.43.w3", "model.layers.55.block_sparse_moe.experts.44.w3", "model.layers.55.block_sparse_moe.experts.45.w3", "model.layers.55.block_sparse_moe.experts.46.w3", "model.layers.55.block_sparse_moe.experts.47.w3", "model.layers.55.block_sparse_moe.experts.48.w3", "model.layers.55.block_sparse_moe.experts.49.w3", "model.layers.55.block_sparse_moe.experts.50.w3", "model.layers.55.block_sparse_moe.experts.51.w3", "model.layers.55.block_sparse_moe.experts.52.w3", "model.layers.55.block_sparse_moe.experts.53.w3", "model.layers.55.block_sparse_moe.experts.54.w3", "model.layers.55.block_sparse_moe.experts.55.w3", "model.layers.55.block_sparse_moe.experts.56.w3", "model.layers.55.block_sparse_moe.experts.57.w3", "model.layers.55.block_sparse_moe.experts.58.w3", "model.layers.55.block_sparse_moe.experts.59.w3", "model.layers.55.block_sparse_moe.experts.60.w3", "model.layers.55.block_sparse_moe.experts.61.w3", "model.layers.55.block_sparse_moe.experts.62.w3", "model.layers.55.block_sparse_moe.experts.63.w3", "model.layers.55.block_sparse_moe.experts.64.w3", "model.layers.55.block_sparse_moe.experts.65.w3", "model.layers.55.block_sparse_moe.experts.66.w3", "model.layers.55.block_sparse_moe.experts.67.w3", "model.layers.55.block_sparse_moe.experts.68.w3", "model.layers.55.block_sparse_moe.experts.69.w3", "model.layers.55.block_sparse_moe.experts.70.w3", "model.layers.55.block_sparse_moe.experts.71.w3", "model.layers.55.block_sparse_moe.experts.72.w3", "model.layers.55.block_sparse_moe.experts.73.w3", "model.layers.55.block_sparse_moe.experts.74.w3", "model.layers.55.block_sparse_moe.experts.75.w3", "model.layers.55.block_sparse_moe.experts.76.w3", "model.layers.55.block_sparse_moe.experts.77.w3", "model.layers.55.block_sparse_moe.experts.78.w3", "model.layers.55.block_sparse_moe.experts.79.w3", "model.layers.55.block_sparse_moe.experts.80.w3", "model.layers.55.block_sparse_moe.experts.81.w3", "model.layers.55.block_sparse_moe.experts.82.w3", "model.layers.55.block_sparse_moe.experts.83.w3", "model.layers.55.block_sparse_moe.experts.84.w3", "model.layers.55.block_sparse_moe.experts.85.w3", "model.layers.55.block_sparse_moe.experts.86.w3", "model.layers.55.block_sparse_moe.experts.87.w3", "model.layers.55.block_sparse_moe.experts.88.w3", "model.layers.55.block_sparse_moe.experts.89.w3", "model.layers.55.block_sparse_moe.experts.90.w3", "model.layers.55.block_sparse_moe.experts.91.w3", "model.layers.55.block_sparse_moe.experts.92.w3", "model.layers.55.block_sparse_moe.experts.93.w3", "model.layers.55.block_sparse_moe.experts.94.w3", "model.layers.55.block_sparse_moe.experts.95.w3", "model.layers.55.block_sparse_moe.experts.96.w3", "model.layers.55.block_sparse_moe.experts.97.w3", "model.layers.55.block_sparse_moe.experts.98.w3", "model.layers.55.block_sparse_moe.experts.99.w3", "model.layers.55.block_sparse_moe.experts.100.w3", "model.layers.55.block_sparse_moe.experts.101.w3", "model.layers.55.block_sparse_moe.experts.102.w3", "model.layers.55.block_sparse_moe.experts.103.w3", "model.layers.55.block_sparse_moe.experts.104.w3", "model.layers.55.block_sparse_moe.experts.105.w3", "model.layers.55.block_sparse_moe.experts.106.w3", "model.layers.55.block_sparse_moe.experts.107.w3", "model.layers.55.block_sparse_moe.experts.108.w3", "model.layers.55.block_sparse_moe.experts.109.w3", "model.layers.55.block_sparse_moe.experts.110.w3", "model.layers.55.block_sparse_moe.experts.111.w3", "model.layers.55.block_sparse_moe.experts.112.w3", "model.layers.55.block_sparse_moe.experts.113.w3", "model.layers.55.block_sparse_moe.experts.114.w3", "model.layers.55.block_sparse_moe.experts.115.w3", "model.layers.55.block_sparse_moe.experts.116.w3", "model.layers.55.block_sparse_moe.experts.117.w3", "model.layers.55.block_sparse_moe.experts.118.w3", "model.layers.55.block_sparse_moe.experts.119.w3", "model.layers.55.block_sparse_moe.experts.120.w3", "model.layers.55.block_sparse_moe.experts.121.w3", "model.layers.55.block_sparse_moe.experts.122.w3", "model.layers.55.block_sparse_moe.experts.123.w3", "model.layers.55.block_sparse_moe.experts.124.w3", "model.layers.55.block_sparse_moe.experts.125.w3", "model.layers.55.block_sparse_moe.experts.126.w3", "model.layers.55.block_sparse_moe.experts.127.w3", "model.layers.55.block_sparse_moe.experts.128.w3", "model.layers.55.block_sparse_moe.experts.129.w3", "model.layers.55.block_sparse_moe.experts.130.w3", "model.layers.55.block_sparse_moe.experts.131.w3", "model.layers.55.block_sparse_moe.experts.132.w3", "model.layers.55.block_sparse_moe.experts.133.w3", "model.layers.55.block_sparse_moe.experts.134.w3", "model.layers.55.block_sparse_moe.experts.135.w3", "model.layers.55.block_sparse_moe.experts.136.w3", "model.layers.55.block_sparse_moe.experts.137.w3", "model.layers.55.block_sparse_moe.experts.138.w3", "model.layers.55.block_sparse_moe.experts.139.w3", "model.layers.55.block_sparse_moe.experts.140.w3", "model.layers.55.block_sparse_moe.experts.141.w3", "model.layers.55.block_sparse_moe.experts.142.w3", "model.layers.55.block_sparse_moe.experts.143.w3", "model.layers.55.block_sparse_moe.experts.144.w3", "model.layers.55.block_sparse_moe.experts.145.w3", "model.layers.55.block_sparse_moe.experts.146.w3", "model.layers.55.block_sparse_moe.experts.147.w3", "model.layers.55.block_sparse_moe.experts.148.w3", "model.layers.55.block_sparse_moe.experts.149.w3", "model.layers.55.block_sparse_moe.experts.150.w3", "model.layers.55.block_sparse_moe.experts.151.w3", "model.layers.55.block_sparse_moe.experts.152.w3", "model.layers.55.block_sparse_moe.experts.153.w3", "model.layers.55.block_sparse_moe.experts.154.w3", "model.layers.55.block_sparse_moe.experts.155.w3", "model.layers.55.block_sparse_moe.experts.156.w3", "model.layers.55.block_sparse_moe.experts.157.w3", "model.layers.55.block_sparse_moe.experts.158.w3", "model.layers.55.block_sparse_moe.experts.159.w3", "model.layers.55.block_sparse_moe.experts.160.w3", "model.layers.55.block_sparse_moe.experts.161.w3", "model.layers.55.block_sparse_moe.experts.162.w3", "model.layers.55.block_sparse_moe.experts.163.w3", "model.layers.55.block_sparse_moe.experts.164.w3", "model.layers.55.block_sparse_moe.experts.165.w3", "model.layers.55.block_sparse_moe.experts.166.w3", "model.layers.55.block_sparse_moe.experts.167.w3", "model.layers.55.block_sparse_moe.experts.168.w3", "model.layers.55.block_sparse_moe.experts.169.w3", "model.layers.55.block_sparse_moe.experts.170.w3", "model.layers.55.block_sparse_moe.experts.171.w3", "model.layers.55.block_sparse_moe.experts.172.w3", "model.layers.55.block_sparse_moe.experts.173.w3", "model.layers.55.block_sparse_moe.experts.174.w3", "model.layers.55.block_sparse_moe.experts.175.w3", "model.layers.55.block_sparse_moe.experts.176.w3", "model.layers.55.block_sparse_moe.experts.177.w3", "model.layers.55.block_sparse_moe.experts.178.w3", "model.layers.55.block_sparse_moe.experts.179.w3", "model.layers.55.block_sparse_moe.experts.180.w3", "model.layers.55.block_sparse_moe.experts.181.w3", "model.layers.55.block_sparse_moe.experts.182.w3", "model.layers.55.block_sparse_moe.experts.183.w3", "model.layers.55.block_sparse_moe.experts.184.w3", "model.layers.55.block_sparse_moe.experts.185.w3", "model.layers.55.block_sparse_moe.experts.186.w3", "model.layers.55.block_sparse_moe.experts.187.w3", "model.layers.55.block_sparse_moe.experts.188.w3", "model.layers.55.block_sparse_moe.experts.189.w3", "model.layers.55.block_sparse_moe.experts.190.w3", "model.layers.55.block_sparse_moe.experts.191.w3", "model.layers.55.block_sparse_moe.experts.192.w3", "model.layers.55.block_sparse_moe.experts.193.w3", "model.layers.55.block_sparse_moe.experts.194.w3", "model.layers.55.block_sparse_moe.experts.195.w3", "model.layers.55.block_sparse_moe.experts.196.w3", "model.layers.55.block_sparse_moe.experts.197.w3", "model.layers.55.block_sparse_moe.experts.198.w3", "model.layers.55.block_sparse_moe.experts.199.w3", "model.layers.55.block_sparse_moe.experts.200.w3", "model.layers.55.block_sparse_moe.experts.201.w3", "model.layers.55.block_sparse_moe.experts.202.w3", "model.layers.55.block_sparse_moe.experts.203.w3", "model.layers.55.block_sparse_moe.experts.204.w3", "model.layers.55.block_sparse_moe.experts.205.w3", "model.layers.55.block_sparse_moe.experts.206.w3", "model.layers.55.block_sparse_moe.experts.207.w3", "model.layers.55.block_sparse_moe.experts.208.w3", "model.layers.55.block_sparse_moe.experts.209.w3", "model.layers.55.block_sparse_moe.experts.210.w3", "model.layers.55.block_sparse_moe.experts.211.w3", "model.layers.55.block_sparse_moe.experts.212.w3", "model.layers.55.block_sparse_moe.experts.213.w3", "model.layers.55.block_sparse_moe.experts.214.w3", "model.layers.55.block_sparse_moe.experts.215.w3", "model.layers.55.block_sparse_moe.experts.216.w3", "model.layers.55.block_sparse_moe.experts.217.w3", "model.layers.55.block_sparse_moe.experts.218.w3", "model.layers.55.block_sparse_moe.experts.219.w3", "model.layers.55.block_sparse_moe.experts.220.w3", "model.layers.55.block_sparse_moe.experts.221.w3", "model.layers.55.block_sparse_moe.experts.222.w3", "model.layers.55.block_sparse_moe.experts.223.w3", "model.layers.55.block_sparse_moe.experts.224.w3", "model.layers.55.block_sparse_moe.experts.225.w3", "model.layers.55.block_sparse_moe.experts.226.w3", "model.layers.55.block_sparse_moe.experts.227.w3", "model.layers.55.block_sparse_moe.experts.228.w3", "model.layers.55.block_sparse_moe.experts.229.w3", "model.layers.55.block_sparse_moe.experts.230.w3", "model.layers.55.block_sparse_moe.experts.231.w3", "model.layers.55.block_sparse_moe.experts.232.w3", "model.layers.55.block_sparse_moe.experts.233.w3", "model.layers.55.block_sparse_moe.experts.234.w3", "model.layers.55.block_sparse_moe.experts.235.w3", "model.layers.55.block_sparse_moe.experts.236.w3", "model.layers.55.block_sparse_moe.experts.237.w3", "model.layers.55.block_sparse_moe.experts.238.w3", "model.layers.55.block_sparse_moe.experts.239.w3", "model.layers.55.block_sparse_moe.experts.240.w3", "model.layers.55.block_sparse_moe.experts.241.w3", "model.layers.55.block_sparse_moe.experts.242.w3", "model.layers.55.block_sparse_moe.experts.243.w3", "model.layers.55.block_sparse_moe.experts.244.w3", "model.layers.55.block_sparse_moe.experts.245.w3", "model.layers.55.block_sparse_moe.experts.246.w3", "model.layers.55.block_sparse_moe.experts.247.w3", "model.layers.55.block_sparse_moe.experts.248.w3", "model.layers.55.block_sparse_moe.experts.249.w3", "model.layers.55.block_sparse_moe.experts.250.w3", "model.layers.55.block_sparse_moe.experts.251.w3", "model.layers.55.block_sparse_moe.experts.252.w3", "model.layers.55.block_sparse_moe.experts.253.w3", "model.layers.55.block_sparse_moe.experts.254.w3", "model.layers.55.block_sparse_moe.experts.255.w3", "model.layers.55.block_sparse_moe.experts.0.w2", "model.layers.55.block_sparse_moe.experts.1.w2", "model.layers.55.block_sparse_moe.experts.2.w2", "model.layers.55.block_sparse_moe.experts.3.w2", "model.layers.55.block_sparse_moe.experts.4.w2", "model.layers.55.block_sparse_moe.experts.5.w2", "model.layers.55.block_sparse_moe.experts.6.w2", "model.layers.55.block_sparse_moe.experts.7.w2", "model.layers.55.block_sparse_moe.experts.8.w2", "model.layers.55.block_sparse_moe.experts.9.w2", "model.layers.55.block_sparse_moe.experts.10.w2", "model.layers.55.block_sparse_moe.experts.11.w2", "model.layers.55.block_sparse_moe.experts.12.w2", "model.layers.55.block_sparse_moe.experts.13.w2", "model.layers.55.block_sparse_moe.experts.14.w2", "model.layers.55.block_sparse_moe.experts.15.w2", "model.layers.55.block_sparse_moe.experts.16.w2", "model.layers.55.block_sparse_moe.experts.17.w2", "model.layers.55.block_sparse_moe.experts.18.w2", "model.layers.55.block_sparse_moe.experts.19.w2", "model.layers.55.block_sparse_moe.experts.20.w2", "model.layers.55.block_sparse_moe.experts.21.w2", "model.layers.55.block_sparse_moe.experts.22.w2", "model.layers.55.block_sparse_moe.experts.23.w2", "model.layers.55.block_sparse_moe.experts.24.w2", "model.layers.55.block_sparse_moe.experts.25.w2", "model.layers.55.block_sparse_moe.experts.26.w2", "model.layers.55.block_sparse_moe.experts.27.w2", "model.layers.55.block_sparse_moe.experts.28.w2", "model.layers.55.block_sparse_moe.experts.29.w2", "model.layers.55.block_sparse_moe.experts.30.w2", "model.layers.55.block_sparse_moe.experts.31.w2", "model.layers.55.block_sparse_moe.experts.32.w2", "model.layers.55.block_sparse_moe.experts.33.w2", "model.layers.55.block_sparse_moe.experts.34.w2", "model.layers.55.block_sparse_moe.experts.35.w2", "model.layers.55.block_sparse_moe.experts.36.w2", "model.layers.55.block_sparse_moe.experts.37.w2", "model.layers.55.block_sparse_moe.experts.38.w2", "model.layers.55.block_sparse_moe.experts.39.w2", "model.layers.55.block_sparse_moe.experts.40.w2", "model.layers.55.block_sparse_moe.experts.41.w2", "model.layers.55.block_sparse_moe.experts.42.w2", "model.layers.55.block_sparse_moe.experts.43.w2", "model.layers.55.block_sparse_moe.experts.44.w2", "model.layers.55.block_sparse_moe.experts.45.w2", "model.layers.55.block_sparse_moe.experts.46.w2", "model.layers.55.block_sparse_moe.experts.47.w2", "model.layers.55.block_sparse_moe.experts.48.w2", "model.layers.55.block_sparse_moe.experts.49.w2", "model.layers.55.block_sparse_moe.experts.50.w2", "model.layers.55.block_sparse_moe.experts.51.w2", "model.layers.55.block_sparse_moe.experts.52.w2", "model.layers.55.block_sparse_moe.experts.53.w2", "model.layers.55.block_sparse_moe.experts.54.w2", "model.layers.55.block_sparse_moe.experts.55.w2", "model.layers.55.block_sparse_moe.experts.56.w2", "model.layers.55.block_sparse_moe.experts.57.w2", "model.layers.55.block_sparse_moe.experts.58.w2", "model.layers.55.block_sparse_moe.experts.59.w2", "model.layers.55.block_sparse_moe.experts.60.w2", "model.layers.55.block_sparse_moe.experts.61.w2", "model.layers.55.block_sparse_moe.experts.62.w2", "model.layers.55.block_sparse_moe.experts.63.w2", "model.layers.55.block_sparse_moe.experts.64.w2", "model.layers.55.block_sparse_moe.experts.65.w2", "model.layers.55.block_sparse_moe.experts.66.w2", "model.layers.55.block_sparse_moe.experts.67.w2", "model.layers.55.block_sparse_moe.experts.68.w2", "model.layers.55.block_sparse_moe.experts.69.w2", "model.layers.55.block_sparse_moe.experts.70.w2", "model.layers.55.block_sparse_moe.experts.71.w2", "model.layers.55.block_sparse_moe.experts.72.w2", "model.layers.55.block_sparse_moe.experts.73.w2", "model.layers.55.block_sparse_moe.experts.74.w2", "model.layers.55.block_sparse_moe.experts.75.w2", "model.layers.55.block_sparse_moe.experts.76.w2", "model.layers.55.block_sparse_moe.experts.77.w2", "model.layers.55.block_sparse_moe.experts.78.w2", "model.layers.55.block_sparse_moe.experts.79.w2", "model.layers.55.block_sparse_moe.experts.80.w2", "model.layers.55.block_sparse_moe.experts.81.w2", "model.layers.55.block_sparse_moe.experts.82.w2", "model.layers.55.block_sparse_moe.experts.83.w2", "model.layers.55.block_sparse_moe.experts.84.w2", "model.layers.55.block_sparse_moe.experts.85.w2", "model.layers.55.block_sparse_moe.experts.86.w2", "model.layers.55.block_sparse_moe.experts.87.w2", "model.layers.55.block_sparse_moe.experts.88.w2", "model.layers.55.block_sparse_moe.experts.89.w2", "model.layers.55.block_sparse_moe.experts.90.w2", "model.layers.55.block_sparse_moe.experts.91.w2", "model.layers.55.block_sparse_moe.experts.92.w2", "model.layers.55.block_sparse_moe.experts.93.w2", "model.layers.55.block_sparse_moe.experts.94.w2", "model.layers.55.block_sparse_moe.experts.95.w2", "model.layers.55.block_sparse_moe.experts.96.w2", "model.layers.55.block_sparse_moe.experts.97.w2", "model.layers.55.block_sparse_moe.experts.98.w2", "model.layers.55.block_sparse_moe.experts.99.w2", "model.layers.55.block_sparse_moe.experts.100.w2", "model.layers.55.block_sparse_moe.experts.101.w2", "model.layers.55.block_sparse_moe.experts.102.w2", "model.layers.55.block_sparse_moe.experts.103.w2", "model.layers.55.block_sparse_moe.experts.104.w2", "model.layers.55.block_sparse_moe.experts.105.w2", "model.layers.55.block_sparse_moe.experts.106.w2", "model.layers.55.block_sparse_moe.experts.107.w2", "model.layers.55.block_sparse_moe.experts.108.w2", "model.layers.55.block_sparse_moe.experts.109.w2", "model.layers.55.block_sparse_moe.experts.110.w2", "model.layers.55.block_sparse_moe.experts.111.w2", "model.layers.55.block_sparse_moe.experts.112.w2", "model.layers.55.block_sparse_moe.experts.113.w2", "model.layers.55.block_sparse_moe.experts.114.w2", "model.layers.55.block_sparse_moe.experts.115.w2", "model.layers.55.block_sparse_moe.experts.116.w2", "model.layers.55.block_sparse_moe.experts.117.w2", "model.layers.55.block_sparse_moe.experts.118.w2", "model.layers.55.block_sparse_moe.experts.119.w2", "model.layers.55.block_sparse_moe.experts.120.w2", "model.layers.55.block_sparse_moe.experts.121.w2", "model.layers.55.block_sparse_moe.experts.122.w2", "model.layers.55.block_sparse_moe.experts.123.w2", "model.layers.55.block_sparse_moe.experts.124.w2", "model.layers.55.block_sparse_moe.experts.125.w2", "model.layers.55.block_sparse_moe.experts.126.w2", "model.layers.55.block_sparse_moe.experts.127.w2", "model.layers.55.block_sparse_moe.experts.128.w2", "model.layers.55.block_sparse_moe.experts.129.w2", "model.layers.55.block_sparse_moe.experts.130.w2", "model.layers.55.block_sparse_moe.experts.131.w2", "model.layers.55.block_sparse_moe.experts.132.w2", "model.layers.55.block_sparse_moe.experts.133.w2", "model.layers.55.block_sparse_moe.experts.134.w2", "model.layers.55.block_sparse_moe.experts.135.w2", "model.layers.55.block_sparse_moe.experts.136.w2", "model.layers.55.block_sparse_moe.experts.137.w2", "model.layers.55.block_sparse_moe.experts.138.w2", "model.layers.55.block_sparse_moe.experts.139.w2", "model.layers.55.block_sparse_moe.experts.140.w2", "model.layers.55.block_sparse_moe.experts.141.w2", "model.layers.55.block_sparse_moe.experts.142.w2", "model.layers.55.block_sparse_moe.experts.143.w2", "model.layers.55.block_sparse_moe.experts.144.w2", "model.layers.55.block_sparse_moe.experts.145.w2", "model.layers.55.block_sparse_moe.experts.146.w2", "model.layers.55.block_sparse_moe.experts.147.w2", "model.layers.55.block_sparse_moe.experts.148.w2", "model.layers.55.block_sparse_moe.experts.149.w2", "model.layers.55.block_sparse_moe.experts.150.w2", "model.layers.55.block_sparse_moe.experts.151.w2", "model.layers.55.block_sparse_moe.experts.152.w2", "model.layers.55.block_sparse_moe.experts.153.w2", "model.layers.55.block_sparse_moe.experts.154.w2", "model.layers.55.block_sparse_moe.experts.155.w2", "model.layers.55.block_sparse_moe.experts.156.w2", "model.layers.55.block_sparse_moe.experts.157.w2", "model.layers.55.block_sparse_moe.experts.158.w2", "model.layers.55.block_sparse_moe.experts.159.w2", "model.layers.55.block_sparse_moe.experts.160.w2", "model.layers.55.block_sparse_moe.experts.161.w2", "model.layers.55.block_sparse_moe.experts.162.w2", "model.layers.55.block_sparse_moe.experts.163.w2", "model.layers.55.block_sparse_moe.experts.164.w2", "model.layers.55.block_sparse_moe.experts.165.w2", "model.layers.55.block_sparse_moe.experts.166.w2", "model.layers.55.block_sparse_moe.experts.167.w2", "model.layers.55.block_sparse_moe.experts.168.w2", "model.layers.55.block_sparse_moe.experts.169.w2", "model.layers.55.block_sparse_moe.experts.170.w2", "model.layers.55.block_sparse_moe.experts.171.w2", "model.layers.55.block_sparse_moe.experts.172.w2", "model.layers.55.block_sparse_moe.experts.173.w2", "model.layers.55.block_sparse_moe.experts.174.w2", "model.layers.55.block_sparse_moe.experts.175.w2", "model.layers.55.block_sparse_moe.experts.176.w2", "model.layers.55.block_sparse_moe.experts.177.w2", "model.layers.55.block_sparse_moe.experts.178.w2", "model.layers.55.block_sparse_moe.experts.179.w2", "model.layers.55.block_sparse_moe.experts.180.w2", "model.layers.55.block_sparse_moe.experts.181.w2", "model.layers.55.block_sparse_moe.experts.182.w2", "model.layers.55.block_sparse_moe.experts.183.w2", "model.layers.55.block_sparse_moe.experts.184.w2", "model.layers.55.block_sparse_moe.experts.185.w2", "model.layers.55.block_sparse_moe.experts.186.w2", "model.layers.55.block_sparse_moe.experts.187.w2", "model.layers.55.block_sparse_moe.experts.188.w2", "model.layers.55.block_sparse_moe.experts.189.w2", "model.layers.55.block_sparse_moe.experts.190.w2", "model.layers.55.block_sparse_moe.experts.191.w2", "model.layers.55.block_sparse_moe.experts.192.w2", "model.layers.55.block_sparse_moe.experts.193.w2", "model.layers.55.block_sparse_moe.experts.194.w2", "model.layers.55.block_sparse_moe.experts.195.w2", "model.layers.55.block_sparse_moe.experts.196.w2", "model.layers.55.block_sparse_moe.experts.197.w2", "model.layers.55.block_sparse_moe.experts.198.w2", "model.layers.55.block_sparse_moe.experts.199.w2", "model.layers.55.block_sparse_moe.experts.200.w2", "model.layers.55.block_sparse_moe.experts.201.w2", "model.layers.55.block_sparse_moe.experts.202.w2", "model.layers.55.block_sparse_moe.experts.203.w2", "model.layers.55.block_sparse_moe.experts.204.w2", "model.layers.55.block_sparse_moe.experts.205.w2", "model.layers.55.block_sparse_moe.experts.206.w2", "model.layers.55.block_sparse_moe.experts.207.w2", "model.layers.55.block_sparse_moe.experts.208.w2", "model.layers.55.block_sparse_moe.experts.209.w2", "model.layers.55.block_sparse_moe.experts.210.w2", "model.layers.55.block_sparse_moe.experts.211.w2", "model.layers.55.block_sparse_moe.experts.212.w2", "model.layers.55.block_sparse_moe.experts.213.w2", "model.layers.55.block_sparse_moe.experts.214.w2", "model.layers.55.block_sparse_moe.experts.215.w2", "model.layers.55.block_sparse_moe.experts.216.w2", "model.layers.55.block_sparse_moe.experts.217.w2", "model.layers.55.block_sparse_moe.experts.218.w2", "model.layers.55.block_sparse_moe.experts.219.w2", "model.layers.55.block_sparse_moe.experts.220.w2", "model.layers.55.block_sparse_moe.experts.221.w2", "model.layers.55.block_sparse_moe.experts.222.w2", "model.layers.55.block_sparse_moe.experts.223.w2", "model.layers.55.block_sparse_moe.experts.224.w2", "model.layers.55.block_sparse_moe.experts.225.w2", "model.layers.55.block_sparse_moe.experts.226.w2", "model.layers.55.block_sparse_moe.experts.227.w2", "model.layers.55.block_sparse_moe.experts.228.w2", "model.layers.55.block_sparse_moe.experts.229.w2", "model.layers.55.block_sparse_moe.experts.230.w2", "model.layers.55.block_sparse_moe.experts.231.w2", "model.layers.55.block_sparse_moe.experts.232.w2", "model.layers.55.block_sparse_moe.experts.233.w2", "model.layers.55.block_sparse_moe.experts.234.w2", "model.layers.55.block_sparse_moe.experts.235.w2", "model.layers.55.block_sparse_moe.experts.236.w2", "model.layers.55.block_sparse_moe.experts.237.w2", "model.layers.55.block_sparse_moe.experts.238.w2", "model.layers.55.block_sparse_moe.experts.239.w2", "model.layers.55.block_sparse_moe.experts.240.w2", "model.layers.55.block_sparse_moe.experts.241.w2", "model.layers.55.block_sparse_moe.experts.242.w2", "model.layers.55.block_sparse_moe.experts.243.w2", "model.layers.55.block_sparse_moe.experts.244.w2", "model.layers.55.block_sparse_moe.experts.245.w2", "model.layers.55.block_sparse_moe.experts.246.w2", "model.layers.55.block_sparse_moe.experts.247.w2", "model.layers.55.block_sparse_moe.experts.248.w2", "model.layers.55.block_sparse_moe.experts.249.w2", "model.layers.55.block_sparse_moe.experts.250.w2", "model.layers.55.block_sparse_moe.experts.251.w2", "model.layers.55.block_sparse_moe.experts.252.w2", "model.layers.55.block_sparse_moe.experts.253.w2", "model.layers.55.block_sparse_moe.experts.254.w2", "model.layers.55.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -3.6801397800434454e-05, "dbits": 3623878656 } ] }, { "idx": 112, "layers": [ "model.layers.56.self_attn.q_proj", "model.layers.56.self_attn.k_proj", "model.layers.56.self_attn.v_proj", "model.layers.56.self_attn.o_proj" ], "candidates": [ { "dkld": -0.001961943134665467, "dbits": 44040192 } ] }, { "idx": 113, "layers": [ "model.layers.56.block_sparse_moe.experts.0.w1", "model.layers.56.block_sparse_moe.experts.1.w1", "model.layers.56.block_sparse_moe.experts.2.w1", "model.layers.56.block_sparse_moe.experts.3.w1", "model.layers.56.block_sparse_moe.experts.4.w1", "model.layers.56.block_sparse_moe.experts.5.w1", "model.layers.56.block_sparse_moe.experts.6.w1", "model.layers.56.block_sparse_moe.experts.7.w1", "model.layers.56.block_sparse_moe.experts.8.w1", "model.layers.56.block_sparse_moe.experts.9.w1", "model.layers.56.block_sparse_moe.experts.10.w1", "model.layers.56.block_sparse_moe.experts.11.w1", "model.layers.56.block_sparse_moe.experts.12.w1", "model.layers.56.block_sparse_moe.experts.13.w1", "model.layers.56.block_sparse_moe.experts.14.w1", "model.layers.56.block_sparse_moe.experts.15.w1", "model.layers.56.block_sparse_moe.experts.16.w1", "model.layers.56.block_sparse_moe.experts.17.w1", "model.layers.56.block_sparse_moe.experts.18.w1", "model.layers.56.block_sparse_moe.experts.19.w1", "model.layers.56.block_sparse_moe.experts.20.w1", "model.layers.56.block_sparse_moe.experts.21.w1", "model.layers.56.block_sparse_moe.experts.22.w1", "model.layers.56.block_sparse_moe.experts.23.w1", "model.layers.56.block_sparse_moe.experts.24.w1", "model.layers.56.block_sparse_moe.experts.25.w1", "model.layers.56.block_sparse_moe.experts.26.w1", "model.layers.56.block_sparse_moe.experts.27.w1", "model.layers.56.block_sparse_moe.experts.28.w1", "model.layers.56.block_sparse_moe.experts.29.w1", "model.layers.56.block_sparse_moe.experts.30.w1", "model.layers.56.block_sparse_moe.experts.31.w1", "model.layers.56.block_sparse_moe.experts.32.w1", "model.layers.56.block_sparse_moe.experts.33.w1", "model.layers.56.block_sparse_moe.experts.34.w1", "model.layers.56.block_sparse_moe.experts.35.w1", "model.layers.56.block_sparse_moe.experts.36.w1", "model.layers.56.block_sparse_moe.experts.37.w1", "model.layers.56.block_sparse_moe.experts.38.w1", "model.layers.56.block_sparse_moe.experts.39.w1", "model.layers.56.block_sparse_moe.experts.40.w1", "model.layers.56.block_sparse_moe.experts.41.w1", "model.layers.56.block_sparse_moe.experts.42.w1", "model.layers.56.block_sparse_moe.experts.43.w1", "model.layers.56.block_sparse_moe.experts.44.w1", "model.layers.56.block_sparse_moe.experts.45.w1", "model.layers.56.block_sparse_moe.experts.46.w1", "model.layers.56.block_sparse_moe.experts.47.w1", "model.layers.56.block_sparse_moe.experts.48.w1", "model.layers.56.block_sparse_moe.experts.49.w1", "model.layers.56.block_sparse_moe.experts.50.w1", "model.layers.56.block_sparse_moe.experts.51.w1", "model.layers.56.block_sparse_moe.experts.52.w1", "model.layers.56.block_sparse_moe.experts.53.w1", "model.layers.56.block_sparse_moe.experts.54.w1", "model.layers.56.block_sparse_moe.experts.55.w1", "model.layers.56.block_sparse_moe.experts.56.w1", "model.layers.56.block_sparse_moe.experts.57.w1", "model.layers.56.block_sparse_moe.experts.58.w1", "model.layers.56.block_sparse_moe.experts.59.w1", "model.layers.56.block_sparse_moe.experts.60.w1", "model.layers.56.block_sparse_moe.experts.61.w1", "model.layers.56.block_sparse_moe.experts.62.w1", "model.layers.56.block_sparse_moe.experts.63.w1", "model.layers.56.block_sparse_moe.experts.64.w1", "model.layers.56.block_sparse_moe.experts.65.w1", "model.layers.56.block_sparse_moe.experts.66.w1", "model.layers.56.block_sparse_moe.experts.67.w1", "model.layers.56.block_sparse_moe.experts.68.w1", "model.layers.56.block_sparse_moe.experts.69.w1", "model.layers.56.block_sparse_moe.experts.70.w1", "model.layers.56.block_sparse_moe.experts.71.w1", "model.layers.56.block_sparse_moe.experts.72.w1", "model.layers.56.block_sparse_moe.experts.73.w1", "model.layers.56.block_sparse_moe.experts.74.w1", "model.layers.56.block_sparse_moe.experts.75.w1", "model.layers.56.block_sparse_moe.experts.76.w1", "model.layers.56.block_sparse_moe.experts.77.w1", "model.layers.56.block_sparse_moe.experts.78.w1", "model.layers.56.block_sparse_moe.experts.79.w1", "model.layers.56.block_sparse_moe.experts.80.w1", "model.layers.56.block_sparse_moe.experts.81.w1", "model.layers.56.block_sparse_moe.experts.82.w1", "model.layers.56.block_sparse_moe.experts.83.w1", "model.layers.56.block_sparse_moe.experts.84.w1", "model.layers.56.block_sparse_moe.experts.85.w1", "model.layers.56.block_sparse_moe.experts.86.w1", "model.layers.56.block_sparse_moe.experts.87.w1", "model.layers.56.block_sparse_moe.experts.88.w1", "model.layers.56.block_sparse_moe.experts.89.w1", "model.layers.56.block_sparse_moe.experts.90.w1", "model.layers.56.block_sparse_moe.experts.91.w1", "model.layers.56.block_sparse_moe.experts.92.w1", "model.layers.56.block_sparse_moe.experts.93.w1", "model.layers.56.block_sparse_moe.experts.94.w1", "model.layers.56.block_sparse_moe.experts.95.w1", "model.layers.56.block_sparse_moe.experts.96.w1", "model.layers.56.block_sparse_moe.experts.97.w1", "model.layers.56.block_sparse_moe.experts.98.w1", "model.layers.56.block_sparse_moe.experts.99.w1", "model.layers.56.block_sparse_moe.experts.100.w1", "model.layers.56.block_sparse_moe.experts.101.w1", "model.layers.56.block_sparse_moe.experts.102.w1", "model.layers.56.block_sparse_moe.experts.103.w1", "model.layers.56.block_sparse_moe.experts.104.w1", "model.layers.56.block_sparse_moe.experts.105.w1", "model.layers.56.block_sparse_moe.experts.106.w1", "model.layers.56.block_sparse_moe.experts.107.w1", "model.layers.56.block_sparse_moe.experts.108.w1", "model.layers.56.block_sparse_moe.experts.109.w1", "model.layers.56.block_sparse_moe.experts.110.w1", "model.layers.56.block_sparse_moe.experts.111.w1", "model.layers.56.block_sparse_moe.experts.112.w1", "model.layers.56.block_sparse_moe.experts.113.w1", "model.layers.56.block_sparse_moe.experts.114.w1", "model.layers.56.block_sparse_moe.experts.115.w1", "model.layers.56.block_sparse_moe.experts.116.w1", "model.layers.56.block_sparse_moe.experts.117.w1", "model.layers.56.block_sparse_moe.experts.118.w1", "model.layers.56.block_sparse_moe.experts.119.w1", "model.layers.56.block_sparse_moe.experts.120.w1", "model.layers.56.block_sparse_moe.experts.121.w1", "model.layers.56.block_sparse_moe.experts.122.w1", "model.layers.56.block_sparse_moe.experts.123.w1", "model.layers.56.block_sparse_moe.experts.124.w1", "model.layers.56.block_sparse_moe.experts.125.w1", "model.layers.56.block_sparse_moe.experts.126.w1", "model.layers.56.block_sparse_moe.experts.127.w1", "model.layers.56.block_sparse_moe.experts.128.w1", "model.layers.56.block_sparse_moe.experts.129.w1", "model.layers.56.block_sparse_moe.experts.130.w1", "model.layers.56.block_sparse_moe.experts.131.w1", "model.layers.56.block_sparse_moe.experts.132.w1", "model.layers.56.block_sparse_moe.experts.133.w1", "model.layers.56.block_sparse_moe.experts.134.w1", "model.layers.56.block_sparse_moe.experts.135.w1", "model.layers.56.block_sparse_moe.experts.136.w1", "model.layers.56.block_sparse_moe.experts.137.w1", "model.layers.56.block_sparse_moe.experts.138.w1", "model.layers.56.block_sparse_moe.experts.139.w1", "model.layers.56.block_sparse_moe.experts.140.w1", "model.layers.56.block_sparse_moe.experts.141.w1", "model.layers.56.block_sparse_moe.experts.142.w1", "model.layers.56.block_sparse_moe.experts.143.w1", "model.layers.56.block_sparse_moe.experts.144.w1", "model.layers.56.block_sparse_moe.experts.145.w1", "model.layers.56.block_sparse_moe.experts.146.w1", "model.layers.56.block_sparse_moe.experts.147.w1", "model.layers.56.block_sparse_moe.experts.148.w1", "model.layers.56.block_sparse_moe.experts.149.w1", "model.layers.56.block_sparse_moe.experts.150.w1", "model.layers.56.block_sparse_moe.experts.151.w1", "model.layers.56.block_sparse_moe.experts.152.w1", "model.layers.56.block_sparse_moe.experts.153.w1", "model.layers.56.block_sparse_moe.experts.154.w1", "model.layers.56.block_sparse_moe.experts.155.w1", "model.layers.56.block_sparse_moe.experts.156.w1", "model.layers.56.block_sparse_moe.experts.157.w1", "model.layers.56.block_sparse_moe.experts.158.w1", "model.layers.56.block_sparse_moe.experts.159.w1", "model.layers.56.block_sparse_moe.experts.160.w1", "model.layers.56.block_sparse_moe.experts.161.w1", "model.layers.56.block_sparse_moe.experts.162.w1", "model.layers.56.block_sparse_moe.experts.163.w1", "model.layers.56.block_sparse_moe.experts.164.w1", "model.layers.56.block_sparse_moe.experts.165.w1", "model.layers.56.block_sparse_moe.experts.166.w1", "model.layers.56.block_sparse_moe.experts.167.w1", "model.layers.56.block_sparse_moe.experts.168.w1", "model.layers.56.block_sparse_moe.experts.169.w1", "model.layers.56.block_sparse_moe.experts.170.w1", "model.layers.56.block_sparse_moe.experts.171.w1", "model.layers.56.block_sparse_moe.experts.172.w1", "model.layers.56.block_sparse_moe.experts.173.w1", "model.layers.56.block_sparse_moe.experts.174.w1", "model.layers.56.block_sparse_moe.experts.175.w1", "model.layers.56.block_sparse_moe.experts.176.w1", "model.layers.56.block_sparse_moe.experts.177.w1", "model.layers.56.block_sparse_moe.experts.178.w1", "model.layers.56.block_sparse_moe.experts.179.w1", "model.layers.56.block_sparse_moe.experts.180.w1", "model.layers.56.block_sparse_moe.experts.181.w1", "model.layers.56.block_sparse_moe.experts.182.w1", "model.layers.56.block_sparse_moe.experts.183.w1", "model.layers.56.block_sparse_moe.experts.184.w1", "model.layers.56.block_sparse_moe.experts.185.w1", "model.layers.56.block_sparse_moe.experts.186.w1", "model.layers.56.block_sparse_moe.experts.187.w1", "model.layers.56.block_sparse_moe.experts.188.w1", "model.layers.56.block_sparse_moe.experts.189.w1", "model.layers.56.block_sparse_moe.experts.190.w1", "model.layers.56.block_sparse_moe.experts.191.w1", "model.layers.56.block_sparse_moe.experts.192.w1", "model.layers.56.block_sparse_moe.experts.193.w1", "model.layers.56.block_sparse_moe.experts.194.w1", "model.layers.56.block_sparse_moe.experts.195.w1", "model.layers.56.block_sparse_moe.experts.196.w1", "model.layers.56.block_sparse_moe.experts.197.w1", "model.layers.56.block_sparse_moe.experts.198.w1", "model.layers.56.block_sparse_moe.experts.199.w1", "model.layers.56.block_sparse_moe.experts.200.w1", "model.layers.56.block_sparse_moe.experts.201.w1", "model.layers.56.block_sparse_moe.experts.202.w1", "model.layers.56.block_sparse_moe.experts.203.w1", "model.layers.56.block_sparse_moe.experts.204.w1", "model.layers.56.block_sparse_moe.experts.205.w1", "model.layers.56.block_sparse_moe.experts.206.w1", "model.layers.56.block_sparse_moe.experts.207.w1", "model.layers.56.block_sparse_moe.experts.208.w1", "model.layers.56.block_sparse_moe.experts.209.w1", "model.layers.56.block_sparse_moe.experts.210.w1", "model.layers.56.block_sparse_moe.experts.211.w1", "model.layers.56.block_sparse_moe.experts.212.w1", "model.layers.56.block_sparse_moe.experts.213.w1", "model.layers.56.block_sparse_moe.experts.214.w1", "model.layers.56.block_sparse_moe.experts.215.w1", "model.layers.56.block_sparse_moe.experts.216.w1", "model.layers.56.block_sparse_moe.experts.217.w1", "model.layers.56.block_sparse_moe.experts.218.w1", "model.layers.56.block_sparse_moe.experts.219.w1", "model.layers.56.block_sparse_moe.experts.220.w1", "model.layers.56.block_sparse_moe.experts.221.w1", "model.layers.56.block_sparse_moe.experts.222.w1", "model.layers.56.block_sparse_moe.experts.223.w1", "model.layers.56.block_sparse_moe.experts.224.w1", "model.layers.56.block_sparse_moe.experts.225.w1", "model.layers.56.block_sparse_moe.experts.226.w1", "model.layers.56.block_sparse_moe.experts.227.w1", "model.layers.56.block_sparse_moe.experts.228.w1", "model.layers.56.block_sparse_moe.experts.229.w1", "model.layers.56.block_sparse_moe.experts.230.w1", "model.layers.56.block_sparse_moe.experts.231.w1", "model.layers.56.block_sparse_moe.experts.232.w1", "model.layers.56.block_sparse_moe.experts.233.w1", "model.layers.56.block_sparse_moe.experts.234.w1", "model.layers.56.block_sparse_moe.experts.235.w1", "model.layers.56.block_sparse_moe.experts.236.w1", "model.layers.56.block_sparse_moe.experts.237.w1", "model.layers.56.block_sparse_moe.experts.238.w1", "model.layers.56.block_sparse_moe.experts.239.w1", "model.layers.56.block_sparse_moe.experts.240.w1", "model.layers.56.block_sparse_moe.experts.241.w1", "model.layers.56.block_sparse_moe.experts.242.w1", "model.layers.56.block_sparse_moe.experts.243.w1", "model.layers.56.block_sparse_moe.experts.244.w1", "model.layers.56.block_sparse_moe.experts.245.w1", "model.layers.56.block_sparse_moe.experts.246.w1", "model.layers.56.block_sparse_moe.experts.247.w1", "model.layers.56.block_sparse_moe.experts.248.w1", "model.layers.56.block_sparse_moe.experts.249.w1", "model.layers.56.block_sparse_moe.experts.250.w1", "model.layers.56.block_sparse_moe.experts.251.w1", "model.layers.56.block_sparse_moe.experts.252.w1", "model.layers.56.block_sparse_moe.experts.253.w1", "model.layers.56.block_sparse_moe.experts.254.w1", "model.layers.56.block_sparse_moe.experts.255.w1", "model.layers.56.block_sparse_moe.experts.0.w3", "model.layers.56.block_sparse_moe.experts.1.w3", "model.layers.56.block_sparse_moe.experts.2.w3", "model.layers.56.block_sparse_moe.experts.3.w3", "model.layers.56.block_sparse_moe.experts.4.w3", "model.layers.56.block_sparse_moe.experts.5.w3", "model.layers.56.block_sparse_moe.experts.6.w3", "model.layers.56.block_sparse_moe.experts.7.w3", "model.layers.56.block_sparse_moe.experts.8.w3", "model.layers.56.block_sparse_moe.experts.9.w3", "model.layers.56.block_sparse_moe.experts.10.w3", "model.layers.56.block_sparse_moe.experts.11.w3", "model.layers.56.block_sparse_moe.experts.12.w3", "model.layers.56.block_sparse_moe.experts.13.w3", "model.layers.56.block_sparse_moe.experts.14.w3", "model.layers.56.block_sparse_moe.experts.15.w3", "model.layers.56.block_sparse_moe.experts.16.w3", "model.layers.56.block_sparse_moe.experts.17.w3", "model.layers.56.block_sparse_moe.experts.18.w3", "model.layers.56.block_sparse_moe.experts.19.w3", "model.layers.56.block_sparse_moe.experts.20.w3", "model.layers.56.block_sparse_moe.experts.21.w3", "model.layers.56.block_sparse_moe.experts.22.w3", "model.layers.56.block_sparse_moe.experts.23.w3", "model.layers.56.block_sparse_moe.experts.24.w3", "model.layers.56.block_sparse_moe.experts.25.w3", "model.layers.56.block_sparse_moe.experts.26.w3", "model.layers.56.block_sparse_moe.experts.27.w3", "model.layers.56.block_sparse_moe.experts.28.w3", "model.layers.56.block_sparse_moe.experts.29.w3", "model.layers.56.block_sparse_moe.experts.30.w3", "model.layers.56.block_sparse_moe.experts.31.w3", "model.layers.56.block_sparse_moe.experts.32.w3", "model.layers.56.block_sparse_moe.experts.33.w3", "model.layers.56.block_sparse_moe.experts.34.w3", "model.layers.56.block_sparse_moe.experts.35.w3", "model.layers.56.block_sparse_moe.experts.36.w3", "model.layers.56.block_sparse_moe.experts.37.w3", "model.layers.56.block_sparse_moe.experts.38.w3", "model.layers.56.block_sparse_moe.experts.39.w3", "model.layers.56.block_sparse_moe.experts.40.w3", "model.layers.56.block_sparse_moe.experts.41.w3", "model.layers.56.block_sparse_moe.experts.42.w3", "model.layers.56.block_sparse_moe.experts.43.w3", "model.layers.56.block_sparse_moe.experts.44.w3", "model.layers.56.block_sparse_moe.experts.45.w3", "model.layers.56.block_sparse_moe.experts.46.w3", "model.layers.56.block_sparse_moe.experts.47.w3", "model.layers.56.block_sparse_moe.experts.48.w3", "model.layers.56.block_sparse_moe.experts.49.w3", "model.layers.56.block_sparse_moe.experts.50.w3", "model.layers.56.block_sparse_moe.experts.51.w3", "model.layers.56.block_sparse_moe.experts.52.w3", "model.layers.56.block_sparse_moe.experts.53.w3", "model.layers.56.block_sparse_moe.experts.54.w3", "model.layers.56.block_sparse_moe.experts.55.w3", "model.layers.56.block_sparse_moe.experts.56.w3", "model.layers.56.block_sparse_moe.experts.57.w3", "model.layers.56.block_sparse_moe.experts.58.w3", "model.layers.56.block_sparse_moe.experts.59.w3", "model.layers.56.block_sparse_moe.experts.60.w3", "model.layers.56.block_sparse_moe.experts.61.w3", "model.layers.56.block_sparse_moe.experts.62.w3", "model.layers.56.block_sparse_moe.experts.63.w3", "model.layers.56.block_sparse_moe.experts.64.w3", "model.layers.56.block_sparse_moe.experts.65.w3", "model.layers.56.block_sparse_moe.experts.66.w3", "model.layers.56.block_sparse_moe.experts.67.w3", "model.layers.56.block_sparse_moe.experts.68.w3", "model.layers.56.block_sparse_moe.experts.69.w3", "model.layers.56.block_sparse_moe.experts.70.w3", "model.layers.56.block_sparse_moe.experts.71.w3", "model.layers.56.block_sparse_moe.experts.72.w3", "model.layers.56.block_sparse_moe.experts.73.w3", "model.layers.56.block_sparse_moe.experts.74.w3", "model.layers.56.block_sparse_moe.experts.75.w3", "model.layers.56.block_sparse_moe.experts.76.w3", "model.layers.56.block_sparse_moe.experts.77.w3", "model.layers.56.block_sparse_moe.experts.78.w3", "model.layers.56.block_sparse_moe.experts.79.w3", "model.layers.56.block_sparse_moe.experts.80.w3", "model.layers.56.block_sparse_moe.experts.81.w3", "model.layers.56.block_sparse_moe.experts.82.w3", "model.layers.56.block_sparse_moe.experts.83.w3", "model.layers.56.block_sparse_moe.experts.84.w3", "model.layers.56.block_sparse_moe.experts.85.w3", "model.layers.56.block_sparse_moe.experts.86.w3", "model.layers.56.block_sparse_moe.experts.87.w3", "model.layers.56.block_sparse_moe.experts.88.w3", "model.layers.56.block_sparse_moe.experts.89.w3", "model.layers.56.block_sparse_moe.experts.90.w3", "model.layers.56.block_sparse_moe.experts.91.w3", "model.layers.56.block_sparse_moe.experts.92.w3", "model.layers.56.block_sparse_moe.experts.93.w3", "model.layers.56.block_sparse_moe.experts.94.w3", "model.layers.56.block_sparse_moe.experts.95.w3", "model.layers.56.block_sparse_moe.experts.96.w3", "model.layers.56.block_sparse_moe.experts.97.w3", "model.layers.56.block_sparse_moe.experts.98.w3", "model.layers.56.block_sparse_moe.experts.99.w3", "model.layers.56.block_sparse_moe.experts.100.w3", "model.layers.56.block_sparse_moe.experts.101.w3", "model.layers.56.block_sparse_moe.experts.102.w3", "model.layers.56.block_sparse_moe.experts.103.w3", "model.layers.56.block_sparse_moe.experts.104.w3", "model.layers.56.block_sparse_moe.experts.105.w3", "model.layers.56.block_sparse_moe.experts.106.w3", "model.layers.56.block_sparse_moe.experts.107.w3", "model.layers.56.block_sparse_moe.experts.108.w3", "model.layers.56.block_sparse_moe.experts.109.w3", "model.layers.56.block_sparse_moe.experts.110.w3", "model.layers.56.block_sparse_moe.experts.111.w3", "model.layers.56.block_sparse_moe.experts.112.w3", "model.layers.56.block_sparse_moe.experts.113.w3", "model.layers.56.block_sparse_moe.experts.114.w3", "model.layers.56.block_sparse_moe.experts.115.w3", "model.layers.56.block_sparse_moe.experts.116.w3", "model.layers.56.block_sparse_moe.experts.117.w3", "model.layers.56.block_sparse_moe.experts.118.w3", "model.layers.56.block_sparse_moe.experts.119.w3", "model.layers.56.block_sparse_moe.experts.120.w3", "model.layers.56.block_sparse_moe.experts.121.w3", "model.layers.56.block_sparse_moe.experts.122.w3", "model.layers.56.block_sparse_moe.experts.123.w3", "model.layers.56.block_sparse_moe.experts.124.w3", "model.layers.56.block_sparse_moe.experts.125.w3", "model.layers.56.block_sparse_moe.experts.126.w3", "model.layers.56.block_sparse_moe.experts.127.w3", "model.layers.56.block_sparse_moe.experts.128.w3", "model.layers.56.block_sparse_moe.experts.129.w3", "model.layers.56.block_sparse_moe.experts.130.w3", "model.layers.56.block_sparse_moe.experts.131.w3", "model.layers.56.block_sparse_moe.experts.132.w3", "model.layers.56.block_sparse_moe.experts.133.w3", "model.layers.56.block_sparse_moe.experts.134.w3", "model.layers.56.block_sparse_moe.experts.135.w3", "model.layers.56.block_sparse_moe.experts.136.w3", "model.layers.56.block_sparse_moe.experts.137.w3", "model.layers.56.block_sparse_moe.experts.138.w3", "model.layers.56.block_sparse_moe.experts.139.w3", "model.layers.56.block_sparse_moe.experts.140.w3", "model.layers.56.block_sparse_moe.experts.141.w3", "model.layers.56.block_sparse_moe.experts.142.w3", "model.layers.56.block_sparse_moe.experts.143.w3", "model.layers.56.block_sparse_moe.experts.144.w3", "model.layers.56.block_sparse_moe.experts.145.w3", "model.layers.56.block_sparse_moe.experts.146.w3", "model.layers.56.block_sparse_moe.experts.147.w3", "model.layers.56.block_sparse_moe.experts.148.w3", "model.layers.56.block_sparse_moe.experts.149.w3", "model.layers.56.block_sparse_moe.experts.150.w3", "model.layers.56.block_sparse_moe.experts.151.w3", "model.layers.56.block_sparse_moe.experts.152.w3", "model.layers.56.block_sparse_moe.experts.153.w3", "model.layers.56.block_sparse_moe.experts.154.w3", "model.layers.56.block_sparse_moe.experts.155.w3", "model.layers.56.block_sparse_moe.experts.156.w3", "model.layers.56.block_sparse_moe.experts.157.w3", "model.layers.56.block_sparse_moe.experts.158.w3", "model.layers.56.block_sparse_moe.experts.159.w3", "model.layers.56.block_sparse_moe.experts.160.w3", "model.layers.56.block_sparse_moe.experts.161.w3", "model.layers.56.block_sparse_moe.experts.162.w3", "model.layers.56.block_sparse_moe.experts.163.w3", "model.layers.56.block_sparse_moe.experts.164.w3", "model.layers.56.block_sparse_moe.experts.165.w3", "model.layers.56.block_sparse_moe.experts.166.w3", "model.layers.56.block_sparse_moe.experts.167.w3", "model.layers.56.block_sparse_moe.experts.168.w3", "model.layers.56.block_sparse_moe.experts.169.w3", "model.layers.56.block_sparse_moe.experts.170.w3", "model.layers.56.block_sparse_moe.experts.171.w3", "model.layers.56.block_sparse_moe.experts.172.w3", "model.layers.56.block_sparse_moe.experts.173.w3", "model.layers.56.block_sparse_moe.experts.174.w3", "model.layers.56.block_sparse_moe.experts.175.w3", "model.layers.56.block_sparse_moe.experts.176.w3", "model.layers.56.block_sparse_moe.experts.177.w3", "model.layers.56.block_sparse_moe.experts.178.w3", "model.layers.56.block_sparse_moe.experts.179.w3", "model.layers.56.block_sparse_moe.experts.180.w3", "model.layers.56.block_sparse_moe.experts.181.w3", "model.layers.56.block_sparse_moe.experts.182.w3", "model.layers.56.block_sparse_moe.experts.183.w3", "model.layers.56.block_sparse_moe.experts.184.w3", "model.layers.56.block_sparse_moe.experts.185.w3", "model.layers.56.block_sparse_moe.experts.186.w3", "model.layers.56.block_sparse_moe.experts.187.w3", "model.layers.56.block_sparse_moe.experts.188.w3", "model.layers.56.block_sparse_moe.experts.189.w3", "model.layers.56.block_sparse_moe.experts.190.w3", "model.layers.56.block_sparse_moe.experts.191.w3", "model.layers.56.block_sparse_moe.experts.192.w3", "model.layers.56.block_sparse_moe.experts.193.w3", "model.layers.56.block_sparse_moe.experts.194.w3", "model.layers.56.block_sparse_moe.experts.195.w3", "model.layers.56.block_sparse_moe.experts.196.w3", "model.layers.56.block_sparse_moe.experts.197.w3", "model.layers.56.block_sparse_moe.experts.198.w3", "model.layers.56.block_sparse_moe.experts.199.w3", "model.layers.56.block_sparse_moe.experts.200.w3", "model.layers.56.block_sparse_moe.experts.201.w3", "model.layers.56.block_sparse_moe.experts.202.w3", "model.layers.56.block_sparse_moe.experts.203.w3", "model.layers.56.block_sparse_moe.experts.204.w3", "model.layers.56.block_sparse_moe.experts.205.w3", "model.layers.56.block_sparse_moe.experts.206.w3", "model.layers.56.block_sparse_moe.experts.207.w3", "model.layers.56.block_sparse_moe.experts.208.w3", "model.layers.56.block_sparse_moe.experts.209.w3", "model.layers.56.block_sparse_moe.experts.210.w3", "model.layers.56.block_sparse_moe.experts.211.w3", "model.layers.56.block_sparse_moe.experts.212.w3", "model.layers.56.block_sparse_moe.experts.213.w3", "model.layers.56.block_sparse_moe.experts.214.w3", "model.layers.56.block_sparse_moe.experts.215.w3", "model.layers.56.block_sparse_moe.experts.216.w3", "model.layers.56.block_sparse_moe.experts.217.w3", "model.layers.56.block_sparse_moe.experts.218.w3", "model.layers.56.block_sparse_moe.experts.219.w3", "model.layers.56.block_sparse_moe.experts.220.w3", "model.layers.56.block_sparse_moe.experts.221.w3", "model.layers.56.block_sparse_moe.experts.222.w3", "model.layers.56.block_sparse_moe.experts.223.w3", "model.layers.56.block_sparse_moe.experts.224.w3", "model.layers.56.block_sparse_moe.experts.225.w3", "model.layers.56.block_sparse_moe.experts.226.w3", "model.layers.56.block_sparse_moe.experts.227.w3", "model.layers.56.block_sparse_moe.experts.228.w3", "model.layers.56.block_sparse_moe.experts.229.w3", "model.layers.56.block_sparse_moe.experts.230.w3", "model.layers.56.block_sparse_moe.experts.231.w3", "model.layers.56.block_sparse_moe.experts.232.w3", "model.layers.56.block_sparse_moe.experts.233.w3", "model.layers.56.block_sparse_moe.experts.234.w3", "model.layers.56.block_sparse_moe.experts.235.w3", "model.layers.56.block_sparse_moe.experts.236.w3", "model.layers.56.block_sparse_moe.experts.237.w3", "model.layers.56.block_sparse_moe.experts.238.w3", "model.layers.56.block_sparse_moe.experts.239.w3", "model.layers.56.block_sparse_moe.experts.240.w3", "model.layers.56.block_sparse_moe.experts.241.w3", "model.layers.56.block_sparse_moe.experts.242.w3", "model.layers.56.block_sparse_moe.experts.243.w3", "model.layers.56.block_sparse_moe.experts.244.w3", "model.layers.56.block_sparse_moe.experts.245.w3", "model.layers.56.block_sparse_moe.experts.246.w3", "model.layers.56.block_sparse_moe.experts.247.w3", "model.layers.56.block_sparse_moe.experts.248.w3", "model.layers.56.block_sparse_moe.experts.249.w3", "model.layers.56.block_sparse_moe.experts.250.w3", "model.layers.56.block_sparse_moe.experts.251.w3", "model.layers.56.block_sparse_moe.experts.252.w3", "model.layers.56.block_sparse_moe.experts.253.w3", "model.layers.56.block_sparse_moe.experts.254.w3", "model.layers.56.block_sparse_moe.experts.255.w3", "model.layers.56.block_sparse_moe.experts.0.w2", "model.layers.56.block_sparse_moe.experts.1.w2", "model.layers.56.block_sparse_moe.experts.2.w2", "model.layers.56.block_sparse_moe.experts.3.w2", "model.layers.56.block_sparse_moe.experts.4.w2", "model.layers.56.block_sparse_moe.experts.5.w2", "model.layers.56.block_sparse_moe.experts.6.w2", "model.layers.56.block_sparse_moe.experts.7.w2", "model.layers.56.block_sparse_moe.experts.8.w2", "model.layers.56.block_sparse_moe.experts.9.w2", "model.layers.56.block_sparse_moe.experts.10.w2", "model.layers.56.block_sparse_moe.experts.11.w2", "model.layers.56.block_sparse_moe.experts.12.w2", "model.layers.56.block_sparse_moe.experts.13.w2", "model.layers.56.block_sparse_moe.experts.14.w2", "model.layers.56.block_sparse_moe.experts.15.w2", "model.layers.56.block_sparse_moe.experts.16.w2", "model.layers.56.block_sparse_moe.experts.17.w2", "model.layers.56.block_sparse_moe.experts.18.w2", "model.layers.56.block_sparse_moe.experts.19.w2", "model.layers.56.block_sparse_moe.experts.20.w2", "model.layers.56.block_sparse_moe.experts.21.w2", "model.layers.56.block_sparse_moe.experts.22.w2", "model.layers.56.block_sparse_moe.experts.23.w2", "model.layers.56.block_sparse_moe.experts.24.w2", "model.layers.56.block_sparse_moe.experts.25.w2", "model.layers.56.block_sparse_moe.experts.26.w2", "model.layers.56.block_sparse_moe.experts.27.w2", "model.layers.56.block_sparse_moe.experts.28.w2", "model.layers.56.block_sparse_moe.experts.29.w2", "model.layers.56.block_sparse_moe.experts.30.w2", "model.layers.56.block_sparse_moe.experts.31.w2", "model.layers.56.block_sparse_moe.experts.32.w2", "model.layers.56.block_sparse_moe.experts.33.w2", "model.layers.56.block_sparse_moe.experts.34.w2", "model.layers.56.block_sparse_moe.experts.35.w2", "model.layers.56.block_sparse_moe.experts.36.w2", "model.layers.56.block_sparse_moe.experts.37.w2", "model.layers.56.block_sparse_moe.experts.38.w2", "model.layers.56.block_sparse_moe.experts.39.w2", "model.layers.56.block_sparse_moe.experts.40.w2", "model.layers.56.block_sparse_moe.experts.41.w2", "model.layers.56.block_sparse_moe.experts.42.w2", "model.layers.56.block_sparse_moe.experts.43.w2", "model.layers.56.block_sparse_moe.experts.44.w2", "model.layers.56.block_sparse_moe.experts.45.w2", "model.layers.56.block_sparse_moe.experts.46.w2", "model.layers.56.block_sparse_moe.experts.47.w2", "model.layers.56.block_sparse_moe.experts.48.w2", "model.layers.56.block_sparse_moe.experts.49.w2", "model.layers.56.block_sparse_moe.experts.50.w2", "model.layers.56.block_sparse_moe.experts.51.w2", "model.layers.56.block_sparse_moe.experts.52.w2", "model.layers.56.block_sparse_moe.experts.53.w2", "model.layers.56.block_sparse_moe.experts.54.w2", "model.layers.56.block_sparse_moe.experts.55.w2", "model.layers.56.block_sparse_moe.experts.56.w2", "model.layers.56.block_sparse_moe.experts.57.w2", "model.layers.56.block_sparse_moe.experts.58.w2", "model.layers.56.block_sparse_moe.experts.59.w2", "model.layers.56.block_sparse_moe.experts.60.w2", "model.layers.56.block_sparse_moe.experts.61.w2", "model.layers.56.block_sparse_moe.experts.62.w2", "model.layers.56.block_sparse_moe.experts.63.w2", "model.layers.56.block_sparse_moe.experts.64.w2", "model.layers.56.block_sparse_moe.experts.65.w2", "model.layers.56.block_sparse_moe.experts.66.w2", "model.layers.56.block_sparse_moe.experts.67.w2", "model.layers.56.block_sparse_moe.experts.68.w2", "model.layers.56.block_sparse_moe.experts.69.w2", "model.layers.56.block_sparse_moe.experts.70.w2", "model.layers.56.block_sparse_moe.experts.71.w2", "model.layers.56.block_sparse_moe.experts.72.w2", "model.layers.56.block_sparse_moe.experts.73.w2", "model.layers.56.block_sparse_moe.experts.74.w2", "model.layers.56.block_sparse_moe.experts.75.w2", "model.layers.56.block_sparse_moe.experts.76.w2", "model.layers.56.block_sparse_moe.experts.77.w2", "model.layers.56.block_sparse_moe.experts.78.w2", "model.layers.56.block_sparse_moe.experts.79.w2", "model.layers.56.block_sparse_moe.experts.80.w2", "model.layers.56.block_sparse_moe.experts.81.w2", "model.layers.56.block_sparse_moe.experts.82.w2", "model.layers.56.block_sparse_moe.experts.83.w2", "model.layers.56.block_sparse_moe.experts.84.w2", "model.layers.56.block_sparse_moe.experts.85.w2", "model.layers.56.block_sparse_moe.experts.86.w2", "model.layers.56.block_sparse_moe.experts.87.w2", "model.layers.56.block_sparse_moe.experts.88.w2", "model.layers.56.block_sparse_moe.experts.89.w2", "model.layers.56.block_sparse_moe.experts.90.w2", "model.layers.56.block_sparse_moe.experts.91.w2", "model.layers.56.block_sparse_moe.experts.92.w2", "model.layers.56.block_sparse_moe.experts.93.w2", "model.layers.56.block_sparse_moe.experts.94.w2", "model.layers.56.block_sparse_moe.experts.95.w2", "model.layers.56.block_sparse_moe.experts.96.w2", "model.layers.56.block_sparse_moe.experts.97.w2", "model.layers.56.block_sparse_moe.experts.98.w2", "model.layers.56.block_sparse_moe.experts.99.w2", "model.layers.56.block_sparse_moe.experts.100.w2", "model.layers.56.block_sparse_moe.experts.101.w2", "model.layers.56.block_sparse_moe.experts.102.w2", "model.layers.56.block_sparse_moe.experts.103.w2", "model.layers.56.block_sparse_moe.experts.104.w2", "model.layers.56.block_sparse_moe.experts.105.w2", "model.layers.56.block_sparse_moe.experts.106.w2", "model.layers.56.block_sparse_moe.experts.107.w2", "model.layers.56.block_sparse_moe.experts.108.w2", "model.layers.56.block_sparse_moe.experts.109.w2", "model.layers.56.block_sparse_moe.experts.110.w2", "model.layers.56.block_sparse_moe.experts.111.w2", "model.layers.56.block_sparse_moe.experts.112.w2", "model.layers.56.block_sparse_moe.experts.113.w2", "model.layers.56.block_sparse_moe.experts.114.w2", "model.layers.56.block_sparse_moe.experts.115.w2", "model.layers.56.block_sparse_moe.experts.116.w2", "model.layers.56.block_sparse_moe.experts.117.w2", "model.layers.56.block_sparse_moe.experts.118.w2", "model.layers.56.block_sparse_moe.experts.119.w2", "model.layers.56.block_sparse_moe.experts.120.w2", "model.layers.56.block_sparse_moe.experts.121.w2", "model.layers.56.block_sparse_moe.experts.122.w2", "model.layers.56.block_sparse_moe.experts.123.w2", "model.layers.56.block_sparse_moe.experts.124.w2", "model.layers.56.block_sparse_moe.experts.125.w2", "model.layers.56.block_sparse_moe.experts.126.w2", "model.layers.56.block_sparse_moe.experts.127.w2", "model.layers.56.block_sparse_moe.experts.128.w2", "model.layers.56.block_sparse_moe.experts.129.w2", "model.layers.56.block_sparse_moe.experts.130.w2", "model.layers.56.block_sparse_moe.experts.131.w2", "model.layers.56.block_sparse_moe.experts.132.w2", "model.layers.56.block_sparse_moe.experts.133.w2", "model.layers.56.block_sparse_moe.experts.134.w2", "model.layers.56.block_sparse_moe.experts.135.w2", "model.layers.56.block_sparse_moe.experts.136.w2", "model.layers.56.block_sparse_moe.experts.137.w2", "model.layers.56.block_sparse_moe.experts.138.w2", "model.layers.56.block_sparse_moe.experts.139.w2", "model.layers.56.block_sparse_moe.experts.140.w2", "model.layers.56.block_sparse_moe.experts.141.w2", "model.layers.56.block_sparse_moe.experts.142.w2", "model.layers.56.block_sparse_moe.experts.143.w2", "model.layers.56.block_sparse_moe.experts.144.w2", "model.layers.56.block_sparse_moe.experts.145.w2", "model.layers.56.block_sparse_moe.experts.146.w2", "model.layers.56.block_sparse_moe.experts.147.w2", "model.layers.56.block_sparse_moe.experts.148.w2", "model.layers.56.block_sparse_moe.experts.149.w2", "model.layers.56.block_sparse_moe.experts.150.w2", "model.layers.56.block_sparse_moe.experts.151.w2", "model.layers.56.block_sparse_moe.experts.152.w2", "model.layers.56.block_sparse_moe.experts.153.w2", "model.layers.56.block_sparse_moe.experts.154.w2", "model.layers.56.block_sparse_moe.experts.155.w2", "model.layers.56.block_sparse_moe.experts.156.w2", "model.layers.56.block_sparse_moe.experts.157.w2", "model.layers.56.block_sparse_moe.experts.158.w2", "model.layers.56.block_sparse_moe.experts.159.w2", "model.layers.56.block_sparse_moe.experts.160.w2", "model.layers.56.block_sparse_moe.experts.161.w2", "model.layers.56.block_sparse_moe.experts.162.w2", "model.layers.56.block_sparse_moe.experts.163.w2", "model.layers.56.block_sparse_moe.experts.164.w2", "model.layers.56.block_sparse_moe.experts.165.w2", "model.layers.56.block_sparse_moe.experts.166.w2", "model.layers.56.block_sparse_moe.experts.167.w2", "model.layers.56.block_sparse_moe.experts.168.w2", "model.layers.56.block_sparse_moe.experts.169.w2", "model.layers.56.block_sparse_moe.experts.170.w2", "model.layers.56.block_sparse_moe.experts.171.w2", "model.layers.56.block_sparse_moe.experts.172.w2", "model.layers.56.block_sparse_moe.experts.173.w2", "model.layers.56.block_sparse_moe.experts.174.w2", "model.layers.56.block_sparse_moe.experts.175.w2", "model.layers.56.block_sparse_moe.experts.176.w2", "model.layers.56.block_sparse_moe.experts.177.w2", "model.layers.56.block_sparse_moe.experts.178.w2", "model.layers.56.block_sparse_moe.experts.179.w2", "model.layers.56.block_sparse_moe.experts.180.w2", "model.layers.56.block_sparse_moe.experts.181.w2", "model.layers.56.block_sparse_moe.experts.182.w2", "model.layers.56.block_sparse_moe.experts.183.w2", "model.layers.56.block_sparse_moe.experts.184.w2", "model.layers.56.block_sparse_moe.experts.185.w2", "model.layers.56.block_sparse_moe.experts.186.w2", "model.layers.56.block_sparse_moe.experts.187.w2", "model.layers.56.block_sparse_moe.experts.188.w2", "model.layers.56.block_sparse_moe.experts.189.w2", "model.layers.56.block_sparse_moe.experts.190.w2", "model.layers.56.block_sparse_moe.experts.191.w2", "model.layers.56.block_sparse_moe.experts.192.w2", "model.layers.56.block_sparse_moe.experts.193.w2", "model.layers.56.block_sparse_moe.experts.194.w2", "model.layers.56.block_sparse_moe.experts.195.w2", "model.layers.56.block_sparse_moe.experts.196.w2", "model.layers.56.block_sparse_moe.experts.197.w2", "model.layers.56.block_sparse_moe.experts.198.w2", "model.layers.56.block_sparse_moe.experts.199.w2", "model.layers.56.block_sparse_moe.experts.200.w2", "model.layers.56.block_sparse_moe.experts.201.w2", "model.layers.56.block_sparse_moe.experts.202.w2", "model.layers.56.block_sparse_moe.experts.203.w2", "model.layers.56.block_sparse_moe.experts.204.w2", "model.layers.56.block_sparse_moe.experts.205.w2", "model.layers.56.block_sparse_moe.experts.206.w2", "model.layers.56.block_sparse_moe.experts.207.w2", "model.layers.56.block_sparse_moe.experts.208.w2", "model.layers.56.block_sparse_moe.experts.209.w2", "model.layers.56.block_sparse_moe.experts.210.w2", "model.layers.56.block_sparse_moe.experts.211.w2", "model.layers.56.block_sparse_moe.experts.212.w2", "model.layers.56.block_sparse_moe.experts.213.w2", "model.layers.56.block_sparse_moe.experts.214.w2", "model.layers.56.block_sparse_moe.experts.215.w2", "model.layers.56.block_sparse_moe.experts.216.w2", "model.layers.56.block_sparse_moe.experts.217.w2", "model.layers.56.block_sparse_moe.experts.218.w2", "model.layers.56.block_sparse_moe.experts.219.w2", "model.layers.56.block_sparse_moe.experts.220.w2", "model.layers.56.block_sparse_moe.experts.221.w2", "model.layers.56.block_sparse_moe.experts.222.w2", "model.layers.56.block_sparse_moe.experts.223.w2", "model.layers.56.block_sparse_moe.experts.224.w2", "model.layers.56.block_sparse_moe.experts.225.w2", "model.layers.56.block_sparse_moe.experts.226.w2", "model.layers.56.block_sparse_moe.experts.227.w2", "model.layers.56.block_sparse_moe.experts.228.w2", "model.layers.56.block_sparse_moe.experts.229.w2", "model.layers.56.block_sparse_moe.experts.230.w2", "model.layers.56.block_sparse_moe.experts.231.w2", "model.layers.56.block_sparse_moe.experts.232.w2", "model.layers.56.block_sparse_moe.experts.233.w2", "model.layers.56.block_sparse_moe.experts.234.w2", "model.layers.56.block_sparse_moe.experts.235.w2", "model.layers.56.block_sparse_moe.experts.236.w2", "model.layers.56.block_sparse_moe.experts.237.w2", "model.layers.56.block_sparse_moe.experts.238.w2", "model.layers.56.block_sparse_moe.experts.239.w2", "model.layers.56.block_sparse_moe.experts.240.w2", "model.layers.56.block_sparse_moe.experts.241.w2", "model.layers.56.block_sparse_moe.experts.242.w2", "model.layers.56.block_sparse_moe.experts.243.w2", "model.layers.56.block_sparse_moe.experts.244.w2", "model.layers.56.block_sparse_moe.experts.245.w2", "model.layers.56.block_sparse_moe.experts.246.w2", "model.layers.56.block_sparse_moe.experts.247.w2", "model.layers.56.block_sparse_moe.experts.248.w2", "model.layers.56.block_sparse_moe.experts.249.w2", "model.layers.56.block_sparse_moe.experts.250.w2", "model.layers.56.block_sparse_moe.experts.251.w2", "model.layers.56.block_sparse_moe.experts.252.w2", "model.layers.56.block_sparse_moe.experts.253.w2", "model.layers.56.block_sparse_moe.experts.254.w2", "model.layers.56.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0003104778006672859, "dbits": 3623878656 } ] }, { "idx": 114, "layers": [ "model.layers.57.self_attn.q_proj", "model.layers.57.self_attn.k_proj", "model.layers.57.self_attn.v_proj", "model.layers.57.self_attn.o_proj" ], "candidates": [ { "dkld": -2.994667738676071e-05, "dbits": 44040192 } ] }, { "idx": 115, "layers": [ "model.layers.57.block_sparse_moe.experts.0.w1", "model.layers.57.block_sparse_moe.experts.1.w1", "model.layers.57.block_sparse_moe.experts.2.w1", "model.layers.57.block_sparse_moe.experts.3.w1", "model.layers.57.block_sparse_moe.experts.4.w1", "model.layers.57.block_sparse_moe.experts.5.w1", "model.layers.57.block_sparse_moe.experts.6.w1", "model.layers.57.block_sparse_moe.experts.7.w1", "model.layers.57.block_sparse_moe.experts.8.w1", "model.layers.57.block_sparse_moe.experts.9.w1", "model.layers.57.block_sparse_moe.experts.10.w1", "model.layers.57.block_sparse_moe.experts.11.w1", "model.layers.57.block_sparse_moe.experts.12.w1", "model.layers.57.block_sparse_moe.experts.13.w1", "model.layers.57.block_sparse_moe.experts.14.w1", "model.layers.57.block_sparse_moe.experts.15.w1", "model.layers.57.block_sparse_moe.experts.16.w1", "model.layers.57.block_sparse_moe.experts.17.w1", "model.layers.57.block_sparse_moe.experts.18.w1", "model.layers.57.block_sparse_moe.experts.19.w1", "model.layers.57.block_sparse_moe.experts.20.w1", "model.layers.57.block_sparse_moe.experts.21.w1", "model.layers.57.block_sparse_moe.experts.22.w1", "model.layers.57.block_sparse_moe.experts.23.w1", "model.layers.57.block_sparse_moe.experts.24.w1", "model.layers.57.block_sparse_moe.experts.25.w1", "model.layers.57.block_sparse_moe.experts.26.w1", "model.layers.57.block_sparse_moe.experts.27.w1", "model.layers.57.block_sparse_moe.experts.28.w1", "model.layers.57.block_sparse_moe.experts.29.w1", "model.layers.57.block_sparse_moe.experts.30.w1", "model.layers.57.block_sparse_moe.experts.31.w1", "model.layers.57.block_sparse_moe.experts.32.w1", "model.layers.57.block_sparse_moe.experts.33.w1", "model.layers.57.block_sparse_moe.experts.34.w1", "model.layers.57.block_sparse_moe.experts.35.w1", "model.layers.57.block_sparse_moe.experts.36.w1", "model.layers.57.block_sparse_moe.experts.37.w1", "model.layers.57.block_sparse_moe.experts.38.w1", "model.layers.57.block_sparse_moe.experts.39.w1", "model.layers.57.block_sparse_moe.experts.40.w1", "model.layers.57.block_sparse_moe.experts.41.w1", "model.layers.57.block_sparse_moe.experts.42.w1", "model.layers.57.block_sparse_moe.experts.43.w1", "model.layers.57.block_sparse_moe.experts.44.w1", "model.layers.57.block_sparse_moe.experts.45.w1", "model.layers.57.block_sparse_moe.experts.46.w1", "model.layers.57.block_sparse_moe.experts.47.w1", "model.layers.57.block_sparse_moe.experts.48.w1", "model.layers.57.block_sparse_moe.experts.49.w1", "model.layers.57.block_sparse_moe.experts.50.w1", "model.layers.57.block_sparse_moe.experts.51.w1", "model.layers.57.block_sparse_moe.experts.52.w1", "model.layers.57.block_sparse_moe.experts.53.w1", "model.layers.57.block_sparse_moe.experts.54.w1", "model.layers.57.block_sparse_moe.experts.55.w1", "model.layers.57.block_sparse_moe.experts.56.w1", "model.layers.57.block_sparse_moe.experts.57.w1", "model.layers.57.block_sparse_moe.experts.58.w1", "model.layers.57.block_sparse_moe.experts.59.w1", "model.layers.57.block_sparse_moe.experts.60.w1", "model.layers.57.block_sparse_moe.experts.61.w1", "model.layers.57.block_sparse_moe.experts.62.w1", "model.layers.57.block_sparse_moe.experts.63.w1", "model.layers.57.block_sparse_moe.experts.64.w1", "model.layers.57.block_sparse_moe.experts.65.w1", "model.layers.57.block_sparse_moe.experts.66.w1", "model.layers.57.block_sparse_moe.experts.67.w1", "model.layers.57.block_sparse_moe.experts.68.w1", "model.layers.57.block_sparse_moe.experts.69.w1", "model.layers.57.block_sparse_moe.experts.70.w1", "model.layers.57.block_sparse_moe.experts.71.w1", "model.layers.57.block_sparse_moe.experts.72.w1", "model.layers.57.block_sparse_moe.experts.73.w1", "model.layers.57.block_sparse_moe.experts.74.w1", "model.layers.57.block_sparse_moe.experts.75.w1", "model.layers.57.block_sparse_moe.experts.76.w1", "model.layers.57.block_sparse_moe.experts.77.w1", "model.layers.57.block_sparse_moe.experts.78.w1", "model.layers.57.block_sparse_moe.experts.79.w1", "model.layers.57.block_sparse_moe.experts.80.w1", "model.layers.57.block_sparse_moe.experts.81.w1", "model.layers.57.block_sparse_moe.experts.82.w1", "model.layers.57.block_sparse_moe.experts.83.w1", "model.layers.57.block_sparse_moe.experts.84.w1", "model.layers.57.block_sparse_moe.experts.85.w1", "model.layers.57.block_sparse_moe.experts.86.w1", "model.layers.57.block_sparse_moe.experts.87.w1", "model.layers.57.block_sparse_moe.experts.88.w1", "model.layers.57.block_sparse_moe.experts.89.w1", "model.layers.57.block_sparse_moe.experts.90.w1", "model.layers.57.block_sparse_moe.experts.91.w1", "model.layers.57.block_sparse_moe.experts.92.w1", "model.layers.57.block_sparse_moe.experts.93.w1", "model.layers.57.block_sparse_moe.experts.94.w1", "model.layers.57.block_sparse_moe.experts.95.w1", "model.layers.57.block_sparse_moe.experts.96.w1", "model.layers.57.block_sparse_moe.experts.97.w1", "model.layers.57.block_sparse_moe.experts.98.w1", "model.layers.57.block_sparse_moe.experts.99.w1", "model.layers.57.block_sparse_moe.experts.100.w1", "model.layers.57.block_sparse_moe.experts.101.w1", "model.layers.57.block_sparse_moe.experts.102.w1", "model.layers.57.block_sparse_moe.experts.103.w1", "model.layers.57.block_sparse_moe.experts.104.w1", "model.layers.57.block_sparse_moe.experts.105.w1", "model.layers.57.block_sparse_moe.experts.106.w1", "model.layers.57.block_sparse_moe.experts.107.w1", "model.layers.57.block_sparse_moe.experts.108.w1", "model.layers.57.block_sparse_moe.experts.109.w1", "model.layers.57.block_sparse_moe.experts.110.w1", "model.layers.57.block_sparse_moe.experts.111.w1", "model.layers.57.block_sparse_moe.experts.112.w1", "model.layers.57.block_sparse_moe.experts.113.w1", "model.layers.57.block_sparse_moe.experts.114.w1", "model.layers.57.block_sparse_moe.experts.115.w1", "model.layers.57.block_sparse_moe.experts.116.w1", "model.layers.57.block_sparse_moe.experts.117.w1", "model.layers.57.block_sparse_moe.experts.118.w1", "model.layers.57.block_sparse_moe.experts.119.w1", "model.layers.57.block_sparse_moe.experts.120.w1", "model.layers.57.block_sparse_moe.experts.121.w1", "model.layers.57.block_sparse_moe.experts.122.w1", "model.layers.57.block_sparse_moe.experts.123.w1", "model.layers.57.block_sparse_moe.experts.124.w1", "model.layers.57.block_sparse_moe.experts.125.w1", "model.layers.57.block_sparse_moe.experts.126.w1", "model.layers.57.block_sparse_moe.experts.127.w1", "model.layers.57.block_sparse_moe.experts.128.w1", "model.layers.57.block_sparse_moe.experts.129.w1", "model.layers.57.block_sparse_moe.experts.130.w1", "model.layers.57.block_sparse_moe.experts.131.w1", "model.layers.57.block_sparse_moe.experts.132.w1", "model.layers.57.block_sparse_moe.experts.133.w1", "model.layers.57.block_sparse_moe.experts.134.w1", "model.layers.57.block_sparse_moe.experts.135.w1", "model.layers.57.block_sparse_moe.experts.136.w1", "model.layers.57.block_sparse_moe.experts.137.w1", "model.layers.57.block_sparse_moe.experts.138.w1", "model.layers.57.block_sparse_moe.experts.139.w1", "model.layers.57.block_sparse_moe.experts.140.w1", "model.layers.57.block_sparse_moe.experts.141.w1", "model.layers.57.block_sparse_moe.experts.142.w1", "model.layers.57.block_sparse_moe.experts.143.w1", "model.layers.57.block_sparse_moe.experts.144.w1", "model.layers.57.block_sparse_moe.experts.145.w1", "model.layers.57.block_sparse_moe.experts.146.w1", "model.layers.57.block_sparse_moe.experts.147.w1", "model.layers.57.block_sparse_moe.experts.148.w1", "model.layers.57.block_sparse_moe.experts.149.w1", "model.layers.57.block_sparse_moe.experts.150.w1", "model.layers.57.block_sparse_moe.experts.151.w1", "model.layers.57.block_sparse_moe.experts.152.w1", "model.layers.57.block_sparse_moe.experts.153.w1", "model.layers.57.block_sparse_moe.experts.154.w1", "model.layers.57.block_sparse_moe.experts.155.w1", "model.layers.57.block_sparse_moe.experts.156.w1", "model.layers.57.block_sparse_moe.experts.157.w1", "model.layers.57.block_sparse_moe.experts.158.w1", "model.layers.57.block_sparse_moe.experts.159.w1", "model.layers.57.block_sparse_moe.experts.160.w1", "model.layers.57.block_sparse_moe.experts.161.w1", "model.layers.57.block_sparse_moe.experts.162.w1", "model.layers.57.block_sparse_moe.experts.163.w1", "model.layers.57.block_sparse_moe.experts.164.w1", "model.layers.57.block_sparse_moe.experts.165.w1", "model.layers.57.block_sparse_moe.experts.166.w1", "model.layers.57.block_sparse_moe.experts.167.w1", "model.layers.57.block_sparse_moe.experts.168.w1", "model.layers.57.block_sparse_moe.experts.169.w1", "model.layers.57.block_sparse_moe.experts.170.w1", "model.layers.57.block_sparse_moe.experts.171.w1", "model.layers.57.block_sparse_moe.experts.172.w1", "model.layers.57.block_sparse_moe.experts.173.w1", "model.layers.57.block_sparse_moe.experts.174.w1", "model.layers.57.block_sparse_moe.experts.175.w1", "model.layers.57.block_sparse_moe.experts.176.w1", "model.layers.57.block_sparse_moe.experts.177.w1", "model.layers.57.block_sparse_moe.experts.178.w1", "model.layers.57.block_sparse_moe.experts.179.w1", "model.layers.57.block_sparse_moe.experts.180.w1", "model.layers.57.block_sparse_moe.experts.181.w1", "model.layers.57.block_sparse_moe.experts.182.w1", "model.layers.57.block_sparse_moe.experts.183.w1", "model.layers.57.block_sparse_moe.experts.184.w1", "model.layers.57.block_sparse_moe.experts.185.w1", "model.layers.57.block_sparse_moe.experts.186.w1", "model.layers.57.block_sparse_moe.experts.187.w1", "model.layers.57.block_sparse_moe.experts.188.w1", "model.layers.57.block_sparse_moe.experts.189.w1", "model.layers.57.block_sparse_moe.experts.190.w1", "model.layers.57.block_sparse_moe.experts.191.w1", "model.layers.57.block_sparse_moe.experts.192.w1", "model.layers.57.block_sparse_moe.experts.193.w1", "model.layers.57.block_sparse_moe.experts.194.w1", "model.layers.57.block_sparse_moe.experts.195.w1", "model.layers.57.block_sparse_moe.experts.196.w1", "model.layers.57.block_sparse_moe.experts.197.w1", "model.layers.57.block_sparse_moe.experts.198.w1", "model.layers.57.block_sparse_moe.experts.199.w1", "model.layers.57.block_sparse_moe.experts.200.w1", "model.layers.57.block_sparse_moe.experts.201.w1", "model.layers.57.block_sparse_moe.experts.202.w1", "model.layers.57.block_sparse_moe.experts.203.w1", "model.layers.57.block_sparse_moe.experts.204.w1", "model.layers.57.block_sparse_moe.experts.205.w1", "model.layers.57.block_sparse_moe.experts.206.w1", "model.layers.57.block_sparse_moe.experts.207.w1", "model.layers.57.block_sparse_moe.experts.208.w1", "model.layers.57.block_sparse_moe.experts.209.w1", "model.layers.57.block_sparse_moe.experts.210.w1", "model.layers.57.block_sparse_moe.experts.211.w1", "model.layers.57.block_sparse_moe.experts.212.w1", "model.layers.57.block_sparse_moe.experts.213.w1", "model.layers.57.block_sparse_moe.experts.214.w1", "model.layers.57.block_sparse_moe.experts.215.w1", "model.layers.57.block_sparse_moe.experts.216.w1", "model.layers.57.block_sparse_moe.experts.217.w1", "model.layers.57.block_sparse_moe.experts.218.w1", "model.layers.57.block_sparse_moe.experts.219.w1", "model.layers.57.block_sparse_moe.experts.220.w1", "model.layers.57.block_sparse_moe.experts.221.w1", "model.layers.57.block_sparse_moe.experts.222.w1", "model.layers.57.block_sparse_moe.experts.223.w1", "model.layers.57.block_sparse_moe.experts.224.w1", "model.layers.57.block_sparse_moe.experts.225.w1", "model.layers.57.block_sparse_moe.experts.226.w1", "model.layers.57.block_sparse_moe.experts.227.w1", "model.layers.57.block_sparse_moe.experts.228.w1", "model.layers.57.block_sparse_moe.experts.229.w1", "model.layers.57.block_sparse_moe.experts.230.w1", "model.layers.57.block_sparse_moe.experts.231.w1", "model.layers.57.block_sparse_moe.experts.232.w1", "model.layers.57.block_sparse_moe.experts.233.w1", "model.layers.57.block_sparse_moe.experts.234.w1", "model.layers.57.block_sparse_moe.experts.235.w1", "model.layers.57.block_sparse_moe.experts.236.w1", "model.layers.57.block_sparse_moe.experts.237.w1", "model.layers.57.block_sparse_moe.experts.238.w1", "model.layers.57.block_sparse_moe.experts.239.w1", "model.layers.57.block_sparse_moe.experts.240.w1", "model.layers.57.block_sparse_moe.experts.241.w1", "model.layers.57.block_sparse_moe.experts.242.w1", "model.layers.57.block_sparse_moe.experts.243.w1", "model.layers.57.block_sparse_moe.experts.244.w1", "model.layers.57.block_sparse_moe.experts.245.w1", "model.layers.57.block_sparse_moe.experts.246.w1", "model.layers.57.block_sparse_moe.experts.247.w1", "model.layers.57.block_sparse_moe.experts.248.w1", "model.layers.57.block_sparse_moe.experts.249.w1", "model.layers.57.block_sparse_moe.experts.250.w1", "model.layers.57.block_sparse_moe.experts.251.w1", "model.layers.57.block_sparse_moe.experts.252.w1", "model.layers.57.block_sparse_moe.experts.253.w1", "model.layers.57.block_sparse_moe.experts.254.w1", "model.layers.57.block_sparse_moe.experts.255.w1", "model.layers.57.block_sparse_moe.experts.0.w3", "model.layers.57.block_sparse_moe.experts.1.w3", "model.layers.57.block_sparse_moe.experts.2.w3", "model.layers.57.block_sparse_moe.experts.3.w3", "model.layers.57.block_sparse_moe.experts.4.w3", "model.layers.57.block_sparse_moe.experts.5.w3", "model.layers.57.block_sparse_moe.experts.6.w3", "model.layers.57.block_sparse_moe.experts.7.w3", "model.layers.57.block_sparse_moe.experts.8.w3", "model.layers.57.block_sparse_moe.experts.9.w3", "model.layers.57.block_sparse_moe.experts.10.w3", "model.layers.57.block_sparse_moe.experts.11.w3", "model.layers.57.block_sparse_moe.experts.12.w3", "model.layers.57.block_sparse_moe.experts.13.w3", "model.layers.57.block_sparse_moe.experts.14.w3", "model.layers.57.block_sparse_moe.experts.15.w3", "model.layers.57.block_sparse_moe.experts.16.w3", "model.layers.57.block_sparse_moe.experts.17.w3", "model.layers.57.block_sparse_moe.experts.18.w3", "model.layers.57.block_sparse_moe.experts.19.w3", "model.layers.57.block_sparse_moe.experts.20.w3", "model.layers.57.block_sparse_moe.experts.21.w3", "model.layers.57.block_sparse_moe.experts.22.w3", "model.layers.57.block_sparse_moe.experts.23.w3", "model.layers.57.block_sparse_moe.experts.24.w3", "model.layers.57.block_sparse_moe.experts.25.w3", "model.layers.57.block_sparse_moe.experts.26.w3", "model.layers.57.block_sparse_moe.experts.27.w3", "model.layers.57.block_sparse_moe.experts.28.w3", "model.layers.57.block_sparse_moe.experts.29.w3", "model.layers.57.block_sparse_moe.experts.30.w3", "model.layers.57.block_sparse_moe.experts.31.w3", "model.layers.57.block_sparse_moe.experts.32.w3", "model.layers.57.block_sparse_moe.experts.33.w3", "model.layers.57.block_sparse_moe.experts.34.w3", "model.layers.57.block_sparse_moe.experts.35.w3", "model.layers.57.block_sparse_moe.experts.36.w3", "model.layers.57.block_sparse_moe.experts.37.w3", "model.layers.57.block_sparse_moe.experts.38.w3", "model.layers.57.block_sparse_moe.experts.39.w3", "model.layers.57.block_sparse_moe.experts.40.w3", "model.layers.57.block_sparse_moe.experts.41.w3", "model.layers.57.block_sparse_moe.experts.42.w3", "model.layers.57.block_sparse_moe.experts.43.w3", "model.layers.57.block_sparse_moe.experts.44.w3", "model.layers.57.block_sparse_moe.experts.45.w3", "model.layers.57.block_sparse_moe.experts.46.w3", "model.layers.57.block_sparse_moe.experts.47.w3", "model.layers.57.block_sparse_moe.experts.48.w3", "model.layers.57.block_sparse_moe.experts.49.w3", "model.layers.57.block_sparse_moe.experts.50.w3", "model.layers.57.block_sparse_moe.experts.51.w3", "model.layers.57.block_sparse_moe.experts.52.w3", "model.layers.57.block_sparse_moe.experts.53.w3", "model.layers.57.block_sparse_moe.experts.54.w3", "model.layers.57.block_sparse_moe.experts.55.w3", "model.layers.57.block_sparse_moe.experts.56.w3", "model.layers.57.block_sparse_moe.experts.57.w3", "model.layers.57.block_sparse_moe.experts.58.w3", "model.layers.57.block_sparse_moe.experts.59.w3", "model.layers.57.block_sparse_moe.experts.60.w3", "model.layers.57.block_sparse_moe.experts.61.w3", "model.layers.57.block_sparse_moe.experts.62.w3", "model.layers.57.block_sparse_moe.experts.63.w3", "model.layers.57.block_sparse_moe.experts.64.w3", "model.layers.57.block_sparse_moe.experts.65.w3", "model.layers.57.block_sparse_moe.experts.66.w3", "model.layers.57.block_sparse_moe.experts.67.w3", "model.layers.57.block_sparse_moe.experts.68.w3", "model.layers.57.block_sparse_moe.experts.69.w3", "model.layers.57.block_sparse_moe.experts.70.w3", "model.layers.57.block_sparse_moe.experts.71.w3", "model.layers.57.block_sparse_moe.experts.72.w3", "model.layers.57.block_sparse_moe.experts.73.w3", "model.layers.57.block_sparse_moe.experts.74.w3", "model.layers.57.block_sparse_moe.experts.75.w3", "model.layers.57.block_sparse_moe.experts.76.w3", "model.layers.57.block_sparse_moe.experts.77.w3", "model.layers.57.block_sparse_moe.experts.78.w3", "model.layers.57.block_sparse_moe.experts.79.w3", "model.layers.57.block_sparse_moe.experts.80.w3", "model.layers.57.block_sparse_moe.experts.81.w3", "model.layers.57.block_sparse_moe.experts.82.w3", "model.layers.57.block_sparse_moe.experts.83.w3", "model.layers.57.block_sparse_moe.experts.84.w3", "model.layers.57.block_sparse_moe.experts.85.w3", "model.layers.57.block_sparse_moe.experts.86.w3", "model.layers.57.block_sparse_moe.experts.87.w3", "model.layers.57.block_sparse_moe.experts.88.w3", "model.layers.57.block_sparse_moe.experts.89.w3", "model.layers.57.block_sparse_moe.experts.90.w3", "model.layers.57.block_sparse_moe.experts.91.w3", "model.layers.57.block_sparse_moe.experts.92.w3", "model.layers.57.block_sparse_moe.experts.93.w3", "model.layers.57.block_sparse_moe.experts.94.w3", "model.layers.57.block_sparse_moe.experts.95.w3", "model.layers.57.block_sparse_moe.experts.96.w3", "model.layers.57.block_sparse_moe.experts.97.w3", "model.layers.57.block_sparse_moe.experts.98.w3", "model.layers.57.block_sparse_moe.experts.99.w3", "model.layers.57.block_sparse_moe.experts.100.w3", "model.layers.57.block_sparse_moe.experts.101.w3", "model.layers.57.block_sparse_moe.experts.102.w3", "model.layers.57.block_sparse_moe.experts.103.w3", "model.layers.57.block_sparse_moe.experts.104.w3", "model.layers.57.block_sparse_moe.experts.105.w3", "model.layers.57.block_sparse_moe.experts.106.w3", "model.layers.57.block_sparse_moe.experts.107.w3", "model.layers.57.block_sparse_moe.experts.108.w3", "model.layers.57.block_sparse_moe.experts.109.w3", "model.layers.57.block_sparse_moe.experts.110.w3", "model.layers.57.block_sparse_moe.experts.111.w3", "model.layers.57.block_sparse_moe.experts.112.w3", "model.layers.57.block_sparse_moe.experts.113.w3", "model.layers.57.block_sparse_moe.experts.114.w3", "model.layers.57.block_sparse_moe.experts.115.w3", "model.layers.57.block_sparse_moe.experts.116.w3", "model.layers.57.block_sparse_moe.experts.117.w3", "model.layers.57.block_sparse_moe.experts.118.w3", "model.layers.57.block_sparse_moe.experts.119.w3", "model.layers.57.block_sparse_moe.experts.120.w3", "model.layers.57.block_sparse_moe.experts.121.w3", "model.layers.57.block_sparse_moe.experts.122.w3", "model.layers.57.block_sparse_moe.experts.123.w3", "model.layers.57.block_sparse_moe.experts.124.w3", "model.layers.57.block_sparse_moe.experts.125.w3", "model.layers.57.block_sparse_moe.experts.126.w3", "model.layers.57.block_sparse_moe.experts.127.w3", "model.layers.57.block_sparse_moe.experts.128.w3", "model.layers.57.block_sparse_moe.experts.129.w3", "model.layers.57.block_sparse_moe.experts.130.w3", "model.layers.57.block_sparse_moe.experts.131.w3", "model.layers.57.block_sparse_moe.experts.132.w3", "model.layers.57.block_sparse_moe.experts.133.w3", "model.layers.57.block_sparse_moe.experts.134.w3", "model.layers.57.block_sparse_moe.experts.135.w3", "model.layers.57.block_sparse_moe.experts.136.w3", "model.layers.57.block_sparse_moe.experts.137.w3", "model.layers.57.block_sparse_moe.experts.138.w3", "model.layers.57.block_sparse_moe.experts.139.w3", "model.layers.57.block_sparse_moe.experts.140.w3", "model.layers.57.block_sparse_moe.experts.141.w3", "model.layers.57.block_sparse_moe.experts.142.w3", "model.layers.57.block_sparse_moe.experts.143.w3", "model.layers.57.block_sparse_moe.experts.144.w3", "model.layers.57.block_sparse_moe.experts.145.w3", "model.layers.57.block_sparse_moe.experts.146.w3", "model.layers.57.block_sparse_moe.experts.147.w3", "model.layers.57.block_sparse_moe.experts.148.w3", "model.layers.57.block_sparse_moe.experts.149.w3", "model.layers.57.block_sparse_moe.experts.150.w3", "model.layers.57.block_sparse_moe.experts.151.w3", "model.layers.57.block_sparse_moe.experts.152.w3", "model.layers.57.block_sparse_moe.experts.153.w3", "model.layers.57.block_sparse_moe.experts.154.w3", "model.layers.57.block_sparse_moe.experts.155.w3", "model.layers.57.block_sparse_moe.experts.156.w3", "model.layers.57.block_sparse_moe.experts.157.w3", "model.layers.57.block_sparse_moe.experts.158.w3", "model.layers.57.block_sparse_moe.experts.159.w3", "model.layers.57.block_sparse_moe.experts.160.w3", "model.layers.57.block_sparse_moe.experts.161.w3", "model.layers.57.block_sparse_moe.experts.162.w3", "model.layers.57.block_sparse_moe.experts.163.w3", "model.layers.57.block_sparse_moe.experts.164.w3", "model.layers.57.block_sparse_moe.experts.165.w3", "model.layers.57.block_sparse_moe.experts.166.w3", "model.layers.57.block_sparse_moe.experts.167.w3", "model.layers.57.block_sparse_moe.experts.168.w3", "model.layers.57.block_sparse_moe.experts.169.w3", "model.layers.57.block_sparse_moe.experts.170.w3", "model.layers.57.block_sparse_moe.experts.171.w3", "model.layers.57.block_sparse_moe.experts.172.w3", "model.layers.57.block_sparse_moe.experts.173.w3", "model.layers.57.block_sparse_moe.experts.174.w3", "model.layers.57.block_sparse_moe.experts.175.w3", "model.layers.57.block_sparse_moe.experts.176.w3", "model.layers.57.block_sparse_moe.experts.177.w3", "model.layers.57.block_sparse_moe.experts.178.w3", "model.layers.57.block_sparse_moe.experts.179.w3", "model.layers.57.block_sparse_moe.experts.180.w3", "model.layers.57.block_sparse_moe.experts.181.w3", "model.layers.57.block_sparse_moe.experts.182.w3", "model.layers.57.block_sparse_moe.experts.183.w3", "model.layers.57.block_sparse_moe.experts.184.w3", "model.layers.57.block_sparse_moe.experts.185.w3", "model.layers.57.block_sparse_moe.experts.186.w3", "model.layers.57.block_sparse_moe.experts.187.w3", "model.layers.57.block_sparse_moe.experts.188.w3", "model.layers.57.block_sparse_moe.experts.189.w3", "model.layers.57.block_sparse_moe.experts.190.w3", "model.layers.57.block_sparse_moe.experts.191.w3", "model.layers.57.block_sparse_moe.experts.192.w3", "model.layers.57.block_sparse_moe.experts.193.w3", "model.layers.57.block_sparse_moe.experts.194.w3", "model.layers.57.block_sparse_moe.experts.195.w3", "model.layers.57.block_sparse_moe.experts.196.w3", "model.layers.57.block_sparse_moe.experts.197.w3", "model.layers.57.block_sparse_moe.experts.198.w3", "model.layers.57.block_sparse_moe.experts.199.w3", "model.layers.57.block_sparse_moe.experts.200.w3", "model.layers.57.block_sparse_moe.experts.201.w3", "model.layers.57.block_sparse_moe.experts.202.w3", "model.layers.57.block_sparse_moe.experts.203.w3", "model.layers.57.block_sparse_moe.experts.204.w3", "model.layers.57.block_sparse_moe.experts.205.w3", "model.layers.57.block_sparse_moe.experts.206.w3", "model.layers.57.block_sparse_moe.experts.207.w3", "model.layers.57.block_sparse_moe.experts.208.w3", "model.layers.57.block_sparse_moe.experts.209.w3", "model.layers.57.block_sparse_moe.experts.210.w3", "model.layers.57.block_sparse_moe.experts.211.w3", "model.layers.57.block_sparse_moe.experts.212.w3", "model.layers.57.block_sparse_moe.experts.213.w3", "model.layers.57.block_sparse_moe.experts.214.w3", "model.layers.57.block_sparse_moe.experts.215.w3", "model.layers.57.block_sparse_moe.experts.216.w3", "model.layers.57.block_sparse_moe.experts.217.w3", "model.layers.57.block_sparse_moe.experts.218.w3", "model.layers.57.block_sparse_moe.experts.219.w3", "model.layers.57.block_sparse_moe.experts.220.w3", "model.layers.57.block_sparse_moe.experts.221.w3", "model.layers.57.block_sparse_moe.experts.222.w3", "model.layers.57.block_sparse_moe.experts.223.w3", "model.layers.57.block_sparse_moe.experts.224.w3", "model.layers.57.block_sparse_moe.experts.225.w3", "model.layers.57.block_sparse_moe.experts.226.w3", "model.layers.57.block_sparse_moe.experts.227.w3", "model.layers.57.block_sparse_moe.experts.228.w3", "model.layers.57.block_sparse_moe.experts.229.w3", "model.layers.57.block_sparse_moe.experts.230.w3", "model.layers.57.block_sparse_moe.experts.231.w3", "model.layers.57.block_sparse_moe.experts.232.w3", "model.layers.57.block_sparse_moe.experts.233.w3", "model.layers.57.block_sparse_moe.experts.234.w3", "model.layers.57.block_sparse_moe.experts.235.w3", "model.layers.57.block_sparse_moe.experts.236.w3", "model.layers.57.block_sparse_moe.experts.237.w3", "model.layers.57.block_sparse_moe.experts.238.w3", "model.layers.57.block_sparse_moe.experts.239.w3", "model.layers.57.block_sparse_moe.experts.240.w3", "model.layers.57.block_sparse_moe.experts.241.w3", "model.layers.57.block_sparse_moe.experts.242.w3", "model.layers.57.block_sparse_moe.experts.243.w3", "model.layers.57.block_sparse_moe.experts.244.w3", "model.layers.57.block_sparse_moe.experts.245.w3", "model.layers.57.block_sparse_moe.experts.246.w3", "model.layers.57.block_sparse_moe.experts.247.w3", "model.layers.57.block_sparse_moe.experts.248.w3", "model.layers.57.block_sparse_moe.experts.249.w3", "model.layers.57.block_sparse_moe.experts.250.w3", "model.layers.57.block_sparse_moe.experts.251.w3", "model.layers.57.block_sparse_moe.experts.252.w3", "model.layers.57.block_sparse_moe.experts.253.w3", "model.layers.57.block_sparse_moe.experts.254.w3", "model.layers.57.block_sparse_moe.experts.255.w3", "model.layers.57.block_sparse_moe.experts.0.w2", "model.layers.57.block_sparse_moe.experts.1.w2", "model.layers.57.block_sparse_moe.experts.2.w2", "model.layers.57.block_sparse_moe.experts.3.w2", "model.layers.57.block_sparse_moe.experts.4.w2", "model.layers.57.block_sparse_moe.experts.5.w2", "model.layers.57.block_sparse_moe.experts.6.w2", "model.layers.57.block_sparse_moe.experts.7.w2", "model.layers.57.block_sparse_moe.experts.8.w2", "model.layers.57.block_sparse_moe.experts.9.w2", "model.layers.57.block_sparse_moe.experts.10.w2", "model.layers.57.block_sparse_moe.experts.11.w2", "model.layers.57.block_sparse_moe.experts.12.w2", "model.layers.57.block_sparse_moe.experts.13.w2", "model.layers.57.block_sparse_moe.experts.14.w2", "model.layers.57.block_sparse_moe.experts.15.w2", "model.layers.57.block_sparse_moe.experts.16.w2", "model.layers.57.block_sparse_moe.experts.17.w2", "model.layers.57.block_sparse_moe.experts.18.w2", "model.layers.57.block_sparse_moe.experts.19.w2", "model.layers.57.block_sparse_moe.experts.20.w2", "model.layers.57.block_sparse_moe.experts.21.w2", "model.layers.57.block_sparse_moe.experts.22.w2", "model.layers.57.block_sparse_moe.experts.23.w2", "model.layers.57.block_sparse_moe.experts.24.w2", "model.layers.57.block_sparse_moe.experts.25.w2", "model.layers.57.block_sparse_moe.experts.26.w2", "model.layers.57.block_sparse_moe.experts.27.w2", "model.layers.57.block_sparse_moe.experts.28.w2", "model.layers.57.block_sparse_moe.experts.29.w2", "model.layers.57.block_sparse_moe.experts.30.w2", "model.layers.57.block_sparse_moe.experts.31.w2", "model.layers.57.block_sparse_moe.experts.32.w2", "model.layers.57.block_sparse_moe.experts.33.w2", "model.layers.57.block_sparse_moe.experts.34.w2", "model.layers.57.block_sparse_moe.experts.35.w2", "model.layers.57.block_sparse_moe.experts.36.w2", "model.layers.57.block_sparse_moe.experts.37.w2", "model.layers.57.block_sparse_moe.experts.38.w2", "model.layers.57.block_sparse_moe.experts.39.w2", "model.layers.57.block_sparse_moe.experts.40.w2", "model.layers.57.block_sparse_moe.experts.41.w2", "model.layers.57.block_sparse_moe.experts.42.w2", "model.layers.57.block_sparse_moe.experts.43.w2", "model.layers.57.block_sparse_moe.experts.44.w2", "model.layers.57.block_sparse_moe.experts.45.w2", "model.layers.57.block_sparse_moe.experts.46.w2", "model.layers.57.block_sparse_moe.experts.47.w2", "model.layers.57.block_sparse_moe.experts.48.w2", "model.layers.57.block_sparse_moe.experts.49.w2", "model.layers.57.block_sparse_moe.experts.50.w2", "model.layers.57.block_sparse_moe.experts.51.w2", "model.layers.57.block_sparse_moe.experts.52.w2", "model.layers.57.block_sparse_moe.experts.53.w2", "model.layers.57.block_sparse_moe.experts.54.w2", "model.layers.57.block_sparse_moe.experts.55.w2", "model.layers.57.block_sparse_moe.experts.56.w2", "model.layers.57.block_sparse_moe.experts.57.w2", "model.layers.57.block_sparse_moe.experts.58.w2", "model.layers.57.block_sparse_moe.experts.59.w2", "model.layers.57.block_sparse_moe.experts.60.w2", "model.layers.57.block_sparse_moe.experts.61.w2", "model.layers.57.block_sparse_moe.experts.62.w2", "model.layers.57.block_sparse_moe.experts.63.w2", "model.layers.57.block_sparse_moe.experts.64.w2", "model.layers.57.block_sparse_moe.experts.65.w2", "model.layers.57.block_sparse_moe.experts.66.w2", "model.layers.57.block_sparse_moe.experts.67.w2", "model.layers.57.block_sparse_moe.experts.68.w2", "model.layers.57.block_sparse_moe.experts.69.w2", "model.layers.57.block_sparse_moe.experts.70.w2", "model.layers.57.block_sparse_moe.experts.71.w2", "model.layers.57.block_sparse_moe.experts.72.w2", "model.layers.57.block_sparse_moe.experts.73.w2", "model.layers.57.block_sparse_moe.experts.74.w2", "model.layers.57.block_sparse_moe.experts.75.w2", "model.layers.57.block_sparse_moe.experts.76.w2", "model.layers.57.block_sparse_moe.experts.77.w2", "model.layers.57.block_sparse_moe.experts.78.w2", "model.layers.57.block_sparse_moe.experts.79.w2", "model.layers.57.block_sparse_moe.experts.80.w2", "model.layers.57.block_sparse_moe.experts.81.w2", "model.layers.57.block_sparse_moe.experts.82.w2", "model.layers.57.block_sparse_moe.experts.83.w2", "model.layers.57.block_sparse_moe.experts.84.w2", "model.layers.57.block_sparse_moe.experts.85.w2", "model.layers.57.block_sparse_moe.experts.86.w2", "model.layers.57.block_sparse_moe.experts.87.w2", "model.layers.57.block_sparse_moe.experts.88.w2", "model.layers.57.block_sparse_moe.experts.89.w2", "model.layers.57.block_sparse_moe.experts.90.w2", "model.layers.57.block_sparse_moe.experts.91.w2", "model.layers.57.block_sparse_moe.experts.92.w2", "model.layers.57.block_sparse_moe.experts.93.w2", "model.layers.57.block_sparse_moe.experts.94.w2", "model.layers.57.block_sparse_moe.experts.95.w2", "model.layers.57.block_sparse_moe.experts.96.w2", "model.layers.57.block_sparse_moe.experts.97.w2", "model.layers.57.block_sparse_moe.experts.98.w2", "model.layers.57.block_sparse_moe.experts.99.w2", "model.layers.57.block_sparse_moe.experts.100.w2", "model.layers.57.block_sparse_moe.experts.101.w2", "model.layers.57.block_sparse_moe.experts.102.w2", "model.layers.57.block_sparse_moe.experts.103.w2", "model.layers.57.block_sparse_moe.experts.104.w2", "model.layers.57.block_sparse_moe.experts.105.w2", "model.layers.57.block_sparse_moe.experts.106.w2", "model.layers.57.block_sparse_moe.experts.107.w2", "model.layers.57.block_sparse_moe.experts.108.w2", "model.layers.57.block_sparse_moe.experts.109.w2", "model.layers.57.block_sparse_moe.experts.110.w2", "model.layers.57.block_sparse_moe.experts.111.w2", "model.layers.57.block_sparse_moe.experts.112.w2", "model.layers.57.block_sparse_moe.experts.113.w2", "model.layers.57.block_sparse_moe.experts.114.w2", "model.layers.57.block_sparse_moe.experts.115.w2", "model.layers.57.block_sparse_moe.experts.116.w2", "model.layers.57.block_sparse_moe.experts.117.w2", "model.layers.57.block_sparse_moe.experts.118.w2", "model.layers.57.block_sparse_moe.experts.119.w2", "model.layers.57.block_sparse_moe.experts.120.w2", "model.layers.57.block_sparse_moe.experts.121.w2", "model.layers.57.block_sparse_moe.experts.122.w2", "model.layers.57.block_sparse_moe.experts.123.w2", "model.layers.57.block_sparse_moe.experts.124.w2", "model.layers.57.block_sparse_moe.experts.125.w2", "model.layers.57.block_sparse_moe.experts.126.w2", "model.layers.57.block_sparse_moe.experts.127.w2", "model.layers.57.block_sparse_moe.experts.128.w2", "model.layers.57.block_sparse_moe.experts.129.w2", "model.layers.57.block_sparse_moe.experts.130.w2", "model.layers.57.block_sparse_moe.experts.131.w2", "model.layers.57.block_sparse_moe.experts.132.w2", "model.layers.57.block_sparse_moe.experts.133.w2", "model.layers.57.block_sparse_moe.experts.134.w2", "model.layers.57.block_sparse_moe.experts.135.w2", "model.layers.57.block_sparse_moe.experts.136.w2", "model.layers.57.block_sparse_moe.experts.137.w2", "model.layers.57.block_sparse_moe.experts.138.w2", "model.layers.57.block_sparse_moe.experts.139.w2", "model.layers.57.block_sparse_moe.experts.140.w2", "model.layers.57.block_sparse_moe.experts.141.w2", "model.layers.57.block_sparse_moe.experts.142.w2", "model.layers.57.block_sparse_moe.experts.143.w2", "model.layers.57.block_sparse_moe.experts.144.w2", "model.layers.57.block_sparse_moe.experts.145.w2", "model.layers.57.block_sparse_moe.experts.146.w2", "model.layers.57.block_sparse_moe.experts.147.w2", "model.layers.57.block_sparse_moe.experts.148.w2", "model.layers.57.block_sparse_moe.experts.149.w2", "model.layers.57.block_sparse_moe.experts.150.w2", "model.layers.57.block_sparse_moe.experts.151.w2", "model.layers.57.block_sparse_moe.experts.152.w2", "model.layers.57.block_sparse_moe.experts.153.w2", "model.layers.57.block_sparse_moe.experts.154.w2", "model.layers.57.block_sparse_moe.experts.155.w2", "model.layers.57.block_sparse_moe.experts.156.w2", "model.layers.57.block_sparse_moe.experts.157.w2", "model.layers.57.block_sparse_moe.experts.158.w2", "model.layers.57.block_sparse_moe.experts.159.w2", "model.layers.57.block_sparse_moe.experts.160.w2", "model.layers.57.block_sparse_moe.experts.161.w2", "model.layers.57.block_sparse_moe.experts.162.w2", "model.layers.57.block_sparse_moe.experts.163.w2", "model.layers.57.block_sparse_moe.experts.164.w2", "model.layers.57.block_sparse_moe.experts.165.w2", "model.layers.57.block_sparse_moe.experts.166.w2", "model.layers.57.block_sparse_moe.experts.167.w2", "model.layers.57.block_sparse_moe.experts.168.w2", "model.layers.57.block_sparse_moe.experts.169.w2", "model.layers.57.block_sparse_moe.experts.170.w2", "model.layers.57.block_sparse_moe.experts.171.w2", "model.layers.57.block_sparse_moe.experts.172.w2", "model.layers.57.block_sparse_moe.experts.173.w2", "model.layers.57.block_sparse_moe.experts.174.w2", "model.layers.57.block_sparse_moe.experts.175.w2", "model.layers.57.block_sparse_moe.experts.176.w2", "model.layers.57.block_sparse_moe.experts.177.w2", "model.layers.57.block_sparse_moe.experts.178.w2", "model.layers.57.block_sparse_moe.experts.179.w2", "model.layers.57.block_sparse_moe.experts.180.w2", "model.layers.57.block_sparse_moe.experts.181.w2", "model.layers.57.block_sparse_moe.experts.182.w2", "model.layers.57.block_sparse_moe.experts.183.w2", "model.layers.57.block_sparse_moe.experts.184.w2", "model.layers.57.block_sparse_moe.experts.185.w2", "model.layers.57.block_sparse_moe.experts.186.w2", "model.layers.57.block_sparse_moe.experts.187.w2", "model.layers.57.block_sparse_moe.experts.188.w2", "model.layers.57.block_sparse_moe.experts.189.w2", "model.layers.57.block_sparse_moe.experts.190.w2", "model.layers.57.block_sparse_moe.experts.191.w2", "model.layers.57.block_sparse_moe.experts.192.w2", "model.layers.57.block_sparse_moe.experts.193.w2", "model.layers.57.block_sparse_moe.experts.194.w2", "model.layers.57.block_sparse_moe.experts.195.w2", "model.layers.57.block_sparse_moe.experts.196.w2", "model.layers.57.block_sparse_moe.experts.197.w2", "model.layers.57.block_sparse_moe.experts.198.w2", "model.layers.57.block_sparse_moe.experts.199.w2", "model.layers.57.block_sparse_moe.experts.200.w2", "model.layers.57.block_sparse_moe.experts.201.w2", "model.layers.57.block_sparse_moe.experts.202.w2", "model.layers.57.block_sparse_moe.experts.203.w2", "model.layers.57.block_sparse_moe.experts.204.w2", "model.layers.57.block_sparse_moe.experts.205.w2", "model.layers.57.block_sparse_moe.experts.206.w2", "model.layers.57.block_sparse_moe.experts.207.w2", "model.layers.57.block_sparse_moe.experts.208.w2", "model.layers.57.block_sparse_moe.experts.209.w2", "model.layers.57.block_sparse_moe.experts.210.w2", "model.layers.57.block_sparse_moe.experts.211.w2", "model.layers.57.block_sparse_moe.experts.212.w2", "model.layers.57.block_sparse_moe.experts.213.w2", "model.layers.57.block_sparse_moe.experts.214.w2", "model.layers.57.block_sparse_moe.experts.215.w2", "model.layers.57.block_sparse_moe.experts.216.w2", "model.layers.57.block_sparse_moe.experts.217.w2", "model.layers.57.block_sparse_moe.experts.218.w2", "model.layers.57.block_sparse_moe.experts.219.w2", "model.layers.57.block_sparse_moe.experts.220.w2", "model.layers.57.block_sparse_moe.experts.221.w2", "model.layers.57.block_sparse_moe.experts.222.w2", "model.layers.57.block_sparse_moe.experts.223.w2", "model.layers.57.block_sparse_moe.experts.224.w2", "model.layers.57.block_sparse_moe.experts.225.w2", "model.layers.57.block_sparse_moe.experts.226.w2", "model.layers.57.block_sparse_moe.experts.227.w2", "model.layers.57.block_sparse_moe.experts.228.w2", "model.layers.57.block_sparse_moe.experts.229.w2", "model.layers.57.block_sparse_moe.experts.230.w2", "model.layers.57.block_sparse_moe.experts.231.w2", "model.layers.57.block_sparse_moe.experts.232.w2", "model.layers.57.block_sparse_moe.experts.233.w2", "model.layers.57.block_sparse_moe.experts.234.w2", "model.layers.57.block_sparse_moe.experts.235.w2", "model.layers.57.block_sparse_moe.experts.236.w2", "model.layers.57.block_sparse_moe.experts.237.w2", "model.layers.57.block_sparse_moe.experts.238.w2", "model.layers.57.block_sparse_moe.experts.239.w2", "model.layers.57.block_sparse_moe.experts.240.w2", "model.layers.57.block_sparse_moe.experts.241.w2", "model.layers.57.block_sparse_moe.experts.242.w2", "model.layers.57.block_sparse_moe.experts.243.w2", "model.layers.57.block_sparse_moe.experts.244.w2", "model.layers.57.block_sparse_moe.experts.245.w2", "model.layers.57.block_sparse_moe.experts.246.w2", "model.layers.57.block_sparse_moe.experts.247.w2", "model.layers.57.block_sparse_moe.experts.248.w2", "model.layers.57.block_sparse_moe.experts.249.w2", "model.layers.57.block_sparse_moe.experts.250.w2", "model.layers.57.block_sparse_moe.experts.251.w2", "model.layers.57.block_sparse_moe.experts.252.w2", "model.layers.57.block_sparse_moe.experts.253.w2", "model.layers.57.block_sparse_moe.experts.254.w2", "model.layers.57.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00022240765392783635, "dbits": 3623878656 } ] }, { "idx": 116, "layers": [ "model.layers.58.self_attn.q_proj", "model.layers.58.self_attn.k_proj", "model.layers.58.self_attn.v_proj", "model.layers.58.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0009490121155977249, "dbits": 44040192 } ] }, { "idx": 117, "layers": [ "model.layers.58.block_sparse_moe.experts.0.w1", "model.layers.58.block_sparse_moe.experts.1.w1", "model.layers.58.block_sparse_moe.experts.2.w1", "model.layers.58.block_sparse_moe.experts.3.w1", "model.layers.58.block_sparse_moe.experts.4.w1", "model.layers.58.block_sparse_moe.experts.5.w1", "model.layers.58.block_sparse_moe.experts.6.w1", "model.layers.58.block_sparse_moe.experts.7.w1", "model.layers.58.block_sparse_moe.experts.8.w1", "model.layers.58.block_sparse_moe.experts.9.w1", "model.layers.58.block_sparse_moe.experts.10.w1", "model.layers.58.block_sparse_moe.experts.11.w1", "model.layers.58.block_sparse_moe.experts.12.w1", "model.layers.58.block_sparse_moe.experts.13.w1", "model.layers.58.block_sparse_moe.experts.14.w1", "model.layers.58.block_sparse_moe.experts.15.w1", "model.layers.58.block_sparse_moe.experts.16.w1", "model.layers.58.block_sparse_moe.experts.17.w1", "model.layers.58.block_sparse_moe.experts.18.w1", "model.layers.58.block_sparse_moe.experts.19.w1", "model.layers.58.block_sparse_moe.experts.20.w1", "model.layers.58.block_sparse_moe.experts.21.w1", "model.layers.58.block_sparse_moe.experts.22.w1", "model.layers.58.block_sparse_moe.experts.23.w1", "model.layers.58.block_sparse_moe.experts.24.w1", "model.layers.58.block_sparse_moe.experts.25.w1", "model.layers.58.block_sparse_moe.experts.26.w1", "model.layers.58.block_sparse_moe.experts.27.w1", "model.layers.58.block_sparse_moe.experts.28.w1", "model.layers.58.block_sparse_moe.experts.29.w1", "model.layers.58.block_sparse_moe.experts.30.w1", "model.layers.58.block_sparse_moe.experts.31.w1", "model.layers.58.block_sparse_moe.experts.32.w1", "model.layers.58.block_sparse_moe.experts.33.w1", "model.layers.58.block_sparse_moe.experts.34.w1", "model.layers.58.block_sparse_moe.experts.35.w1", "model.layers.58.block_sparse_moe.experts.36.w1", "model.layers.58.block_sparse_moe.experts.37.w1", "model.layers.58.block_sparse_moe.experts.38.w1", "model.layers.58.block_sparse_moe.experts.39.w1", "model.layers.58.block_sparse_moe.experts.40.w1", "model.layers.58.block_sparse_moe.experts.41.w1", "model.layers.58.block_sparse_moe.experts.42.w1", "model.layers.58.block_sparse_moe.experts.43.w1", "model.layers.58.block_sparse_moe.experts.44.w1", "model.layers.58.block_sparse_moe.experts.45.w1", "model.layers.58.block_sparse_moe.experts.46.w1", "model.layers.58.block_sparse_moe.experts.47.w1", "model.layers.58.block_sparse_moe.experts.48.w1", "model.layers.58.block_sparse_moe.experts.49.w1", "model.layers.58.block_sparse_moe.experts.50.w1", "model.layers.58.block_sparse_moe.experts.51.w1", "model.layers.58.block_sparse_moe.experts.52.w1", "model.layers.58.block_sparse_moe.experts.53.w1", "model.layers.58.block_sparse_moe.experts.54.w1", "model.layers.58.block_sparse_moe.experts.55.w1", "model.layers.58.block_sparse_moe.experts.56.w1", "model.layers.58.block_sparse_moe.experts.57.w1", "model.layers.58.block_sparse_moe.experts.58.w1", "model.layers.58.block_sparse_moe.experts.59.w1", "model.layers.58.block_sparse_moe.experts.60.w1", "model.layers.58.block_sparse_moe.experts.61.w1", "model.layers.58.block_sparse_moe.experts.62.w1", "model.layers.58.block_sparse_moe.experts.63.w1", "model.layers.58.block_sparse_moe.experts.64.w1", "model.layers.58.block_sparse_moe.experts.65.w1", "model.layers.58.block_sparse_moe.experts.66.w1", "model.layers.58.block_sparse_moe.experts.67.w1", "model.layers.58.block_sparse_moe.experts.68.w1", "model.layers.58.block_sparse_moe.experts.69.w1", "model.layers.58.block_sparse_moe.experts.70.w1", "model.layers.58.block_sparse_moe.experts.71.w1", "model.layers.58.block_sparse_moe.experts.72.w1", "model.layers.58.block_sparse_moe.experts.73.w1", "model.layers.58.block_sparse_moe.experts.74.w1", "model.layers.58.block_sparse_moe.experts.75.w1", "model.layers.58.block_sparse_moe.experts.76.w1", "model.layers.58.block_sparse_moe.experts.77.w1", "model.layers.58.block_sparse_moe.experts.78.w1", "model.layers.58.block_sparse_moe.experts.79.w1", "model.layers.58.block_sparse_moe.experts.80.w1", "model.layers.58.block_sparse_moe.experts.81.w1", "model.layers.58.block_sparse_moe.experts.82.w1", "model.layers.58.block_sparse_moe.experts.83.w1", "model.layers.58.block_sparse_moe.experts.84.w1", "model.layers.58.block_sparse_moe.experts.85.w1", "model.layers.58.block_sparse_moe.experts.86.w1", "model.layers.58.block_sparse_moe.experts.87.w1", "model.layers.58.block_sparse_moe.experts.88.w1", "model.layers.58.block_sparse_moe.experts.89.w1", "model.layers.58.block_sparse_moe.experts.90.w1", "model.layers.58.block_sparse_moe.experts.91.w1", "model.layers.58.block_sparse_moe.experts.92.w1", "model.layers.58.block_sparse_moe.experts.93.w1", "model.layers.58.block_sparse_moe.experts.94.w1", "model.layers.58.block_sparse_moe.experts.95.w1", "model.layers.58.block_sparse_moe.experts.96.w1", "model.layers.58.block_sparse_moe.experts.97.w1", "model.layers.58.block_sparse_moe.experts.98.w1", "model.layers.58.block_sparse_moe.experts.99.w1", "model.layers.58.block_sparse_moe.experts.100.w1", "model.layers.58.block_sparse_moe.experts.101.w1", "model.layers.58.block_sparse_moe.experts.102.w1", "model.layers.58.block_sparse_moe.experts.103.w1", "model.layers.58.block_sparse_moe.experts.104.w1", "model.layers.58.block_sparse_moe.experts.105.w1", "model.layers.58.block_sparse_moe.experts.106.w1", "model.layers.58.block_sparse_moe.experts.107.w1", "model.layers.58.block_sparse_moe.experts.108.w1", "model.layers.58.block_sparse_moe.experts.109.w1", "model.layers.58.block_sparse_moe.experts.110.w1", "model.layers.58.block_sparse_moe.experts.111.w1", "model.layers.58.block_sparse_moe.experts.112.w1", "model.layers.58.block_sparse_moe.experts.113.w1", "model.layers.58.block_sparse_moe.experts.114.w1", "model.layers.58.block_sparse_moe.experts.115.w1", "model.layers.58.block_sparse_moe.experts.116.w1", "model.layers.58.block_sparse_moe.experts.117.w1", "model.layers.58.block_sparse_moe.experts.118.w1", "model.layers.58.block_sparse_moe.experts.119.w1", "model.layers.58.block_sparse_moe.experts.120.w1", "model.layers.58.block_sparse_moe.experts.121.w1", "model.layers.58.block_sparse_moe.experts.122.w1", "model.layers.58.block_sparse_moe.experts.123.w1", "model.layers.58.block_sparse_moe.experts.124.w1", "model.layers.58.block_sparse_moe.experts.125.w1", "model.layers.58.block_sparse_moe.experts.126.w1", "model.layers.58.block_sparse_moe.experts.127.w1", "model.layers.58.block_sparse_moe.experts.128.w1", "model.layers.58.block_sparse_moe.experts.129.w1", "model.layers.58.block_sparse_moe.experts.130.w1", "model.layers.58.block_sparse_moe.experts.131.w1", "model.layers.58.block_sparse_moe.experts.132.w1", "model.layers.58.block_sparse_moe.experts.133.w1", "model.layers.58.block_sparse_moe.experts.134.w1", "model.layers.58.block_sparse_moe.experts.135.w1", "model.layers.58.block_sparse_moe.experts.136.w1", "model.layers.58.block_sparse_moe.experts.137.w1", "model.layers.58.block_sparse_moe.experts.138.w1", "model.layers.58.block_sparse_moe.experts.139.w1", "model.layers.58.block_sparse_moe.experts.140.w1", "model.layers.58.block_sparse_moe.experts.141.w1", "model.layers.58.block_sparse_moe.experts.142.w1", "model.layers.58.block_sparse_moe.experts.143.w1", "model.layers.58.block_sparse_moe.experts.144.w1", "model.layers.58.block_sparse_moe.experts.145.w1", "model.layers.58.block_sparse_moe.experts.146.w1", "model.layers.58.block_sparse_moe.experts.147.w1", "model.layers.58.block_sparse_moe.experts.148.w1", "model.layers.58.block_sparse_moe.experts.149.w1", "model.layers.58.block_sparse_moe.experts.150.w1", "model.layers.58.block_sparse_moe.experts.151.w1", "model.layers.58.block_sparse_moe.experts.152.w1", "model.layers.58.block_sparse_moe.experts.153.w1", "model.layers.58.block_sparse_moe.experts.154.w1", "model.layers.58.block_sparse_moe.experts.155.w1", "model.layers.58.block_sparse_moe.experts.156.w1", "model.layers.58.block_sparse_moe.experts.157.w1", "model.layers.58.block_sparse_moe.experts.158.w1", "model.layers.58.block_sparse_moe.experts.159.w1", "model.layers.58.block_sparse_moe.experts.160.w1", "model.layers.58.block_sparse_moe.experts.161.w1", "model.layers.58.block_sparse_moe.experts.162.w1", "model.layers.58.block_sparse_moe.experts.163.w1", "model.layers.58.block_sparse_moe.experts.164.w1", "model.layers.58.block_sparse_moe.experts.165.w1", "model.layers.58.block_sparse_moe.experts.166.w1", "model.layers.58.block_sparse_moe.experts.167.w1", "model.layers.58.block_sparse_moe.experts.168.w1", "model.layers.58.block_sparse_moe.experts.169.w1", "model.layers.58.block_sparse_moe.experts.170.w1", "model.layers.58.block_sparse_moe.experts.171.w1", "model.layers.58.block_sparse_moe.experts.172.w1", "model.layers.58.block_sparse_moe.experts.173.w1", "model.layers.58.block_sparse_moe.experts.174.w1", "model.layers.58.block_sparse_moe.experts.175.w1", "model.layers.58.block_sparse_moe.experts.176.w1", "model.layers.58.block_sparse_moe.experts.177.w1", "model.layers.58.block_sparse_moe.experts.178.w1", "model.layers.58.block_sparse_moe.experts.179.w1", "model.layers.58.block_sparse_moe.experts.180.w1", "model.layers.58.block_sparse_moe.experts.181.w1", "model.layers.58.block_sparse_moe.experts.182.w1", "model.layers.58.block_sparse_moe.experts.183.w1", "model.layers.58.block_sparse_moe.experts.184.w1", "model.layers.58.block_sparse_moe.experts.185.w1", "model.layers.58.block_sparse_moe.experts.186.w1", "model.layers.58.block_sparse_moe.experts.187.w1", "model.layers.58.block_sparse_moe.experts.188.w1", "model.layers.58.block_sparse_moe.experts.189.w1", "model.layers.58.block_sparse_moe.experts.190.w1", "model.layers.58.block_sparse_moe.experts.191.w1", "model.layers.58.block_sparse_moe.experts.192.w1", "model.layers.58.block_sparse_moe.experts.193.w1", "model.layers.58.block_sparse_moe.experts.194.w1", "model.layers.58.block_sparse_moe.experts.195.w1", "model.layers.58.block_sparse_moe.experts.196.w1", "model.layers.58.block_sparse_moe.experts.197.w1", "model.layers.58.block_sparse_moe.experts.198.w1", "model.layers.58.block_sparse_moe.experts.199.w1", "model.layers.58.block_sparse_moe.experts.200.w1", "model.layers.58.block_sparse_moe.experts.201.w1", "model.layers.58.block_sparse_moe.experts.202.w1", "model.layers.58.block_sparse_moe.experts.203.w1", "model.layers.58.block_sparse_moe.experts.204.w1", "model.layers.58.block_sparse_moe.experts.205.w1", "model.layers.58.block_sparse_moe.experts.206.w1", "model.layers.58.block_sparse_moe.experts.207.w1", "model.layers.58.block_sparse_moe.experts.208.w1", "model.layers.58.block_sparse_moe.experts.209.w1", "model.layers.58.block_sparse_moe.experts.210.w1", "model.layers.58.block_sparse_moe.experts.211.w1", "model.layers.58.block_sparse_moe.experts.212.w1", "model.layers.58.block_sparse_moe.experts.213.w1", "model.layers.58.block_sparse_moe.experts.214.w1", "model.layers.58.block_sparse_moe.experts.215.w1", "model.layers.58.block_sparse_moe.experts.216.w1", "model.layers.58.block_sparse_moe.experts.217.w1", "model.layers.58.block_sparse_moe.experts.218.w1", "model.layers.58.block_sparse_moe.experts.219.w1", "model.layers.58.block_sparse_moe.experts.220.w1", "model.layers.58.block_sparse_moe.experts.221.w1", "model.layers.58.block_sparse_moe.experts.222.w1", "model.layers.58.block_sparse_moe.experts.223.w1", "model.layers.58.block_sparse_moe.experts.224.w1", "model.layers.58.block_sparse_moe.experts.225.w1", "model.layers.58.block_sparse_moe.experts.226.w1", "model.layers.58.block_sparse_moe.experts.227.w1", "model.layers.58.block_sparse_moe.experts.228.w1", "model.layers.58.block_sparse_moe.experts.229.w1", "model.layers.58.block_sparse_moe.experts.230.w1", "model.layers.58.block_sparse_moe.experts.231.w1", "model.layers.58.block_sparse_moe.experts.232.w1", "model.layers.58.block_sparse_moe.experts.233.w1", "model.layers.58.block_sparse_moe.experts.234.w1", "model.layers.58.block_sparse_moe.experts.235.w1", "model.layers.58.block_sparse_moe.experts.236.w1", "model.layers.58.block_sparse_moe.experts.237.w1", "model.layers.58.block_sparse_moe.experts.238.w1", "model.layers.58.block_sparse_moe.experts.239.w1", "model.layers.58.block_sparse_moe.experts.240.w1", "model.layers.58.block_sparse_moe.experts.241.w1", "model.layers.58.block_sparse_moe.experts.242.w1", "model.layers.58.block_sparse_moe.experts.243.w1", "model.layers.58.block_sparse_moe.experts.244.w1", "model.layers.58.block_sparse_moe.experts.245.w1", "model.layers.58.block_sparse_moe.experts.246.w1", "model.layers.58.block_sparse_moe.experts.247.w1", "model.layers.58.block_sparse_moe.experts.248.w1", "model.layers.58.block_sparse_moe.experts.249.w1", "model.layers.58.block_sparse_moe.experts.250.w1", "model.layers.58.block_sparse_moe.experts.251.w1", "model.layers.58.block_sparse_moe.experts.252.w1", "model.layers.58.block_sparse_moe.experts.253.w1", "model.layers.58.block_sparse_moe.experts.254.w1", "model.layers.58.block_sparse_moe.experts.255.w1", "model.layers.58.block_sparse_moe.experts.0.w3", "model.layers.58.block_sparse_moe.experts.1.w3", "model.layers.58.block_sparse_moe.experts.2.w3", "model.layers.58.block_sparse_moe.experts.3.w3", "model.layers.58.block_sparse_moe.experts.4.w3", "model.layers.58.block_sparse_moe.experts.5.w3", "model.layers.58.block_sparse_moe.experts.6.w3", "model.layers.58.block_sparse_moe.experts.7.w3", "model.layers.58.block_sparse_moe.experts.8.w3", "model.layers.58.block_sparse_moe.experts.9.w3", "model.layers.58.block_sparse_moe.experts.10.w3", "model.layers.58.block_sparse_moe.experts.11.w3", "model.layers.58.block_sparse_moe.experts.12.w3", "model.layers.58.block_sparse_moe.experts.13.w3", "model.layers.58.block_sparse_moe.experts.14.w3", "model.layers.58.block_sparse_moe.experts.15.w3", "model.layers.58.block_sparse_moe.experts.16.w3", "model.layers.58.block_sparse_moe.experts.17.w3", "model.layers.58.block_sparse_moe.experts.18.w3", "model.layers.58.block_sparse_moe.experts.19.w3", "model.layers.58.block_sparse_moe.experts.20.w3", "model.layers.58.block_sparse_moe.experts.21.w3", "model.layers.58.block_sparse_moe.experts.22.w3", "model.layers.58.block_sparse_moe.experts.23.w3", "model.layers.58.block_sparse_moe.experts.24.w3", "model.layers.58.block_sparse_moe.experts.25.w3", "model.layers.58.block_sparse_moe.experts.26.w3", "model.layers.58.block_sparse_moe.experts.27.w3", "model.layers.58.block_sparse_moe.experts.28.w3", "model.layers.58.block_sparse_moe.experts.29.w3", "model.layers.58.block_sparse_moe.experts.30.w3", "model.layers.58.block_sparse_moe.experts.31.w3", "model.layers.58.block_sparse_moe.experts.32.w3", "model.layers.58.block_sparse_moe.experts.33.w3", "model.layers.58.block_sparse_moe.experts.34.w3", "model.layers.58.block_sparse_moe.experts.35.w3", "model.layers.58.block_sparse_moe.experts.36.w3", "model.layers.58.block_sparse_moe.experts.37.w3", "model.layers.58.block_sparse_moe.experts.38.w3", "model.layers.58.block_sparse_moe.experts.39.w3", "model.layers.58.block_sparse_moe.experts.40.w3", "model.layers.58.block_sparse_moe.experts.41.w3", "model.layers.58.block_sparse_moe.experts.42.w3", "model.layers.58.block_sparse_moe.experts.43.w3", "model.layers.58.block_sparse_moe.experts.44.w3", "model.layers.58.block_sparse_moe.experts.45.w3", "model.layers.58.block_sparse_moe.experts.46.w3", "model.layers.58.block_sparse_moe.experts.47.w3", "model.layers.58.block_sparse_moe.experts.48.w3", "model.layers.58.block_sparse_moe.experts.49.w3", "model.layers.58.block_sparse_moe.experts.50.w3", "model.layers.58.block_sparse_moe.experts.51.w3", "model.layers.58.block_sparse_moe.experts.52.w3", "model.layers.58.block_sparse_moe.experts.53.w3", "model.layers.58.block_sparse_moe.experts.54.w3", "model.layers.58.block_sparse_moe.experts.55.w3", "model.layers.58.block_sparse_moe.experts.56.w3", "model.layers.58.block_sparse_moe.experts.57.w3", "model.layers.58.block_sparse_moe.experts.58.w3", "model.layers.58.block_sparse_moe.experts.59.w3", "model.layers.58.block_sparse_moe.experts.60.w3", "model.layers.58.block_sparse_moe.experts.61.w3", "model.layers.58.block_sparse_moe.experts.62.w3", "model.layers.58.block_sparse_moe.experts.63.w3", "model.layers.58.block_sparse_moe.experts.64.w3", "model.layers.58.block_sparse_moe.experts.65.w3", "model.layers.58.block_sparse_moe.experts.66.w3", "model.layers.58.block_sparse_moe.experts.67.w3", "model.layers.58.block_sparse_moe.experts.68.w3", "model.layers.58.block_sparse_moe.experts.69.w3", "model.layers.58.block_sparse_moe.experts.70.w3", "model.layers.58.block_sparse_moe.experts.71.w3", "model.layers.58.block_sparse_moe.experts.72.w3", "model.layers.58.block_sparse_moe.experts.73.w3", "model.layers.58.block_sparse_moe.experts.74.w3", "model.layers.58.block_sparse_moe.experts.75.w3", "model.layers.58.block_sparse_moe.experts.76.w3", "model.layers.58.block_sparse_moe.experts.77.w3", "model.layers.58.block_sparse_moe.experts.78.w3", "model.layers.58.block_sparse_moe.experts.79.w3", "model.layers.58.block_sparse_moe.experts.80.w3", "model.layers.58.block_sparse_moe.experts.81.w3", "model.layers.58.block_sparse_moe.experts.82.w3", "model.layers.58.block_sparse_moe.experts.83.w3", "model.layers.58.block_sparse_moe.experts.84.w3", "model.layers.58.block_sparse_moe.experts.85.w3", "model.layers.58.block_sparse_moe.experts.86.w3", "model.layers.58.block_sparse_moe.experts.87.w3", "model.layers.58.block_sparse_moe.experts.88.w3", "model.layers.58.block_sparse_moe.experts.89.w3", "model.layers.58.block_sparse_moe.experts.90.w3", "model.layers.58.block_sparse_moe.experts.91.w3", "model.layers.58.block_sparse_moe.experts.92.w3", "model.layers.58.block_sparse_moe.experts.93.w3", "model.layers.58.block_sparse_moe.experts.94.w3", "model.layers.58.block_sparse_moe.experts.95.w3", "model.layers.58.block_sparse_moe.experts.96.w3", "model.layers.58.block_sparse_moe.experts.97.w3", "model.layers.58.block_sparse_moe.experts.98.w3", "model.layers.58.block_sparse_moe.experts.99.w3", "model.layers.58.block_sparse_moe.experts.100.w3", "model.layers.58.block_sparse_moe.experts.101.w3", "model.layers.58.block_sparse_moe.experts.102.w3", "model.layers.58.block_sparse_moe.experts.103.w3", "model.layers.58.block_sparse_moe.experts.104.w3", "model.layers.58.block_sparse_moe.experts.105.w3", "model.layers.58.block_sparse_moe.experts.106.w3", "model.layers.58.block_sparse_moe.experts.107.w3", "model.layers.58.block_sparse_moe.experts.108.w3", "model.layers.58.block_sparse_moe.experts.109.w3", "model.layers.58.block_sparse_moe.experts.110.w3", "model.layers.58.block_sparse_moe.experts.111.w3", "model.layers.58.block_sparse_moe.experts.112.w3", "model.layers.58.block_sparse_moe.experts.113.w3", "model.layers.58.block_sparse_moe.experts.114.w3", "model.layers.58.block_sparse_moe.experts.115.w3", "model.layers.58.block_sparse_moe.experts.116.w3", "model.layers.58.block_sparse_moe.experts.117.w3", "model.layers.58.block_sparse_moe.experts.118.w3", "model.layers.58.block_sparse_moe.experts.119.w3", "model.layers.58.block_sparse_moe.experts.120.w3", "model.layers.58.block_sparse_moe.experts.121.w3", "model.layers.58.block_sparse_moe.experts.122.w3", "model.layers.58.block_sparse_moe.experts.123.w3", "model.layers.58.block_sparse_moe.experts.124.w3", "model.layers.58.block_sparse_moe.experts.125.w3", "model.layers.58.block_sparse_moe.experts.126.w3", "model.layers.58.block_sparse_moe.experts.127.w3", "model.layers.58.block_sparse_moe.experts.128.w3", "model.layers.58.block_sparse_moe.experts.129.w3", "model.layers.58.block_sparse_moe.experts.130.w3", "model.layers.58.block_sparse_moe.experts.131.w3", "model.layers.58.block_sparse_moe.experts.132.w3", "model.layers.58.block_sparse_moe.experts.133.w3", "model.layers.58.block_sparse_moe.experts.134.w3", "model.layers.58.block_sparse_moe.experts.135.w3", "model.layers.58.block_sparse_moe.experts.136.w3", "model.layers.58.block_sparse_moe.experts.137.w3", "model.layers.58.block_sparse_moe.experts.138.w3", "model.layers.58.block_sparse_moe.experts.139.w3", "model.layers.58.block_sparse_moe.experts.140.w3", "model.layers.58.block_sparse_moe.experts.141.w3", "model.layers.58.block_sparse_moe.experts.142.w3", "model.layers.58.block_sparse_moe.experts.143.w3", "model.layers.58.block_sparse_moe.experts.144.w3", "model.layers.58.block_sparse_moe.experts.145.w3", "model.layers.58.block_sparse_moe.experts.146.w3", "model.layers.58.block_sparse_moe.experts.147.w3", "model.layers.58.block_sparse_moe.experts.148.w3", "model.layers.58.block_sparse_moe.experts.149.w3", "model.layers.58.block_sparse_moe.experts.150.w3", "model.layers.58.block_sparse_moe.experts.151.w3", "model.layers.58.block_sparse_moe.experts.152.w3", "model.layers.58.block_sparse_moe.experts.153.w3", "model.layers.58.block_sparse_moe.experts.154.w3", "model.layers.58.block_sparse_moe.experts.155.w3", "model.layers.58.block_sparse_moe.experts.156.w3", "model.layers.58.block_sparse_moe.experts.157.w3", "model.layers.58.block_sparse_moe.experts.158.w3", "model.layers.58.block_sparse_moe.experts.159.w3", "model.layers.58.block_sparse_moe.experts.160.w3", "model.layers.58.block_sparse_moe.experts.161.w3", "model.layers.58.block_sparse_moe.experts.162.w3", "model.layers.58.block_sparse_moe.experts.163.w3", "model.layers.58.block_sparse_moe.experts.164.w3", "model.layers.58.block_sparse_moe.experts.165.w3", "model.layers.58.block_sparse_moe.experts.166.w3", "model.layers.58.block_sparse_moe.experts.167.w3", "model.layers.58.block_sparse_moe.experts.168.w3", "model.layers.58.block_sparse_moe.experts.169.w3", "model.layers.58.block_sparse_moe.experts.170.w3", "model.layers.58.block_sparse_moe.experts.171.w3", "model.layers.58.block_sparse_moe.experts.172.w3", "model.layers.58.block_sparse_moe.experts.173.w3", "model.layers.58.block_sparse_moe.experts.174.w3", "model.layers.58.block_sparse_moe.experts.175.w3", "model.layers.58.block_sparse_moe.experts.176.w3", "model.layers.58.block_sparse_moe.experts.177.w3", "model.layers.58.block_sparse_moe.experts.178.w3", "model.layers.58.block_sparse_moe.experts.179.w3", "model.layers.58.block_sparse_moe.experts.180.w3", "model.layers.58.block_sparse_moe.experts.181.w3", "model.layers.58.block_sparse_moe.experts.182.w3", "model.layers.58.block_sparse_moe.experts.183.w3", "model.layers.58.block_sparse_moe.experts.184.w3", "model.layers.58.block_sparse_moe.experts.185.w3", "model.layers.58.block_sparse_moe.experts.186.w3", "model.layers.58.block_sparse_moe.experts.187.w3", "model.layers.58.block_sparse_moe.experts.188.w3", "model.layers.58.block_sparse_moe.experts.189.w3", "model.layers.58.block_sparse_moe.experts.190.w3", "model.layers.58.block_sparse_moe.experts.191.w3", "model.layers.58.block_sparse_moe.experts.192.w3", "model.layers.58.block_sparse_moe.experts.193.w3", "model.layers.58.block_sparse_moe.experts.194.w3", "model.layers.58.block_sparse_moe.experts.195.w3", "model.layers.58.block_sparse_moe.experts.196.w3", "model.layers.58.block_sparse_moe.experts.197.w3", "model.layers.58.block_sparse_moe.experts.198.w3", "model.layers.58.block_sparse_moe.experts.199.w3", "model.layers.58.block_sparse_moe.experts.200.w3", "model.layers.58.block_sparse_moe.experts.201.w3", "model.layers.58.block_sparse_moe.experts.202.w3", "model.layers.58.block_sparse_moe.experts.203.w3", "model.layers.58.block_sparse_moe.experts.204.w3", "model.layers.58.block_sparse_moe.experts.205.w3", "model.layers.58.block_sparse_moe.experts.206.w3", "model.layers.58.block_sparse_moe.experts.207.w3", "model.layers.58.block_sparse_moe.experts.208.w3", "model.layers.58.block_sparse_moe.experts.209.w3", "model.layers.58.block_sparse_moe.experts.210.w3", "model.layers.58.block_sparse_moe.experts.211.w3", "model.layers.58.block_sparse_moe.experts.212.w3", "model.layers.58.block_sparse_moe.experts.213.w3", "model.layers.58.block_sparse_moe.experts.214.w3", "model.layers.58.block_sparse_moe.experts.215.w3", "model.layers.58.block_sparse_moe.experts.216.w3", "model.layers.58.block_sparse_moe.experts.217.w3", "model.layers.58.block_sparse_moe.experts.218.w3", "model.layers.58.block_sparse_moe.experts.219.w3", "model.layers.58.block_sparse_moe.experts.220.w3", "model.layers.58.block_sparse_moe.experts.221.w3", "model.layers.58.block_sparse_moe.experts.222.w3", "model.layers.58.block_sparse_moe.experts.223.w3", "model.layers.58.block_sparse_moe.experts.224.w3", "model.layers.58.block_sparse_moe.experts.225.w3", "model.layers.58.block_sparse_moe.experts.226.w3", "model.layers.58.block_sparse_moe.experts.227.w3", "model.layers.58.block_sparse_moe.experts.228.w3", "model.layers.58.block_sparse_moe.experts.229.w3", "model.layers.58.block_sparse_moe.experts.230.w3", "model.layers.58.block_sparse_moe.experts.231.w3", "model.layers.58.block_sparse_moe.experts.232.w3", "model.layers.58.block_sparse_moe.experts.233.w3", "model.layers.58.block_sparse_moe.experts.234.w3", "model.layers.58.block_sparse_moe.experts.235.w3", "model.layers.58.block_sparse_moe.experts.236.w3", "model.layers.58.block_sparse_moe.experts.237.w3", "model.layers.58.block_sparse_moe.experts.238.w3", "model.layers.58.block_sparse_moe.experts.239.w3", "model.layers.58.block_sparse_moe.experts.240.w3", "model.layers.58.block_sparse_moe.experts.241.w3", "model.layers.58.block_sparse_moe.experts.242.w3", "model.layers.58.block_sparse_moe.experts.243.w3", "model.layers.58.block_sparse_moe.experts.244.w3", "model.layers.58.block_sparse_moe.experts.245.w3", "model.layers.58.block_sparse_moe.experts.246.w3", "model.layers.58.block_sparse_moe.experts.247.w3", "model.layers.58.block_sparse_moe.experts.248.w3", "model.layers.58.block_sparse_moe.experts.249.w3", "model.layers.58.block_sparse_moe.experts.250.w3", "model.layers.58.block_sparse_moe.experts.251.w3", "model.layers.58.block_sparse_moe.experts.252.w3", "model.layers.58.block_sparse_moe.experts.253.w3", "model.layers.58.block_sparse_moe.experts.254.w3", "model.layers.58.block_sparse_moe.experts.255.w3", "model.layers.58.block_sparse_moe.experts.0.w2", "model.layers.58.block_sparse_moe.experts.1.w2", "model.layers.58.block_sparse_moe.experts.2.w2", "model.layers.58.block_sparse_moe.experts.3.w2", "model.layers.58.block_sparse_moe.experts.4.w2", "model.layers.58.block_sparse_moe.experts.5.w2", "model.layers.58.block_sparse_moe.experts.6.w2", "model.layers.58.block_sparse_moe.experts.7.w2", "model.layers.58.block_sparse_moe.experts.8.w2", "model.layers.58.block_sparse_moe.experts.9.w2", "model.layers.58.block_sparse_moe.experts.10.w2", "model.layers.58.block_sparse_moe.experts.11.w2", "model.layers.58.block_sparse_moe.experts.12.w2", "model.layers.58.block_sparse_moe.experts.13.w2", "model.layers.58.block_sparse_moe.experts.14.w2", "model.layers.58.block_sparse_moe.experts.15.w2", "model.layers.58.block_sparse_moe.experts.16.w2", "model.layers.58.block_sparse_moe.experts.17.w2", "model.layers.58.block_sparse_moe.experts.18.w2", "model.layers.58.block_sparse_moe.experts.19.w2", "model.layers.58.block_sparse_moe.experts.20.w2", "model.layers.58.block_sparse_moe.experts.21.w2", "model.layers.58.block_sparse_moe.experts.22.w2", "model.layers.58.block_sparse_moe.experts.23.w2", "model.layers.58.block_sparse_moe.experts.24.w2", "model.layers.58.block_sparse_moe.experts.25.w2", "model.layers.58.block_sparse_moe.experts.26.w2", "model.layers.58.block_sparse_moe.experts.27.w2", "model.layers.58.block_sparse_moe.experts.28.w2", "model.layers.58.block_sparse_moe.experts.29.w2", "model.layers.58.block_sparse_moe.experts.30.w2", "model.layers.58.block_sparse_moe.experts.31.w2", "model.layers.58.block_sparse_moe.experts.32.w2", "model.layers.58.block_sparse_moe.experts.33.w2", "model.layers.58.block_sparse_moe.experts.34.w2", "model.layers.58.block_sparse_moe.experts.35.w2", "model.layers.58.block_sparse_moe.experts.36.w2", "model.layers.58.block_sparse_moe.experts.37.w2", "model.layers.58.block_sparse_moe.experts.38.w2", "model.layers.58.block_sparse_moe.experts.39.w2", "model.layers.58.block_sparse_moe.experts.40.w2", "model.layers.58.block_sparse_moe.experts.41.w2", "model.layers.58.block_sparse_moe.experts.42.w2", "model.layers.58.block_sparse_moe.experts.43.w2", "model.layers.58.block_sparse_moe.experts.44.w2", "model.layers.58.block_sparse_moe.experts.45.w2", "model.layers.58.block_sparse_moe.experts.46.w2", "model.layers.58.block_sparse_moe.experts.47.w2", "model.layers.58.block_sparse_moe.experts.48.w2", "model.layers.58.block_sparse_moe.experts.49.w2", "model.layers.58.block_sparse_moe.experts.50.w2", "model.layers.58.block_sparse_moe.experts.51.w2", "model.layers.58.block_sparse_moe.experts.52.w2", "model.layers.58.block_sparse_moe.experts.53.w2", "model.layers.58.block_sparse_moe.experts.54.w2", "model.layers.58.block_sparse_moe.experts.55.w2", "model.layers.58.block_sparse_moe.experts.56.w2", "model.layers.58.block_sparse_moe.experts.57.w2", "model.layers.58.block_sparse_moe.experts.58.w2", "model.layers.58.block_sparse_moe.experts.59.w2", "model.layers.58.block_sparse_moe.experts.60.w2", "model.layers.58.block_sparse_moe.experts.61.w2", "model.layers.58.block_sparse_moe.experts.62.w2", "model.layers.58.block_sparse_moe.experts.63.w2", "model.layers.58.block_sparse_moe.experts.64.w2", "model.layers.58.block_sparse_moe.experts.65.w2", "model.layers.58.block_sparse_moe.experts.66.w2", "model.layers.58.block_sparse_moe.experts.67.w2", "model.layers.58.block_sparse_moe.experts.68.w2", "model.layers.58.block_sparse_moe.experts.69.w2", "model.layers.58.block_sparse_moe.experts.70.w2", "model.layers.58.block_sparse_moe.experts.71.w2", "model.layers.58.block_sparse_moe.experts.72.w2", "model.layers.58.block_sparse_moe.experts.73.w2", "model.layers.58.block_sparse_moe.experts.74.w2", "model.layers.58.block_sparse_moe.experts.75.w2", "model.layers.58.block_sparse_moe.experts.76.w2", "model.layers.58.block_sparse_moe.experts.77.w2", "model.layers.58.block_sparse_moe.experts.78.w2", "model.layers.58.block_sparse_moe.experts.79.w2", "model.layers.58.block_sparse_moe.experts.80.w2", "model.layers.58.block_sparse_moe.experts.81.w2", "model.layers.58.block_sparse_moe.experts.82.w2", "model.layers.58.block_sparse_moe.experts.83.w2", "model.layers.58.block_sparse_moe.experts.84.w2", "model.layers.58.block_sparse_moe.experts.85.w2", "model.layers.58.block_sparse_moe.experts.86.w2", "model.layers.58.block_sparse_moe.experts.87.w2", "model.layers.58.block_sparse_moe.experts.88.w2", "model.layers.58.block_sparse_moe.experts.89.w2", "model.layers.58.block_sparse_moe.experts.90.w2", "model.layers.58.block_sparse_moe.experts.91.w2", "model.layers.58.block_sparse_moe.experts.92.w2", "model.layers.58.block_sparse_moe.experts.93.w2", "model.layers.58.block_sparse_moe.experts.94.w2", "model.layers.58.block_sparse_moe.experts.95.w2", "model.layers.58.block_sparse_moe.experts.96.w2", "model.layers.58.block_sparse_moe.experts.97.w2", "model.layers.58.block_sparse_moe.experts.98.w2", "model.layers.58.block_sparse_moe.experts.99.w2", "model.layers.58.block_sparse_moe.experts.100.w2", "model.layers.58.block_sparse_moe.experts.101.w2", "model.layers.58.block_sparse_moe.experts.102.w2", "model.layers.58.block_sparse_moe.experts.103.w2", "model.layers.58.block_sparse_moe.experts.104.w2", "model.layers.58.block_sparse_moe.experts.105.w2", "model.layers.58.block_sparse_moe.experts.106.w2", "model.layers.58.block_sparse_moe.experts.107.w2", "model.layers.58.block_sparse_moe.experts.108.w2", "model.layers.58.block_sparse_moe.experts.109.w2", "model.layers.58.block_sparse_moe.experts.110.w2", "model.layers.58.block_sparse_moe.experts.111.w2", "model.layers.58.block_sparse_moe.experts.112.w2", "model.layers.58.block_sparse_moe.experts.113.w2", "model.layers.58.block_sparse_moe.experts.114.w2", "model.layers.58.block_sparse_moe.experts.115.w2", "model.layers.58.block_sparse_moe.experts.116.w2", "model.layers.58.block_sparse_moe.experts.117.w2", "model.layers.58.block_sparse_moe.experts.118.w2", "model.layers.58.block_sparse_moe.experts.119.w2", "model.layers.58.block_sparse_moe.experts.120.w2", "model.layers.58.block_sparse_moe.experts.121.w2", "model.layers.58.block_sparse_moe.experts.122.w2", "model.layers.58.block_sparse_moe.experts.123.w2", "model.layers.58.block_sparse_moe.experts.124.w2", "model.layers.58.block_sparse_moe.experts.125.w2", "model.layers.58.block_sparse_moe.experts.126.w2", "model.layers.58.block_sparse_moe.experts.127.w2", "model.layers.58.block_sparse_moe.experts.128.w2", "model.layers.58.block_sparse_moe.experts.129.w2", "model.layers.58.block_sparse_moe.experts.130.w2", "model.layers.58.block_sparse_moe.experts.131.w2", "model.layers.58.block_sparse_moe.experts.132.w2", "model.layers.58.block_sparse_moe.experts.133.w2", "model.layers.58.block_sparse_moe.experts.134.w2", "model.layers.58.block_sparse_moe.experts.135.w2", "model.layers.58.block_sparse_moe.experts.136.w2", "model.layers.58.block_sparse_moe.experts.137.w2", "model.layers.58.block_sparse_moe.experts.138.w2", "model.layers.58.block_sparse_moe.experts.139.w2", "model.layers.58.block_sparse_moe.experts.140.w2", "model.layers.58.block_sparse_moe.experts.141.w2", "model.layers.58.block_sparse_moe.experts.142.w2", "model.layers.58.block_sparse_moe.experts.143.w2", "model.layers.58.block_sparse_moe.experts.144.w2", "model.layers.58.block_sparse_moe.experts.145.w2", "model.layers.58.block_sparse_moe.experts.146.w2", "model.layers.58.block_sparse_moe.experts.147.w2", "model.layers.58.block_sparse_moe.experts.148.w2", "model.layers.58.block_sparse_moe.experts.149.w2", "model.layers.58.block_sparse_moe.experts.150.w2", "model.layers.58.block_sparse_moe.experts.151.w2", "model.layers.58.block_sparse_moe.experts.152.w2", "model.layers.58.block_sparse_moe.experts.153.w2", "model.layers.58.block_sparse_moe.experts.154.w2", "model.layers.58.block_sparse_moe.experts.155.w2", "model.layers.58.block_sparse_moe.experts.156.w2", "model.layers.58.block_sparse_moe.experts.157.w2", "model.layers.58.block_sparse_moe.experts.158.w2", "model.layers.58.block_sparse_moe.experts.159.w2", "model.layers.58.block_sparse_moe.experts.160.w2", "model.layers.58.block_sparse_moe.experts.161.w2", "model.layers.58.block_sparse_moe.experts.162.w2", "model.layers.58.block_sparse_moe.experts.163.w2", "model.layers.58.block_sparse_moe.experts.164.w2", "model.layers.58.block_sparse_moe.experts.165.w2", "model.layers.58.block_sparse_moe.experts.166.w2", "model.layers.58.block_sparse_moe.experts.167.w2", "model.layers.58.block_sparse_moe.experts.168.w2", "model.layers.58.block_sparse_moe.experts.169.w2", "model.layers.58.block_sparse_moe.experts.170.w2", "model.layers.58.block_sparse_moe.experts.171.w2", "model.layers.58.block_sparse_moe.experts.172.w2", "model.layers.58.block_sparse_moe.experts.173.w2", "model.layers.58.block_sparse_moe.experts.174.w2", "model.layers.58.block_sparse_moe.experts.175.w2", "model.layers.58.block_sparse_moe.experts.176.w2", "model.layers.58.block_sparse_moe.experts.177.w2", "model.layers.58.block_sparse_moe.experts.178.w2", "model.layers.58.block_sparse_moe.experts.179.w2", "model.layers.58.block_sparse_moe.experts.180.w2", "model.layers.58.block_sparse_moe.experts.181.w2", "model.layers.58.block_sparse_moe.experts.182.w2", "model.layers.58.block_sparse_moe.experts.183.w2", "model.layers.58.block_sparse_moe.experts.184.w2", "model.layers.58.block_sparse_moe.experts.185.w2", "model.layers.58.block_sparse_moe.experts.186.w2", "model.layers.58.block_sparse_moe.experts.187.w2", "model.layers.58.block_sparse_moe.experts.188.w2", "model.layers.58.block_sparse_moe.experts.189.w2", "model.layers.58.block_sparse_moe.experts.190.w2", "model.layers.58.block_sparse_moe.experts.191.w2", "model.layers.58.block_sparse_moe.experts.192.w2", "model.layers.58.block_sparse_moe.experts.193.w2", "model.layers.58.block_sparse_moe.experts.194.w2", "model.layers.58.block_sparse_moe.experts.195.w2", "model.layers.58.block_sparse_moe.experts.196.w2", "model.layers.58.block_sparse_moe.experts.197.w2", "model.layers.58.block_sparse_moe.experts.198.w2", "model.layers.58.block_sparse_moe.experts.199.w2", "model.layers.58.block_sparse_moe.experts.200.w2", "model.layers.58.block_sparse_moe.experts.201.w2", "model.layers.58.block_sparse_moe.experts.202.w2", "model.layers.58.block_sparse_moe.experts.203.w2", "model.layers.58.block_sparse_moe.experts.204.w2", "model.layers.58.block_sparse_moe.experts.205.w2", "model.layers.58.block_sparse_moe.experts.206.w2", "model.layers.58.block_sparse_moe.experts.207.w2", "model.layers.58.block_sparse_moe.experts.208.w2", "model.layers.58.block_sparse_moe.experts.209.w2", "model.layers.58.block_sparse_moe.experts.210.w2", "model.layers.58.block_sparse_moe.experts.211.w2", "model.layers.58.block_sparse_moe.experts.212.w2", "model.layers.58.block_sparse_moe.experts.213.w2", "model.layers.58.block_sparse_moe.experts.214.w2", "model.layers.58.block_sparse_moe.experts.215.w2", "model.layers.58.block_sparse_moe.experts.216.w2", "model.layers.58.block_sparse_moe.experts.217.w2", "model.layers.58.block_sparse_moe.experts.218.w2", "model.layers.58.block_sparse_moe.experts.219.w2", "model.layers.58.block_sparse_moe.experts.220.w2", "model.layers.58.block_sparse_moe.experts.221.w2", "model.layers.58.block_sparse_moe.experts.222.w2", "model.layers.58.block_sparse_moe.experts.223.w2", "model.layers.58.block_sparse_moe.experts.224.w2", "model.layers.58.block_sparse_moe.experts.225.w2", "model.layers.58.block_sparse_moe.experts.226.w2", "model.layers.58.block_sparse_moe.experts.227.w2", "model.layers.58.block_sparse_moe.experts.228.w2", "model.layers.58.block_sparse_moe.experts.229.w2", "model.layers.58.block_sparse_moe.experts.230.w2", "model.layers.58.block_sparse_moe.experts.231.w2", "model.layers.58.block_sparse_moe.experts.232.w2", "model.layers.58.block_sparse_moe.experts.233.w2", "model.layers.58.block_sparse_moe.experts.234.w2", "model.layers.58.block_sparse_moe.experts.235.w2", "model.layers.58.block_sparse_moe.experts.236.w2", "model.layers.58.block_sparse_moe.experts.237.w2", "model.layers.58.block_sparse_moe.experts.238.w2", "model.layers.58.block_sparse_moe.experts.239.w2", "model.layers.58.block_sparse_moe.experts.240.w2", "model.layers.58.block_sparse_moe.experts.241.w2", "model.layers.58.block_sparse_moe.experts.242.w2", "model.layers.58.block_sparse_moe.experts.243.w2", "model.layers.58.block_sparse_moe.experts.244.w2", "model.layers.58.block_sparse_moe.experts.245.w2", "model.layers.58.block_sparse_moe.experts.246.w2", "model.layers.58.block_sparse_moe.experts.247.w2", "model.layers.58.block_sparse_moe.experts.248.w2", "model.layers.58.block_sparse_moe.experts.249.w2", "model.layers.58.block_sparse_moe.experts.250.w2", "model.layers.58.block_sparse_moe.experts.251.w2", "model.layers.58.block_sparse_moe.experts.252.w2", "model.layers.58.block_sparse_moe.experts.253.w2", "model.layers.58.block_sparse_moe.experts.254.w2", "model.layers.58.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00022552162408828735, "dbits": 3623878656 } ] }, { "idx": 118, "layers": [ "model.layers.59.self_attn.q_proj", "model.layers.59.self_attn.k_proj", "model.layers.59.self_attn.v_proj", "model.layers.59.self_attn.o_proj" ], "candidates": [ { "dkld": -0.001330323889851548, "dbits": 44040192 } ] }, { "idx": 119, "layers": [ "model.layers.59.block_sparse_moe.experts.0.w1", "model.layers.59.block_sparse_moe.experts.1.w1", "model.layers.59.block_sparse_moe.experts.2.w1", "model.layers.59.block_sparse_moe.experts.3.w1", "model.layers.59.block_sparse_moe.experts.4.w1", "model.layers.59.block_sparse_moe.experts.5.w1", "model.layers.59.block_sparse_moe.experts.6.w1", "model.layers.59.block_sparse_moe.experts.7.w1", "model.layers.59.block_sparse_moe.experts.8.w1", "model.layers.59.block_sparse_moe.experts.9.w1", "model.layers.59.block_sparse_moe.experts.10.w1", "model.layers.59.block_sparse_moe.experts.11.w1", "model.layers.59.block_sparse_moe.experts.12.w1", "model.layers.59.block_sparse_moe.experts.13.w1", "model.layers.59.block_sparse_moe.experts.14.w1", "model.layers.59.block_sparse_moe.experts.15.w1", "model.layers.59.block_sparse_moe.experts.16.w1", "model.layers.59.block_sparse_moe.experts.17.w1", "model.layers.59.block_sparse_moe.experts.18.w1", "model.layers.59.block_sparse_moe.experts.19.w1", "model.layers.59.block_sparse_moe.experts.20.w1", "model.layers.59.block_sparse_moe.experts.21.w1", "model.layers.59.block_sparse_moe.experts.22.w1", "model.layers.59.block_sparse_moe.experts.23.w1", "model.layers.59.block_sparse_moe.experts.24.w1", "model.layers.59.block_sparse_moe.experts.25.w1", "model.layers.59.block_sparse_moe.experts.26.w1", "model.layers.59.block_sparse_moe.experts.27.w1", "model.layers.59.block_sparse_moe.experts.28.w1", "model.layers.59.block_sparse_moe.experts.29.w1", "model.layers.59.block_sparse_moe.experts.30.w1", "model.layers.59.block_sparse_moe.experts.31.w1", "model.layers.59.block_sparse_moe.experts.32.w1", "model.layers.59.block_sparse_moe.experts.33.w1", "model.layers.59.block_sparse_moe.experts.34.w1", "model.layers.59.block_sparse_moe.experts.35.w1", "model.layers.59.block_sparse_moe.experts.36.w1", "model.layers.59.block_sparse_moe.experts.37.w1", "model.layers.59.block_sparse_moe.experts.38.w1", "model.layers.59.block_sparse_moe.experts.39.w1", "model.layers.59.block_sparse_moe.experts.40.w1", "model.layers.59.block_sparse_moe.experts.41.w1", "model.layers.59.block_sparse_moe.experts.42.w1", "model.layers.59.block_sparse_moe.experts.43.w1", "model.layers.59.block_sparse_moe.experts.44.w1", "model.layers.59.block_sparse_moe.experts.45.w1", "model.layers.59.block_sparse_moe.experts.46.w1", "model.layers.59.block_sparse_moe.experts.47.w1", "model.layers.59.block_sparse_moe.experts.48.w1", "model.layers.59.block_sparse_moe.experts.49.w1", "model.layers.59.block_sparse_moe.experts.50.w1", "model.layers.59.block_sparse_moe.experts.51.w1", "model.layers.59.block_sparse_moe.experts.52.w1", "model.layers.59.block_sparse_moe.experts.53.w1", "model.layers.59.block_sparse_moe.experts.54.w1", "model.layers.59.block_sparse_moe.experts.55.w1", "model.layers.59.block_sparse_moe.experts.56.w1", "model.layers.59.block_sparse_moe.experts.57.w1", "model.layers.59.block_sparse_moe.experts.58.w1", "model.layers.59.block_sparse_moe.experts.59.w1", "model.layers.59.block_sparse_moe.experts.60.w1", "model.layers.59.block_sparse_moe.experts.61.w1", "model.layers.59.block_sparse_moe.experts.62.w1", "model.layers.59.block_sparse_moe.experts.63.w1", "model.layers.59.block_sparse_moe.experts.64.w1", "model.layers.59.block_sparse_moe.experts.65.w1", "model.layers.59.block_sparse_moe.experts.66.w1", "model.layers.59.block_sparse_moe.experts.67.w1", "model.layers.59.block_sparse_moe.experts.68.w1", "model.layers.59.block_sparse_moe.experts.69.w1", "model.layers.59.block_sparse_moe.experts.70.w1", "model.layers.59.block_sparse_moe.experts.71.w1", "model.layers.59.block_sparse_moe.experts.72.w1", "model.layers.59.block_sparse_moe.experts.73.w1", "model.layers.59.block_sparse_moe.experts.74.w1", "model.layers.59.block_sparse_moe.experts.75.w1", "model.layers.59.block_sparse_moe.experts.76.w1", "model.layers.59.block_sparse_moe.experts.77.w1", "model.layers.59.block_sparse_moe.experts.78.w1", "model.layers.59.block_sparse_moe.experts.79.w1", "model.layers.59.block_sparse_moe.experts.80.w1", "model.layers.59.block_sparse_moe.experts.81.w1", "model.layers.59.block_sparse_moe.experts.82.w1", "model.layers.59.block_sparse_moe.experts.83.w1", "model.layers.59.block_sparse_moe.experts.84.w1", "model.layers.59.block_sparse_moe.experts.85.w1", "model.layers.59.block_sparse_moe.experts.86.w1", "model.layers.59.block_sparse_moe.experts.87.w1", "model.layers.59.block_sparse_moe.experts.88.w1", "model.layers.59.block_sparse_moe.experts.89.w1", "model.layers.59.block_sparse_moe.experts.90.w1", "model.layers.59.block_sparse_moe.experts.91.w1", "model.layers.59.block_sparse_moe.experts.92.w1", "model.layers.59.block_sparse_moe.experts.93.w1", "model.layers.59.block_sparse_moe.experts.94.w1", "model.layers.59.block_sparse_moe.experts.95.w1", "model.layers.59.block_sparse_moe.experts.96.w1", "model.layers.59.block_sparse_moe.experts.97.w1", "model.layers.59.block_sparse_moe.experts.98.w1", "model.layers.59.block_sparse_moe.experts.99.w1", "model.layers.59.block_sparse_moe.experts.100.w1", "model.layers.59.block_sparse_moe.experts.101.w1", "model.layers.59.block_sparse_moe.experts.102.w1", "model.layers.59.block_sparse_moe.experts.103.w1", "model.layers.59.block_sparse_moe.experts.104.w1", "model.layers.59.block_sparse_moe.experts.105.w1", "model.layers.59.block_sparse_moe.experts.106.w1", "model.layers.59.block_sparse_moe.experts.107.w1", "model.layers.59.block_sparse_moe.experts.108.w1", "model.layers.59.block_sparse_moe.experts.109.w1", "model.layers.59.block_sparse_moe.experts.110.w1", "model.layers.59.block_sparse_moe.experts.111.w1", "model.layers.59.block_sparse_moe.experts.112.w1", "model.layers.59.block_sparse_moe.experts.113.w1", "model.layers.59.block_sparse_moe.experts.114.w1", "model.layers.59.block_sparse_moe.experts.115.w1", "model.layers.59.block_sparse_moe.experts.116.w1", "model.layers.59.block_sparse_moe.experts.117.w1", "model.layers.59.block_sparse_moe.experts.118.w1", "model.layers.59.block_sparse_moe.experts.119.w1", "model.layers.59.block_sparse_moe.experts.120.w1", "model.layers.59.block_sparse_moe.experts.121.w1", "model.layers.59.block_sparse_moe.experts.122.w1", "model.layers.59.block_sparse_moe.experts.123.w1", "model.layers.59.block_sparse_moe.experts.124.w1", "model.layers.59.block_sparse_moe.experts.125.w1", "model.layers.59.block_sparse_moe.experts.126.w1", "model.layers.59.block_sparse_moe.experts.127.w1", "model.layers.59.block_sparse_moe.experts.128.w1", "model.layers.59.block_sparse_moe.experts.129.w1", "model.layers.59.block_sparse_moe.experts.130.w1", "model.layers.59.block_sparse_moe.experts.131.w1", "model.layers.59.block_sparse_moe.experts.132.w1", "model.layers.59.block_sparse_moe.experts.133.w1", "model.layers.59.block_sparse_moe.experts.134.w1", "model.layers.59.block_sparse_moe.experts.135.w1", "model.layers.59.block_sparse_moe.experts.136.w1", "model.layers.59.block_sparse_moe.experts.137.w1", "model.layers.59.block_sparse_moe.experts.138.w1", "model.layers.59.block_sparse_moe.experts.139.w1", "model.layers.59.block_sparse_moe.experts.140.w1", "model.layers.59.block_sparse_moe.experts.141.w1", "model.layers.59.block_sparse_moe.experts.142.w1", "model.layers.59.block_sparse_moe.experts.143.w1", "model.layers.59.block_sparse_moe.experts.144.w1", "model.layers.59.block_sparse_moe.experts.145.w1", "model.layers.59.block_sparse_moe.experts.146.w1", "model.layers.59.block_sparse_moe.experts.147.w1", "model.layers.59.block_sparse_moe.experts.148.w1", "model.layers.59.block_sparse_moe.experts.149.w1", "model.layers.59.block_sparse_moe.experts.150.w1", "model.layers.59.block_sparse_moe.experts.151.w1", "model.layers.59.block_sparse_moe.experts.152.w1", "model.layers.59.block_sparse_moe.experts.153.w1", "model.layers.59.block_sparse_moe.experts.154.w1", "model.layers.59.block_sparse_moe.experts.155.w1", "model.layers.59.block_sparse_moe.experts.156.w1", "model.layers.59.block_sparse_moe.experts.157.w1", "model.layers.59.block_sparse_moe.experts.158.w1", "model.layers.59.block_sparse_moe.experts.159.w1", "model.layers.59.block_sparse_moe.experts.160.w1", "model.layers.59.block_sparse_moe.experts.161.w1", "model.layers.59.block_sparse_moe.experts.162.w1", "model.layers.59.block_sparse_moe.experts.163.w1", "model.layers.59.block_sparse_moe.experts.164.w1", "model.layers.59.block_sparse_moe.experts.165.w1", "model.layers.59.block_sparse_moe.experts.166.w1", "model.layers.59.block_sparse_moe.experts.167.w1", "model.layers.59.block_sparse_moe.experts.168.w1", "model.layers.59.block_sparse_moe.experts.169.w1", "model.layers.59.block_sparse_moe.experts.170.w1", "model.layers.59.block_sparse_moe.experts.171.w1", "model.layers.59.block_sparse_moe.experts.172.w1", "model.layers.59.block_sparse_moe.experts.173.w1", "model.layers.59.block_sparse_moe.experts.174.w1", "model.layers.59.block_sparse_moe.experts.175.w1", "model.layers.59.block_sparse_moe.experts.176.w1", "model.layers.59.block_sparse_moe.experts.177.w1", "model.layers.59.block_sparse_moe.experts.178.w1", "model.layers.59.block_sparse_moe.experts.179.w1", "model.layers.59.block_sparse_moe.experts.180.w1", "model.layers.59.block_sparse_moe.experts.181.w1", "model.layers.59.block_sparse_moe.experts.182.w1", "model.layers.59.block_sparse_moe.experts.183.w1", "model.layers.59.block_sparse_moe.experts.184.w1", "model.layers.59.block_sparse_moe.experts.185.w1", "model.layers.59.block_sparse_moe.experts.186.w1", "model.layers.59.block_sparse_moe.experts.187.w1", "model.layers.59.block_sparse_moe.experts.188.w1", "model.layers.59.block_sparse_moe.experts.189.w1", "model.layers.59.block_sparse_moe.experts.190.w1", "model.layers.59.block_sparse_moe.experts.191.w1", "model.layers.59.block_sparse_moe.experts.192.w1", "model.layers.59.block_sparse_moe.experts.193.w1", "model.layers.59.block_sparse_moe.experts.194.w1", "model.layers.59.block_sparse_moe.experts.195.w1", "model.layers.59.block_sparse_moe.experts.196.w1", "model.layers.59.block_sparse_moe.experts.197.w1", "model.layers.59.block_sparse_moe.experts.198.w1", "model.layers.59.block_sparse_moe.experts.199.w1", "model.layers.59.block_sparse_moe.experts.200.w1", "model.layers.59.block_sparse_moe.experts.201.w1", "model.layers.59.block_sparse_moe.experts.202.w1", "model.layers.59.block_sparse_moe.experts.203.w1", "model.layers.59.block_sparse_moe.experts.204.w1", "model.layers.59.block_sparse_moe.experts.205.w1", "model.layers.59.block_sparse_moe.experts.206.w1", "model.layers.59.block_sparse_moe.experts.207.w1", "model.layers.59.block_sparse_moe.experts.208.w1", "model.layers.59.block_sparse_moe.experts.209.w1", "model.layers.59.block_sparse_moe.experts.210.w1", "model.layers.59.block_sparse_moe.experts.211.w1", "model.layers.59.block_sparse_moe.experts.212.w1", "model.layers.59.block_sparse_moe.experts.213.w1", "model.layers.59.block_sparse_moe.experts.214.w1", "model.layers.59.block_sparse_moe.experts.215.w1", "model.layers.59.block_sparse_moe.experts.216.w1", "model.layers.59.block_sparse_moe.experts.217.w1", "model.layers.59.block_sparse_moe.experts.218.w1", "model.layers.59.block_sparse_moe.experts.219.w1", "model.layers.59.block_sparse_moe.experts.220.w1", "model.layers.59.block_sparse_moe.experts.221.w1", "model.layers.59.block_sparse_moe.experts.222.w1", "model.layers.59.block_sparse_moe.experts.223.w1", "model.layers.59.block_sparse_moe.experts.224.w1", "model.layers.59.block_sparse_moe.experts.225.w1", "model.layers.59.block_sparse_moe.experts.226.w1", "model.layers.59.block_sparse_moe.experts.227.w1", "model.layers.59.block_sparse_moe.experts.228.w1", "model.layers.59.block_sparse_moe.experts.229.w1", "model.layers.59.block_sparse_moe.experts.230.w1", "model.layers.59.block_sparse_moe.experts.231.w1", "model.layers.59.block_sparse_moe.experts.232.w1", "model.layers.59.block_sparse_moe.experts.233.w1", "model.layers.59.block_sparse_moe.experts.234.w1", "model.layers.59.block_sparse_moe.experts.235.w1", "model.layers.59.block_sparse_moe.experts.236.w1", "model.layers.59.block_sparse_moe.experts.237.w1", "model.layers.59.block_sparse_moe.experts.238.w1", "model.layers.59.block_sparse_moe.experts.239.w1", "model.layers.59.block_sparse_moe.experts.240.w1", "model.layers.59.block_sparse_moe.experts.241.w1", "model.layers.59.block_sparse_moe.experts.242.w1", "model.layers.59.block_sparse_moe.experts.243.w1", "model.layers.59.block_sparse_moe.experts.244.w1", "model.layers.59.block_sparse_moe.experts.245.w1", "model.layers.59.block_sparse_moe.experts.246.w1", "model.layers.59.block_sparse_moe.experts.247.w1", "model.layers.59.block_sparse_moe.experts.248.w1", "model.layers.59.block_sparse_moe.experts.249.w1", "model.layers.59.block_sparse_moe.experts.250.w1", "model.layers.59.block_sparse_moe.experts.251.w1", "model.layers.59.block_sparse_moe.experts.252.w1", "model.layers.59.block_sparse_moe.experts.253.w1", "model.layers.59.block_sparse_moe.experts.254.w1", "model.layers.59.block_sparse_moe.experts.255.w1", "model.layers.59.block_sparse_moe.experts.0.w3", "model.layers.59.block_sparse_moe.experts.1.w3", "model.layers.59.block_sparse_moe.experts.2.w3", "model.layers.59.block_sparse_moe.experts.3.w3", "model.layers.59.block_sparse_moe.experts.4.w3", "model.layers.59.block_sparse_moe.experts.5.w3", "model.layers.59.block_sparse_moe.experts.6.w3", "model.layers.59.block_sparse_moe.experts.7.w3", "model.layers.59.block_sparse_moe.experts.8.w3", "model.layers.59.block_sparse_moe.experts.9.w3", "model.layers.59.block_sparse_moe.experts.10.w3", "model.layers.59.block_sparse_moe.experts.11.w3", "model.layers.59.block_sparse_moe.experts.12.w3", "model.layers.59.block_sparse_moe.experts.13.w3", "model.layers.59.block_sparse_moe.experts.14.w3", "model.layers.59.block_sparse_moe.experts.15.w3", "model.layers.59.block_sparse_moe.experts.16.w3", "model.layers.59.block_sparse_moe.experts.17.w3", "model.layers.59.block_sparse_moe.experts.18.w3", "model.layers.59.block_sparse_moe.experts.19.w3", "model.layers.59.block_sparse_moe.experts.20.w3", "model.layers.59.block_sparse_moe.experts.21.w3", "model.layers.59.block_sparse_moe.experts.22.w3", "model.layers.59.block_sparse_moe.experts.23.w3", "model.layers.59.block_sparse_moe.experts.24.w3", "model.layers.59.block_sparse_moe.experts.25.w3", "model.layers.59.block_sparse_moe.experts.26.w3", "model.layers.59.block_sparse_moe.experts.27.w3", "model.layers.59.block_sparse_moe.experts.28.w3", "model.layers.59.block_sparse_moe.experts.29.w3", "model.layers.59.block_sparse_moe.experts.30.w3", "model.layers.59.block_sparse_moe.experts.31.w3", "model.layers.59.block_sparse_moe.experts.32.w3", "model.layers.59.block_sparse_moe.experts.33.w3", "model.layers.59.block_sparse_moe.experts.34.w3", "model.layers.59.block_sparse_moe.experts.35.w3", "model.layers.59.block_sparse_moe.experts.36.w3", "model.layers.59.block_sparse_moe.experts.37.w3", "model.layers.59.block_sparse_moe.experts.38.w3", "model.layers.59.block_sparse_moe.experts.39.w3", "model.layers.59.block_sparse_moe.experts.40.w3", "model.layers.59.block_sparse_moe.experts.41.w3", "model.layers.59.block_sparse_moe.experts.42.w3", "model.layers.59.block_sparse_moe.experts.43.w3", "model.layers.59.block_sparse_moe.experts.44.w3", "model.layers.59.block_sparse_moe.experts.45.w3", "model.layers.59.block_sparse_moe.experts.46.w3", "model.layers.59.block_sparse_moe.experts.47.w3", "model.layers.59.block_sparse_moe.experts.48.w3", "model.layers.59.block_sparse_moe.experts.49.w3", "model.layers.59.block_sparse_moe.experts.50.w3", "model.layers.59.block_sparse_moe.experts.51.w3", "model.layers.59.block_sparse_moe.experts.52.w3", "model.layers.59.block_sparse_moe.experts.53.w3", "model.layers.59.block_sparse_moe.experts.54.w3", "model.layers.59.block_sparse_moe.experts.55.w3", "model.layers.59.block_sparse_moe.experts.56.w3", "model.layers.59.block_sparse_moe.experts.57.w3", "model.layers.59.block_sparse_moe.experts.58.w3", "model.layers.59.block_sparse_moe.experts.59.w3", "model.layers.59.block_sparse_moe.experts.60.w3", "model.layers.59.block_sparse_moe.experts.61.w3", "model.layers.59.block_sparse_moe.experts.62.w3", "model.layers.59.block_sparse_moe.experts.63.w3", "model.layers.59.block_sparse_moe.experts.64.w3", "model.layers.59.block_sparse_moe.experts.65.w3", "model.layers.59.block_sparse_moe.experts.66.w3", "model.layers.59.block_sparse_moe.experts.67.w3", "model.layers.59.block_sparse_moe.experts.68.w3", "model.layers.59.block_sparse_moe.experts.69.w3", "model.layers.59.block_sparse_moe.experts.70.w3", "model.layers.59.block_sparse_moe.experts.71.w3", "model.layers.59.block_sparse_moe.experts.72.w3", "model.layers.59.block_sparse_moe.experts.73.w3", "model.layers.59.block_sparse_moe.experts.74.w3", "model.layers.59.block_sparse_moe.experts.75.w3", "model.layers.59.block_sparse_moe.experts.76.w3", "model.layers.59.block_sparse_moe.experts.77.w3", "model.layers.59.block_sparse_moe.experts.78.w3", "model.layers.59.block_sparse_moe.experts.79.w3", "model.layers.59.block_sparse_moe.experts.80.w3", "model.layers.59.block_sparse_moe.experts.81.w3", "model.layers.59.block_sparse_moe.experts.82.w3", "model.layers.59.block_sparse_moe.experts.83.w3", "model.layers.59.block_sparse_moe.experts.84.w3", "model.layers.59.block_sparse_moe.experts.85.w3", "model.layers.59.block_sparse_moe.experts.86.w3", "model.layers.59.block_sparse_moe.experts.87.w3", "model.layers.59.block_sparse_moe.experts.88.w3", "model.layers.59.block_sparse_moe.experts.89.w3", "model.layers.59.block_sparse_moe.experts.90.w3", "model.layers.59.block_sparse_moe.experts.91.w3", "model.layers.59.block_sparse_moe.experts.92.w3", "model.layers.59.block_sparse_moe.experts.93.w3", "model.layers.59.block_sparse_moe.experts.94.w3", "model.layers.59.block_sparse_moe.experts.95.w3", "model.layers.59.block_sparse_moe.experts.96.w3", "model.layers.59.block_sparse_moe.experts.97.w3", "model.layers.59.block_sparse_moe.experts.98.w3", "model.layers.59.block_sparse_moe.experts.99.w3", "model.layers.59.block_sparse_moe.experts.100.w3", "model.layers.59.block_sparse_moe.experts.101.w3", "model.layers.59.block_sparse_moe.experts.102.w3", "model.layers.59.block_sparse_moe.experts.103.w3", "model.layers.59.block_sparse_moe.experts.104.w3", "model.layers.59.block_sparse_moe.experts.105.w3", "model.layers.59.block_sparse_moe.experts.106.w3", "model.layers.59.block_sparse_moe.experts.107.w3", "model.layers.59.block_sparse_moe.experts.108.w3", "model.layers.59.block_sparse_moe.experts.109.w3", "model.layers.59.block_sparse_moe.experts.110.w3", "model.layers.59.block_sparse_moe.experts.111.w3", "model.layers.59.block_sparse_moe.experts.112.w3", "model.layers.59.block_sparse_moe.experts.113.w3", "model.layers.59.block_sparse_moe.experts.114.w3", "model.layers.59.block_sparse_moe.experts.115.w3", "model.layers.59.block_sparse_moe.experts.116.w3", "model.layers.59.block_sparse_moe.experts.117.w3", "model.layers.59.block_sparse_moe.experts.118.w3", "model.layers.59.block_sparse_moe.experts.119.w3", "model.layers.59.block_sparse_moe.experts.120.w3", "model.layers.59.block_sparse_moe.experts.121.w3", "model.layers.59.block_sparse_moe.experts.122.w3", "model.layers.59.block_sparse_moe.experts.123.w3", "model.layers.59.block_sparse_moe.experts.124.w3", "model.layers.59.block_sparse_moe.experts.125.w3", "model.layers.59.block_sparse_moe.experts.126.w3", "model.layers.59.block_sparse_moe.experts.127.w3", "model.layers.59.block_sparse_moe.experts.128.w3", "model.layers.59.block_sparse_moe.experts.129.w3", "model.layers.59.block_sparse_moe.experts.130.w3", "model.layers.59.block_sparse_moe.experts.131.w3", "model.layers.59.block_sparse_moe.experts.132.w3", "model.layers.59.block_sparse_moe.experts.133.w3", "model.layers.59.block_sparse_moe.experts.134.w3", "model.layers.59.block_sparse_moe.experts.135.w3", "model.layers.59.block_sparse_moe.experts.136.w3", "model.layers.59.block_sparse_moe.experts.137.w3", "model.layers.59.block_sparse_moe.experts.138.w3", "model.layers.59.block_sparse_moe.experts.139.w3", "model.layers.59.block_sparse_moe.experts.140.w3", "model.layers.59.block_sparse_moe.experts.141.w3", "model.layers.59.block_sparse_moe.experts.142.w3", "model.layers.59.block_sparse_moe.experts.143.w3", "model.layers.59.block_sparse_moe.experts.144.w3", "model.layers.59.block_sparse_moe.experts.145.w3", "model.layers.59.block_sparse_moe.experts.146.w3", "model.layers.59.block_sparse_moe.experts.147.w3", "model.layers.59.block_sparse_moe.experts.148.w3", "model.layers.59.block_sparse_moe.experts.149.w3", "model.layers.59.block_sparse_moe.experts.150.w3", "model.layers.59.block_sparse_moe.experts.151.w3", "model.layers.59.block_sparse_moe.experts.152.w3", "model.layers.59.block_sparse_moe.experts.153.w3", "model.layers.59.block_sparse_moe.experts.154.w3", "model.layers.59.block_sparse_moe.experts.155.w3", "model.layers.59.block_sparse_moe.experts.156.w3", "model.layers.59.block_sparse_moe.experts.157.w3", "model.layers.59.block_sparse_moe.experts.158.w3", "model.layers.59.block_sparse_moe.experts.159.w3", "model.layers.59.block_sparse_moe.experts.160.w3", "model.layers.59.block_sparse_moe.experts.161.w3", "model.layers.59.block_sparse_moe.experts.162.w3", "model.layers.59.block_sparse_moe.experts.163.w3", "model.layers.59.block_sparse_moe.experts.164.w3", "model.layers.59.block_sparse_moe.experts.165.w3", "model.layers.59.block_sparse_moe.experts.166.w3", "model.layers.59.block_sparse_moe.experts.167.w3", "model.layers.59.block_sparse_moe.experts.168.w3", "model.layers.59.block_sparse_moe.experts.169.w3", "model.layers.59.block_sparse_moe.experts.170.w3", "model.layers.59.block_sparse_moe.experts.171.w3", "model.layers.59.block_sparse_moe.experts.172.w3", "model.layers.59.block_sparse_moe.experts.173.w3", "model.layers.59.block_sparse_moe.experts.174.w3", "model.layers.59.block_sparse_moe.experts.175.w3", "model.layers.59.block_sparse_moe.experts.176.w3", "model.layers.59.block_sparse_moe.experts.177.w3", "model.layers.59.block_sparse_moe.experts.178.w3", "model.layers.59.block_sparse_moe.experts.179.w3", "model.layers.59.block_sparse_moe.experts.180.w3", "model.layers.59.block_sparse_moe.experts.181.w3", "model.layers.59.block_sparse_moe.experts.182.w3", "model.layers.59.block_sparse_moe.experts.183.w3", "model.layers.59.block_sparse_moe.experts.184.w3", "model.layers.59.block_sparse_moe.experts.185.w3", "model.layers.59.block_sparse_moe.experts.186.w3", "model.layers.59.block_sparse_moe.experts.187.w3", "model.layers.59.block_sparse_moe.experts.188.w3", "model.layers.59.block_sparse_moe.experts.189.w3", "model.layers.59.block_sparse_moe.experts.190.w3", "model.layers.59.block_sparse_moe.experts.191.w3", "model.layers.59.block_sparse_moe.experts.192.w3", "model.layers.59.block_sparse_moe.experts.193.w3", "model.layers.59.block_sparse_moe.experts.194.w3", "model.layers.59.block_sparse_moe.experts.195.w3", "model.layers.59.block_sparse_moe.experts.196.w3", "model.layers.59.block_sparse_moe.experts.197.w3", "model.layers.59.block_sparse_moe.experts.198.w3", "model.layers.59.block_sparse_moe.experts.199.w3", "model.layers.59.block_sparse_moe.experts.200.w3", "model.layers.59.block_sparse_moe.experts.201.w3", "model.layers.59.block_sparse_moe.experts.202.w3", "model.layers.59.block_sparse_moe.experts.203.w3", "model.layers.59.block_sparse_moe.experts.204.w3", "model.layers.59.block_sparse_moe.experts.205.w3", "model.layers.59.block_sparse_moe.experts.206.w3", "model.layers.59.block_sparse_moe.experts.207.w3", "model.layers.59.block_sparse_moe.experts.208.w3", "model.layers.59.block_sparse_moe.experts.209.w3", "model.layers.59.block_sparse_moe.experts.210.w3", "model.layers.59.block_sparse_moe.experts.211.w3", "model.layers.59.block_sparse_moe.experts.212.w3", "model.layers.59.block_sparse_moe.experts.213.w3", "model.layers.59.block_sparse_moe.experts.214.w3", "model.layers.59.block_sparse_moe.experts.215.w3", "model.layers.59.block_sparse_moe.experts.216.w3", "model.layers.59.block_sparse_moe.experts.217.w3", "model.layers.59.block_sparse_moe.experts.218.w3", "model.layers.59.block_sparse_moe.experts.219.w3", "model.layers.59.block_sparse_moe.experts.220.w3", "model.layers.59.block_sparse_moe.experts.221.w3", "model.layers.59.block_sparse_moe.experts.222.w3", "model.layers.59.block_sparse_moe.experts.223.w3", "model.layers.59.block_sparse_moe.experts.224.w3", "model.layers.59.block_sparse_moe.experts.225.w3", "model.layers.59.block_sparse_moe.experts.226.w3", "model.layers.59.block_sparse_moe.experts.227.w3", "model.layers.59.block_sparse_moe.experts.228.w3", "model.layers.59.block_sparse_moe.experts.229.w3", "model.layers.59.block_sparse_moe.experts.230.w3", "model.layers.59.block_sparse_moe.experts.231.w3", "model.layers.59.block_sparse_moe.experts.232.w3", "model.layers.59.block_sparse_moe.experts.233.w3", "model.layers.59.block_sparse_moe.experts.234.w3", "model.layers.59.block_sparse_moe.experts.235.w3", "model.layers.59.block_sparse_moe.experts.236.w3", "model.layers.59.block_sparse_moe.experts.237.w3", "model.layers.59.block_sparse_moe.experts.238.w3", "model.layers.59.block_sparse_moe.experts.239.w3", "model.layers.59.block_sparse_moe.experts.240.w3", "model.layers.59.block_sparse_moe.experts.241.w3", "model.layers.59.block_sparse_moe.experts.242.w3", "model.layers.59.block_sparse_moe.experts.243.w3", "model.layers.59.block_sparse_moe.experts.244.w3", "model.layers.59.block_sparse_moe.experts.245.w3", "model.layers.59.block_sparse_moe.experts.246.w3", "model.layers.59.block_sparse_moe.experts.247.w3", "model.layers.59.block_sparse_moe.experts.248.w3", "model.layers.59.block_sparse_moe.experts.249.w3", "model.layers.59.block_sparse_moe.experts.250.w3", "model.layers.59.block_sparse_moe.experts.251.w3", "model.layers.59.block_sparse_moe.experts.252.w3", "model.layers.59.block_sparse_moe.experts.253.w3", "model.layers.59.block_sparse_moe.experts.254.w3", "model.layers.59.block_sparse_moe.experts.255.w3", "model.layers.59.block_sparse_moe.experts.0.w2", "model.layers.59.block_sparse_moe.experts.1.w2", "model.layers.59.block_sparse_moe.experts.2.w2", "model.layers.59.block_sparse_moe.experts.3.w2", "model.layers.59.block_sparse_moe.experts.4.w2", "model.layers.59.block_sparse_moe.experts.5.w2", "model.layers.59.block_sparse_moe.experts.6.w2", "model.layers.59.block_sparse_moe.experts.7.w2", "model.layers.59.block_sparse_moe.experts.8.w2", "model.layers.59.block_sparse_moe.experts.9.w2", "model.layers.59.block_sparse_moe.experts.10.w2", "model.layers.59.block_sparse_moe.experts.11.w2", "model.layers.59.block_sparse_moe.experts.12.w2", "model.layers.59.block_sparse_moe.experts.13.w2", "model.layers.59.block_sparse_moe.experts.14.w2", "model.layers.59.block_sparse_moe.experts.15.w2", "model.layers.59.block_sparse_moe.experts.16.w2", "model.layers.59.block_sparse_moe.experts.17.w2", "model.layers.59.block_sparse_moe.experts.18.w2", "model.layers.59.block_sparse_moe.experts.19.w2", "model.layers.59.block_sparse_moe.experts.20.w2", "model.layers.59.block_sparse_moe.experts.21.w2", "model.layers.59.block_sparse_moe.experts.22.w2", "model.layers.59.block_sparse_moe.experts.23.w2", "model.layers.59.block_sparse_moe.experts.24.w2", "model.layers.59.block_sparse_moe.experts.25.w2", "model.layers.59.block_sparse_moe.experts.26.w2", "model.layers.59.block_sparse_moe.experts.27.w2", "model.layers.59.block_sparse_moe.experts.28.w2", "model.layers.59.block_sparse_moe.experts.29.w2", "model.layers.59.block_sparse_moe.experts.30.w2", "model.layers.59.block_sparse_moe.experts.31.w2", "model.layers.59.block_sparse_moe.experts.32.w2", "model.layers.59.block_sparse_moe.experts.33.w2", "model.layers.59.block_sparse_moe.experts.34.w2", "model.layers.59.block_sparse_moe.experts.35.w2", "model.layers.59.block_sparse_moe.experts.36.w2", "model.layers.59.block_sparse_moe.experts.37.w2", "model.layers.59.block_sparse_moe.experts.38.w2", "model.layers.59.block_sparse_moe.experts.39.w2", "model.layers.59.block_sparse_moe.experts.40.w2", "model.layers.59.block_sparse_moe.experts.41.w2", "model.layers.59.block_sparse_moe.experts.42.w2", "model.layers.59.block_sparse_moe.experts.43.w2", "model.layers.59.block_sparse_moe.experts.44.w2", "model.layers.59.block_sparse_moe.experts.45.w2", "model.layers.59.block_sparse_moe.experts.46.w2", "model.layers.59.block_sparse_moe.experts.47.w2", "model.layers.59.block_sparse_moe.experts.48.w2", "model.layers.59.block_sparse_moe.experts.49.w2", "model.layers.59.block_sparse_moe.experts.50.w2", "model.layers.59.block_sparse_moe.experts.51.w2", "model.layers.59.block_sparse_moe.experts.52.w2", "model.layers.59.block_sparse_moe.experts.53.w2", "model.layers.59.block_sparse_moe.experts.54.w2", "model.layers.59.block_sparse_moe.experts.55.w2", "model.layers.59.block_sparse_moe.experts.56.w2", "model.layers.59.block_sparse_moe.experts.57.w2", "model.layers.59.block_sparse_moe.experts.58.w2", "model.layers.59.block_sparse_moe.experts.59.w2", "model.layers.59.block_sparse_moe.experts.60.w2", "model.layers.59.block_sparse_moe.experts.61.w2", "model.layers.59.block_sparse_moe.experts.62.w2", "model.layers.59.block_sparse_moe.experts.63.w2", "model.layers.59.block_sparse_moe.experts.64.w2", "model.layers.59.block_sparse_moe.experts.65.w2", "model.layers.59.block_sparse_moe.experts.66.w2", "model.layers.59.block_sparse_moe.experts.67.w2", "model.layers.59.block_sparse_moe.experts.68.w2", "model.layers.59.block_sparse_moe.experts.69.w2", "model.layers.59.block_sparse_moe.experts.70.w2", "model.layers.59.block_sparse_moe.experts.71.w2", "model.layers.59.block_sparse_moe.experts.72.w2", "model.layers.59.block_sparse_moe.experts.73.w2", "model.layers.59.block_sparse_moe.experts.74.w2", "model.layers.59.block_sparse_moe.experts.75.w2", "model.layers.59.block_sparse_moe.experts.76.w2", "model.layers.59.block_sparse_moe.experts.77.w2", "model.layers.59.block_sparse_moe.experts.78.w2", "model.layers.59.block_sparse_moe.experts.79.w2", "model.layers.59.block_sparse_moe.experts.80.w2", "model.layers.59.block_sparse_moe.experts.81.w2", "model.layers.59.block_sparse_moe.experts.82.w2", "model.layers.59.block_sparse_moe.experts.83.w2", "model.layers.59.block_sparse_moe.experts.84.w2", "model.layers.59.block_sparse_moe.experts.85.w2", "model.layers.59.block_sparse_moe.experts.86.w2", "model.layers.59.block_sparse_moe.experts.87.w2", "model.layers.59.block_sparse_moe.experts.88.w2", "model.layers.59.block_sparse_moe.experts.89.w2", "model.layers.59.block_sparse_moe.experts.90.w2", "model.layers.59.block_sparse_moe.experts.91.w2", "model.layers.59.block_sparse_moe.experts.92.w2", "model.layers.59.block_sparse_moe.experts.93.w2", "model.layers.59.block_sparse_moe.experts.94.w2", "model.layers.59.block_sparse_moe.experts.95.w2", "model.layers.59.block_sparse_moe.experts.96.w2", "model.layers.59.block_sparse_moe.experts.97.w2", "model.layers.59.block_sparse_moe.experts.98.w2", "model.layers.59.block_sparse_moe.experts.99.w2", "model.layers.59.block_sparse_moe.experts.100.w2", "model.layers.59.block_sparse_moe.experts.101.w2", "model.layers.59.block_sparse_moe.experts.102.w2", "model.layers.59.block_sparse_moe.experts.103.w2", "model.layers.59.block_sparse_moe.experts.104.w2", "model.layers.59.block_sparse_moe.experts.105.w2", "model.layers.59.block_sparse_moe.experts.106.w2", "model.layers.59.block_sparse_moe.experts.107.w2", "model.layers.59.block_sparse_moe.experts.108.w2", "model.layers.59.block_sparse_moe.experts.109.w2", "model.layers.59.block_sparse_moe.experts.110.w2", "model.layers.59.block_sparse_moe.experts.111.w2", "model.layers.59.block_sparse_moe.experts.112.w2", "model.layers.59.block_sparse_moe.experts.113.w2", "model.layers.59.block_sparse_moe.experts.114.w2", "model.layers.59.block_sparse_moe.experts.115.w2", "model.layers.59.block_sparse_moe.experts.116.w2", "model.layers.59.block_sparse_moe.experts.117.w2", "model.layers.59.block_sparse_moe.experts.118.w2", "model.layers.59.block_sparse_moe.experts.119.w2", "model.layers.59.block_sparse_moe.experts.120.w2", "model.layers.59.block_sparse_moe.experts.121.w2", "model.layers.59.block_sparse_moe.experts.122.w2", "model.layers.59.block_sparse_moe.experts.123.w2", "model.layers.59.block_sparse_moe.experts.124.w2", "model.layers.59.block_sparse_moe.experts.125.w2", "model.layers.59.block_sparse_moe.experts.126.w2", "model.layers.59.block_sparse_moe.experts.127.w2", "model.layers.59.block_sparse_moe.experts.128.w2", "model.layers.59.block_sparse_moe.experts.129.w2", "model.layers.59.block_sparse_moe.experts.130.w2", "model.layers.59.block_sparse_moe.experts.131.w2", "model.layers.59.block_sparse_moe.experts.132.w2", "model.layers.59.block_sparse_moe.experts.133.w2", "model.layers.59.block_sparse_moe.experts.134.w2", "model.layers.59.block_sparse_moe.experts.135.w2", "model.layers.59.block_sparse_moe.experts.136.w2", "model.layers.59.block_sparse_moe.experts.137.w2", "model.layers.59.block_sparse_moe.experts.138.w2", "model.layers.59.block_sparse_moe.experts.139.w2", "model.layers.59.block_sparse_moe.experts.140.w2", "model.layers.59.block_sparse_moe.experts.141.w2", "model.layers.59.block_sparse_moe.experts.142.w2", "model.layers.59.block_sparse_moe.experts.143.w2", "model.layers.59.block_sparse_moe.experts.144.w2", "model.layers.59.block_sparse_moe.experts.145.w2", "model.layers.59.block_sparse_moe.experts.146.w2", "model.layers.59.block_sparse_moe.experts.147.w2", "model.layers.59.block_sparse_moe.experts.148.w2", "model.layers.59.block_sparse_moe.experts.149.w2", "model.layers.59.block_sparse_moe.experts.150.w2", "model.layers.59.block_sparse_moe.experts.151.w2", "model.layers.59.block_sparse_moe.experts.152.w2", "model.layers.59.block_sparse_moe.experts.153.w2", "model.layers.59.block_sparse_moe.experts.154.w2", "model.layers.59.block_sparse_moe.experts.155.w2", "model.layers.59.block_sparse_moe.experts.156.w2", "model.layers.59.block_sparse_moe.experts.157.w2", "model.layers.59.block_sparse_moe.experts.158.w2", "model.layers.59.block_sparse_moe.experts.159.w2", "model.layers.59.block_sparse_moe.experts.160.w2", "model.layers.59.block_sparse_moe.experts.161.w2", "model.layers.59.block_sparse_moe.experts.162.w2", "model.layers.59.block_sparse_moe.experts.163.w2", "model.layers.59.block_sparse_moe.experts.164.w2", "model.layers.59.block_sparse_moe.experts.165.w2", "model.layers.59.block_sparse_moe.experts.166.w2", "model.layers.59.block_sparse_moe.experts.167.w2", "model.layers.59.block_sparse_moe.experts.168.w2", "model.layers.59.block_sparse_moe.experts.169.w2", "model.layers.59.block_sparse_moe.experts.170.w2", "model.layers.59.block_sparse_moe.experts.171.w2", "model.layers.59.block_sparse_moe.experts.172.w2", "model.layers.59.block_sparse_moe.experts.173.w2", "model.layers.59.block_sparse_moe.experts.174.w2", "model.layers.59.block_sparse_moe.experts.175.w2", "model.layers.59.block_sparse_moe.experts.176.w2", "model.layers.59.block_sparse_moe.experts.177.w2", "model.layers.59.block_sparse_moe.experts.178.w2", "model.layers.59.block_sparse_moe.experts.179.w2", "model.layers.59.block_sparse_moe.experts.180.w2", "model.layers.59.block_sparse_moe.experts.181.w2", "model.layers.59.block_sparse_moe.experts.182.w2", "model.layers.59.block_sparse_moe.experts.183.w2", "model.layers.59.block_sparse_moe.experts.184.w2", "model.layers.59.block_sparse_moe.experts.185.w2", "model.layers.59.block_sparse_moe.experts.186.w2", "model.layers.59.block_sparse_moe.experts.187.w2", "model.layers.59.block_sparse_moe.experts.188.w2", "model.layers.59.block_sparse_moe.experts.189.w2", "model.layers.59.block_sparse_moe.experts.190.w2", "model.layers.59.block_sparse_moe.experts.191.w2", "model.layers.59.block_sparse_moe.experts.192.w2", "model.layers.59.block_sparse_moe.experts.193.w2", "model.layers.59.block_sparse_moe.experts.194.w2", "model.layers.59.block_sparse_moe.experts.195.w2", "model.layers.59.block_sparse_moe.experts.196.w2", "model.layers.59.block_sparse_moe.experts.197.w2", "model.layers.59.block_sparse_moe.experts.198.w2", "model.layers.59.block_sparse_moe.experts.199.w2", "model.layers.59.block_sparse_moe.experts.200.w2", "model.layers.59.block_sparse_moe.experts.201.w2", "model.layers.59.block_sparse_moe.experts.202.w2", "model.layers.59.block_sparse_moe.experts.203.w2", "model.layers.59.block_sparse_moe.experts.204.w2", "model.layers.59.block_sparse_moe.experts.205.w2", "model.layers.59.block_sparse_moe.experts.206.w2", "model.layers.59.block_sparse_moe.experts.207.w2", "model.layers.59.block_sparse_moe.experts.208.w2", "model.layers.59.block_sparse_moe.experts.209.w2", "model.layers.59.block_sparse_moe.experts.210.w2", "model.layers.59.block_sparse_moe.experts.211.w2", "model.layers.59.block_sparse_moe.experts.212.w2", "model.layers.59.block_sparse_moe.experts.213.w2", "model.layers.59.block_sparse_moe.experts.214.w2", "model.layers.59.block_sparse_moe.experts.215.w2", "model.layers.59.block_sparse_moe.experts.216.w2", "model.layers.59.block_sparse_moe.experts.217.w2", "model.layers.59.block_sparse_moe.experts.218.w2", "model.layers.59.block_sparse_moe.experts.219.w2", "model.layers.59.block_sparse_moe.experts.220.w2", "model.layers.59.block_sparse_moe.experts.221.w2", "model.layers.59.block_sparse_moe.experts.222.w2", "model.layers.59.block_sparse_moe.experts.223.w2", "model.layers.59.block_sparse_moe.experts.224.w2", "model.layers.59.block_sparse_moe.experts.225.w2", "model.layers.59.block_sparse_moe.experts.226.w2", "model.layers.59.block_sparse_moe.experts.227.w2", "model.layers.59.block_sparse_moe.experts.228.w2", "model.layers.59.block_sparse_moe.experts.229.w2", "model.layers.59.block_sparse_moe.experts.230.w2", "model.layers.59.block_sparse_moe.experts.231.w2", "model.layers.59.block_sparse_moe.experts.232.w2", "model.layers.59.block_sparse_moe.experts.233.w2", "model.layers.59.block_sparse_moe.experts.234.w2", "model.layers.59.block_sparse_moe.experts.235.w2", "model.layers.59.block_sparse_moe.experts.236.w2", "model.layers.59.block_sparse_moe.experts.237.w2", "model.layers.59.block_sparse_moe.experts.238.w2", "model.layers.59.block_sparse_moe.experts.239.w2", "model.layers.59.block_sparse_moe.experts.240.w2", "model.layers.59.block_sparse_moe.experts.241.w2", "model.layers.59.block_sparse_moe.experts.242.w2", "model.layers.59.block_sparse_moe.experts.243.w2", "model.layers.59.block_sparse_moe.experts.244.w2", "model.layers.59.block_sparse_moe.experts.245.w2", "model.layers.59.block_sparse_moe.experts.246.w2", "model.layers.59.block_sparse_moe.experts.247.w2", "model.layers.59.block_sparse_moe.experts.248.w2", "model.layers.59.block_sparse_moe.experts.249.w2", "model.layers.59.block_sparse_moe.experts.250.w2", "model.layers.59.block_sparse_moe.experts.251.w2", "model.layers.59.block_sparse_moe.experts.252.w2", "model.layers.59.block_sparse_moe.experts.253.w2", "model.layers.59.block_sparse_moe.experts.254.w2", "model.layers.59.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0005168743431568368, "dbits": 3623878656 } ] }, { "idx": 120, "layers": [ "model.layers.60.self_attn.q_proj", "model.layers.60.self_attn.k_proj", "model.layers.60.self_attn.v_proj", "model.layers.60.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0027974186465143935, "dbits": 44040192 } ] }, { "idx": 121, "layers": [ "model.layers.60.block_sparse_moe.experts.0.w1", "model.layers.60.block_sparse_moe.experts.1.w1", "model.layers.60.block_sparse_moe.experts.2.w1", "model.layers.60.block_sparse_moe.experts.3.w1", "model.layers.60.block_sparse_moe.experts.4.w1", "model.layers.60.block_sparse_moe.experts.5.w1", "model.layers.60.block_sparse_moe.experts.6.w1", "model.layers.60.block_sparse_moe.experts.7.w1", "model.layers.60.block_sparse_moe.experts.8.w1", "model.layers.60.block_sparse_moe.experts.9.w1", "model.layers.60.block_sparse_moe.experts.10.w1", "model.layers.60.block_sparse_moe.experts.11.w1", "model.layers.60.block_sparse_moe.experts.12.w1", "model.layers.60.block_sparse_moe.experts.13.w1", "model.layers.60.block_sparse_moe.experts.14.w1", "model.layers.60.block_sparse_moe.experts.15.w1", "model.layers.60.block_sparse_moe.experts.16.w1", "model.layers.60.block_sparse_moe.experts.17.w1", "model.layers.60.block_sparse_moe.experts.18.w1", "model.layers.60.block_sparse_moe.experts.19.w1", "model.layers.60.block_sparse_moe.experts.20.w1", "model.layers.60.block_sparse_moe.experts.21.w1", "model.layers.60.block_sparse_moe.experts.22.w1", "model.layers.60.block_sparse_moe.experts.23.w1", "model.layers.60.block_sparse_moe.experts.24.w1", "model.layers.60.block_sparse_moe.experts.25.w1", "model.layers.60.block_sparse_moe.experts.26.w1", "model.layers.60.block_sparse_moe.experts.27.w1", "model.layers.60.block_sparse_moe.experts.28.w1", "model.layers.60.block_sparse_moe.experts.29.w1", "model.layers.60.block_sparse_moe.experts.30.w1", "model.layers.60.block_sparse_moe.experts.31.w1", "model.layers.60.block_sparse_moe.experts.32.w1", "model.layers.60.block_sparse_moe.experts.33.w1", "model.layers.60.block_sparse_moe.experts.34.w1", "model.layers.60.block_sparse_moe.experts.35.w1", "model.layers.60.block_sparse_moe.experts.36.w1", "model.layers.60.block_sparse_moe.experts.37.w1", "model.layers.60.block_sparse_moe.experts.38.w1", "model.layers.60.block_sparse_moe.experts.39.w1", "model.layers.60.block_sparse_moe.experts.40.w1", "model.layers.60.block_sparse_moe.experts.41.w1", "model.layers.60.block_sparse_moe.experts.42.w1", "model.layers.60.block_sparse_moe.experts.43.w1", "model.layers.60.block_sparse_moe.experts.44.w1", "model.layers.60.block_sparse_moe.experts.45.w1", "model.layers.60.block_sparse_moe.experts.46.w1", "model.layers.60.block_sparse_moe.experts.47.w1", "model.layers.60.block_sparse_moe.experts.48.w1", "model.layers.60.block_sparse_moe.experts.49.w1", "model.layers.60.block_sparse_moe.experts.50.w1", "model.layers.60.block_sparse_moe.experts.51.w1", "model.layers.60.block_sparse_moe.experts.52.w1", "model.layers.60.block_sparse_moe.experts.53.w1", "model.layers.60.block_sparse_moe.experts.54.w1", "model.layers.60.block_sparse_moe.experts.55.w1", "model.layers.60.block_sparse_moe.experts.56.w1", "model.layers.60.block_sparse_moe.experts.57.w1", "model.layers.60.block_sparse_moe.experts.58.w1", "model.layers.60.block_sparse_moe.experts.59.w1", "model.layers.60.block_sparse_moe.experts.60.w1", "model.layers.60.block_sparse_moe.experts.61.w1", "model.layers.60.block_sparse_moe.experts.62.w1", "model.layers.60.block_sparse_moe.experts.63.w1", "model.layers.60.block_sparse_moe.experts.64.w1", "model.layers.60.block_sparse_moe.experts.65.w1", "model.layers.60.block_sparse_moe.experts.66.w1", "model.layers.60.block_sparse_moe.experts.67.w1", "model.layers.60.block_sparse_moe.experts.68.w1", "model.layers.60.block_sparse_moe.experts.69.w1", "model.layers.60.block_sparse_moe.experts.70.w1", "model.layers.60.block_sparse_moe.experts.71.w1", "model.layers.60.block_sparse_moe.experts.72.w1", "model.layers.60.block_sparse_moe.experts.73.w1", "model.layers.60.block_sparse_moe.experts.74.w1", "model.layers.60.block_sparse_moe.experts.75.w1", "model.layers.60.block_sparse_moe.experts.76.w1", "model.layers.60.block_sparse_moe.experts.77.w1", "model.layers.60.block_sparse_moe.experts.78.w1", "model.layers.60.block_sparse_moe.experts.79.w1", "model.layers.60.block_sparse_moe.experts.80.w1", "model.layers.60.block_sparse_moe.experts.81.w1", "model.layers.60.block_sparse_moe.experts.82.w1", "model.layers.60.block_sparse_moe.experts.83.w1", "model.layers.60.block_sparse_moe.experts.84.w1", "model.layers.60.block_sparse_moe.experts.85.w1", "model.layers.60.block_sparse_moe.experts.86.w1", "model.layers.60.block_sparse_moe.experts.87.w1", "model.layers.60.block_sparse_moe.experts.88.w1", "model.layers.60.block_sparse_moe.experts.89.w1", "model.layers.60.block_sparse_moe.experts.90.w1", "model.layers.60.block_sparse_moe.experts.91.w1", "model.layers.60.block_sparse_moe.experts.92.w1", "model.layers.60.block_sparse_moe.experts.93.w1", "model.layers.60.block_sparse_moe.experts.94.w1", "model.layers.60.block_sparse_moe.experts.95.w1", "model.layers.60.block_sparse_moe.experts.96.w1", "model.layers.60.block_sparse_moe.experts.97.w1", "model.layers.60.block_sparse_moe.experts.98.w1", "model.layers.60.block_sparse_moe.experts.99.w1", "model.layers.60.block_sparse_moe.experts.100.w1", "model.layers.60.block_sparse_moe.experts.101.w1", "model.layers.60.block_sparse_moe.experts.102.w1", "model.layers.60.block_sparse_moe.experts.103.w1", "model.layers.60.block_sparse_moe.experts.104.w1", "model.layers.60.block_sparse_moe.experts.105.w1", "model.layers.60.block_sparse_moe.experts.106.w1", "model.layers.60.block_sparse_moe.experts.107.w1", "model.layers.60.block_sparse_moe.experts.108.w1", "model.layers.60.block_sparse_moe.experts.109.w1", "model.layers.60.block_sparse_moe.experts.110.w1", "model.layers.60.block_sparse_moe.experts.111.w1", "model.layers.60.block_sparse_moe.experts.112.w1", "model.layers.60.block_sparse_moe.experts.113.w1", "model.layers.60.block_sparse_moe.experts.114.w1", "model.layers.60.block_sparse_moe.experts.115.w1", "model.layers.60.block_sparse_moe.experts.116.w1", "model.layers.60.block_sparse_moe.experts.117.w1", "model.layers.60.block_sparse_moe.experts.118.w1", "model.layers.60.block_sparse_moe.experts.119.w1", "model.layers.60.block_sparse_moe.experts.120.w1", "model.layers.60.block_sparse_moe.experts.121.w1", "model.layers.60.block_sparse_moe.experts.122.w1", "model.layers.60.block_sparse_moe.experts.123.w1", "model.layers.60.block_sparse_moe.experts.124.w1", "model.layers.60.block_sparse_moe.experts.125.w1", "model.layers.60.block_sparse_moe.experts.126.w1", "model.layers.60.block_sparse_moe.experts.127.w1", "model.layers.60.block_sparse_moe.experts.128.w1", "model.layers.60.block_sparse_moe.experts.129.w1", "model.layers.60.block_sparse_moe.experts.130.w1", "model.layers.60.block_sparse_moe.experts.131.w1", "model.layers.60.block_sparse_moe.experts.132.w1", "model.layers.60.block_sparse_moe.experts.133.w1", "model.layers.60.block_sparse_moe.experts.134.w1", "model.layers.60.block_sparse_moe.experts.135.w1", "model.layers.60.block_sparse_moe.experts.136.w1", "model.layers.60.block_sparse_moe.experts.137.w1", "model.layers.60.block_sparse_moe.experts.138.w1", "model.layers.60.block_sparse_moe.experts.139.w1", "model.layers.60.block_sparse_moe.experts.140.w1", "model.layers.60.block_sparse_moe.experts.141.w1", "model.layers.60.block_sparse_moe.experts.142.w1", "model.layers.60.block_sparse_moe.experts.143.w1", "model.layers.60.block_sparse_moe.experts.144.w1", "model.layers.60.block_sparse_moe.experts.145.w1", "model.layers.60.block_sparse_moe.experts.146.w1", "model.layers.60.block_sparse_moe.experts.147.w1", "model.layers.60.block_sparse_moe.experts.148.w1", "model.layers.60.block_sparse_moe.experts.149.w1", "model.layers.60.block_sparse_moe.experts.150.w1", "model.layers.60.block_sparse_moe.experts.151.w1", "model.layers.60.block_sparse_moe.experts.152.w1", "model.layers.60.block_sparse_moe.experts.153.w1", "model.layers.60.block_sparse_moe.experts.154.w1", "model.layers.60.block_sparse_moe.experts.155.w1", "model.layers.60.block_sparse_moe.experts.156.w1", "model.layers.60.block_sparse_moe.experts.157.w1", "model.layers.60.block_sparse_moe.experts.158.w1", "model.layers.60.block_sparse_moe.experts.159.w1", "model.layers.60.block_sparse_moe.experts.160.w1", "model.layers.60.block_sparse_moe.experts.161.w1", "model.layers.60.block_sparse_moe.experts.162.w1", "model.layers.60.block_sparse_moe.experts.163.w1", "model.layers.60.block_sparse_moe.experts.164.w1", "model.layers.60.block_sparse_moe.experts.165.w1", "model.layers.60.block_sparse_moe.experts.166.w1", "model.layers.60.block_sparse_moe.experts.167.w1", "model.layers.60.block_sparse_moe.experts.168.w1", "model.layers.60.block_sparse_moe.experts.169.w1", "model.layers.60.block_sparse_moe.experts.170.w1", "model.layers.60.block_sparse_moe.experts.171.w1", "model.layers.60.block_sparse_moe.experts.172.w1", "model.layers.60.block_sparse_moe.experts.173.w1", "model.layers.60.block_sparse_moe.experts.174.w1", "model.layers.60.block_sparse_moe.experts.175.w1", "model.layers.60.block_sparse_moe.experts.176.w1", "model.layers.60.block_sparse_moe.experts.177.w1", "model.layers.60.block_sparse_moe.experts.178.w1", "model.layers.60.block_sparse_moe.experts.179.w1", "model.layers.60.block_sparse_moe.experts.180.w1", "model.layers.60.block_sparse_moe.experts.181.w1", "model.layers.60.block_sparse_moe.experts.182.w1", "model.layers.60.block_sparse_moe.experts.183.w1", "model.layers.60.block_sparse_moe.experts.184.w1", "model.layers.60.block_sparse_moe.experts.185.w1", "model.layers.60.block_sparse_moe.experts.186.w1", "model.layers.60.block_sparse_moe.experts.187.w1", "model.layers.60.block_sparse_moe.experts.188.w1", "model.layers.60.block_sparse_moe.experts.189.w1", "model.layers.60.block_sparse_moe.experts.190.w1", "model.layers.60.block_sparse_moe.experts.191.w1", "model.layers.60.block_sparse_moe.experts.192.w1", "model.layers.60.block_sparse_moe.experts.193.w1", "model.layers.60.block_sparse_moe.experts.194.w1", "model.layers.60.block_sparse_moe.experts.195.w1", "model.layers.60.block_sparse_moe.experts.196.w1", "model.layers.60.block_sparse_moe.experts.197.w1", "model.layers.60.block_sparse_moe.experts.198.w1", "model.layers.60.block_sparse_moe.experts.199.w1", "model.layers.60.block_sparse_moe.experts.200.w1", "model.layers.60.block_sparse_moe.experts.201.w1", "model.layers.60.block_sparse_moe.experts.202.w1", "model.layers.60.block_sparse_moe.experts.203.w1", "model.layers.60.block_sparse_moe.experts.204.w1", "model.layers.60.block_sparse_moe.experts.205.w1", "model.layers.60.block_sparse_moe.experts.206.w1", "model.layers.60.block_sparse_moe.experts.207.w1", "model.layers.60.block_sparse_moe.experts.208.w1", "model.layers.60.block_sparse_moe.experts.209.w1", "model.layers.60.block_sparse_moe.experts.210.w1", "model.layers.60.block_sparse_moe.experts.211.w1", "model.layers.60.block_sparse_moe.experts.212.w1", "model.layers.60.block_sparse_moe.experts.213.w1", "model.layers.60.block_sparse_moe.experts.214.w1", "model.layers.60.block_sparse_moe.experts.215.w1", "model.layers.60.block_sparse_moe.experts.216.w1", "model.layers.60.block_sparse_moe.experts.217.w1", "model.layers.60.block_sparse_moe.experts.218.w1", "model.layers.60.block_sparse_moe.experts.219.w1", "model.layers.60.block_sparse_moe.experts.220.w1", "model.layers.60.block_sparse_moe.experts.221.w1", "model.layers.60.block_sparse_moe.experts.222.w1", "model.layers.60.block_sparse_moe.experts.223.w1", "model.layers.60.block_sparse_moe.experts.224.w1", "model.layers.60.block_sparse_moe.experts.225.w1", "model.layers.60.block_sparse_moe.experts.226.w1", "model.layers.60.block_sparse_moe.experts.227.w1", "model.layers.60.block_sparse_moe.experts.228.w1", "model.layers.60.block_sparse_moe.experts.229.w1", "model.layers.60.block_sparse_moe.experts.230.w1", "model.layers.60.block_sparse_moe.experts.231.w1", "model.layers.60.block_sparse_moe.experts.232.w1", "model.layers.60.block_sparse_moe.experts.233.w1", "model.layers.60.block_sparse_moe.experts.234.w1", "model.layers.60.block_sparse_moe.experts.235.w1", "model.layers.60.block_sparse_moe.experts.236.w1", "model.layers.60.block_sparse_moe.experts.237.w1", "model.layers.60.block_sparse_moe.experts.238.w1", "model.layers.60.block_sparse_moe.experts.239.w1", "model.layers.60.block_sparse_moe.experts.240.w1", "model.layers.60.block_sparse_moe.experts.241.w1", "model.layers.60.block_sparse_moe.experts.242.w1", "model.layers.60.block_sparse_moe.experts.243.w1", "model.layers.60.block_sparse_moe.experts.244.w1", "model.layers.60.block_sparse_moe.experts.245.w1", "model.layers.60.block_sparse_moe.experts.246.w1", "model.layers.60.block_sparse_moe.experts.247.w1", "model.layers.60.block_sparse_moe.experts.248.w1", "model.layers.60.block_sparse_moe.experts.249.w1", "model.layers.60.block_sparse_moe.experts.250.w1", "model.layers.60.block_sparse_moe.experts.251.w1", "model.layers.60.block_sparse_moe.experts.252.w1", "model.layers.60.block_sparse_moe.experts.253.w1", "model.layers.60.block_sparse_moe.experts.254.w1", "model.layers.60.block_sparse_moe.experts.255.w1", "model.layers.60.block_sparse_moe.experts.0.w3", "model.layers.60.block_sparse_moe.experts.1.w3", "model.layers.60.block_sparse_moe.experts.2.w3", "model.layers.60.block_sparse_moe.experts.3.w3", "model.layers.60.block_sparse_moe.experts.4.w3", "model.layers.60.block_sparse_moe.experts.5.w3", "model.layers.60.block_sparse_moe.experts.6.w3", "model.layers.60.block_sparse_moe.experts.7.w3", "model.layers.60.block_sparse_moe.experts.8.w3", "model.layers.60.block_sparse_moe.experts.9.w3", "model.layers.60.block_sparse_moe.experts.10.w3", "model.layers.60.block_sparse_moe.experts.11.w3", "model.layers.60.block_sparse_moe.experts.12.w3", "model.layers.60.block_sparse_moe.experts.13.w3", "model.layers.60.block_sparse_moe.experts.14.w3", "model.layers.60.block_sparse_moe.experts.15.w3", "model.layers.60.block_sparse_moe.experts.16.w3", "model.layers.60.block_sparse_moe.experts.17.w3", "model.layers.60.block_sparse_moe.experts.18.w3", "model.layers.60.block_sparse_moe.experts.19.w3", "model.layers.60.block_sparse_moe.experts.20.w3", "model.layers.60.block_sparse_moe.experts.21.w3", "model.layers.60.block_sparse_moe.experts.22.w3", "model.layers.60.block_sparse_moe.experts.23.w3", "model.layers.60.block_sparse_moe.experts.24.w3", "model.layers.60.block_sparse_moe.experts.25.w3", "model.layers.60.block_sparse_moe.experts.26.w3", "model.layers.60.block_sparse_moe.experts.27.w3", "model.layers.60.block_sparse_moe.experts.28.w3", "model.layers.60.block_sparse_moe.experts.29.w3", "model.layers.60.block_sparse_moe.experts.30.w3", "model.layers.60.block_sparse_moe.experts.31.w3", "model.layers.60.block_sparse_moe.experts.32.w3", "model.layers.60.block_sparse_moe.experts.33.w3", "model.layers.60.block_sparse_moe.experts.34.w3", "model.layers.60.block_sparse_moe.experts.35.w3", "model.layers.60.block_sparse_moe.experts.36.w3", "model.layers.60.block_sparse_moe.experts.37.w3", "model.layers.60.block_sparse_moe.experts.38.w3", "model.layers.60.block_sparse_moe.experts.39.w3", "model.layers.60.block_sparse_moe.experts.40.w3", "model.layers.60.block_sparse_moe.experts.41.w3", "model.layers.60.block_sparse_moe.experts.42.w3", "model.layers.60.block_sparse_moe.experts.43.w3", "model.layers.60.block_sparse_moe.experts.44.w3", "model.layers.60.block_sparse_moe.experts.45.w3", "model.layers.60.block_sparse_moe.experts.46.w3", "model.layers.60.block_sparse_moe.experts.47.w3", "model.layers.60.block_sparse_moe.experts.48.w3", "model.layers.60.block_sparse_moe.experts.49.w3", "model.layers.60.block_sparse_moe.experts.50.w3", "model.layers.60.block_sparse_moe.experts.51.w3", "model.layers.60.block_sparse_moe.experts.52.w3", "model.layers.60.block_sparse_moe.experts.53.w3", "model.layers.60.block_sparse_moe.experts.54.w3", "model.layers.60.block_sparse_moe.experts.55.w3", "model.layers.60.block_sparse_moe.experts.56.w3", "model.layers.60.block_sparse_moe.experts.57.w3", "model.layers.60.block_sparse_moe.experts.58.w3", "model.layers.60.block_sparse_moe.experts.59.w3", "model.layers.60.block_sparse_moe.experts.60.w3", "model.layers.60.block_sparse_moe.experts.61.w3", "model.layers.60.block_sparse_moe.experts.62.w3", "model.layers.60.block_sparse_moe.experts.63.w3", "model.layers.60.block_sparse_moe.experts.64.w3", "model.layers.60.block_sparse_moe.experts.65.w3", "model.layers.60.block_sparse_moe.experts.66.w3", "model.layers.60.block_sparse_moe.experts.67.w3", "model.layers.60.block_sparse_moe.experts.68.w3", "model.layers.60.block_sparse_moe.experts.69.w3", "model.layers.60.block_sparse_moe.experts.70.w3", "model.layers.60.block_sparse_moe.experts.71.w3", "model.layers.60.block_sparse_moe.experts.72.w3", "model.layers.60.block_sparse_moe.experts.73.w3", "model.layers.60.block_sparse_moe.experts.74.w3", "model.layers.60.block_sparse_moe.experts.75.w3", "model.layers.60.block_sparse_moe.experts.76.w3", "model.layers.60.block_sparse_moe.experts.77.w3", "model.layers.60.block_sparse_moe.experts.78.w3", "model.layers.60.block_sparse_moe.experts.79.w3", "model.layers.60.block_sparse_moe.experts.80.w3", "model.layers.60.block_sparse_moe.experts.81.w3", "model.layers.60.block_sparse_moe.experts.82.w3", "model.layers.60.block_sparse_moe.experts.83.w3", "model.layers.60.block_sparse_moe.experts.84.w3", "model.layers.60.block_sparse_moe.experts.85.w3", "model.layers.60.block_sparse_moe.experts.86.w3", "model.layers.60.block_sparse_moe.experts.87.w3", "model.layers.60.block_sparse_moe.experts.88.w3", "model.layers.60.block_sparse_moe.experts.89.w3", "model.layers.60.block_sparse_moe.experts.90.w3", "model.layers.60.block_sparse_moe.experts.91.w3", "model.layers.60.block_sparse_moe.experts.92.w3", "model.layers.60.block_sparse_moe.experts.93.w3", "model.layers.60.block_sparse_moe.experts.94.w3", "model.layers.60.block_sparse_moe.experts.95.w3", "model.layers.60.block_sparse_moe.experts.96.w3", "model.layers.60.block_sparse_moe.experts.97.w3", "model.layers.60.block_sparse_moe.experts.98.w3", "model.layers.60.block_sparse_moe.experts.99.w3", "model.layers.60.block_sparse_moe.experts.100.w3", "model.layers.60.block_sparse_moe.experts.101.w3", "model.layers.60.block_sparse_moe.experts.102.w3", "model.layers.60.block_sparse_moe.experts.103.w3", "model.layers.60.block_sparse_moe.experts.104.w3", "model.layers.60.block_sparse_moe.experts.105.w3", "model.layers.60.block_sparse_moe.experts.106.w3", "model.layers.60.block_sparse_moe.experts.107.w3", "model.layers.60.block_sparse_moe.experts.108.w3", "model.layers.60.block_sparse_moe.experts.109.w3", "model.layers.60.block_sparse_moe.experts.110.w3", "model.layers.60.block_sparse_moe.experts.111.w3", "model.layers.60.block_sparse_moe.experts.112.w3", "model.layers.60.block_sparse_moe.experts.113.w3", "model.layers.60.block_sparse_moe.experts.114.w3", "model.layers.60.block_sparse_moe.experts.115.w3", "model.layers.60.block_sparse_moe.experts.116.w3", "model.layers.60.block_sparse_moe.experts.117.w3", "model.layers.60.block_sparse_moe.experts.118.w3", "model.layers.60.block_sparse_moe.experts.119.w3", "model.layers.60.block_sparse_moe.experts.120.w3", "model.layers.60.block_sparse_moe.experts.121.w3", "model.layers.60.block_sparse_moe.experts.122.w3", "model.layers.60.block_sparse_moe.experts.123.w3", "model.layers.60.block_sparse_moe.experts.124.w3", "model.layers.60.block_sparse_moe.experts.125.w3", "model.layers.60.block_sparse_moe.experts.126.w3", "model.layers.60.block_sparse_moe.experts.127.w3", "model.layers.60.block_sparse_moe.experts.128.w3", "model.layers.60.block_sparse_moe.experts.129.w3", "model.layers.60.block_sparse_moe.experts.130.w3", "model.layers.60.block_sparse_moe.experts.131.w3", "model.layers.60.block_sparse_moe.experts.132.w3", "model.layers.60.block_sparse_moe.experts.133.w3", "model.layers.60.block_sparse_moe.experts.134.w3", "model.layers.60.block_sparse_moe.experts.135.w3", "model.layers.60.block_sparse_moe.experts.136.w3", "model.layers.60.block_sparse_moe.experts.137.w3", "model.layers.60.block_sparse_moe.experts.138.w3", "model.layers.60.block_sparse_moe.experts.139.w3", "model.layers.60.block_sparse_moe.experts.140.w3", "model.layers.60.block_sparse_moe.experts.141.w3", "model.layers.60.block_sparse_moe.experts.142.w3", "model.layers.60.block_sparse_moe.experts.143.w3", "model.layers.60.block_sparse_moe.experts.144.w3", "model.layers.60.block_sparse_moe.experts.145.w3", "model.layers.60.block_sparse_moe.experts.146.w3", "model.layers.60.block_sparse_moe.experts.147.w3", "model.layers.60.block_sparse_moe.experts.148.w3", "model.layers.60.block_sparse_moe.experts.149.w3", "model.layers.60.block_sparse_moe.experts.150.w3", "model.layers.60.block_sparse_moe.experts.151.w3", "model.layers.60.block_sparse_moe.experts.152.w3", "model.layers.60.block_sparse_moe.experts.153.w3", "model.layers.60.block_sparse_moe.experts.154.w3", "model.layers.60.block_sparse_moe.experts.155.w3", "model.layers.60.block_sparse_moe.experts.156.w3", "model.layers.60.block_sparse_moe.experts.157.w3", "model.layers.60.block_sparse_moe.experts.158.w3", "model.layers.60.block_sparse_moe.experts.159.w3", "model.layers.60.block_sparse_moe.experts.160.w3", "model.layers.60.block_sparse_moe.experts.161.w3", "model.layers.60.block_sparse_moe.experts.162.w3", "model.layers.60.block_sparse_moe.experts.163.w3", "model.layers.60.block_sparse_moe.experts.164.w3", "model.layers.60.block_sparse_moe.experts.165.w3", "model.layers.60.block_sparse_moe.experts.166.w3", "model.layers.60.block_sparse_moe.experts.167.w3", "model.layers.60.block_sparse_moe.experts.168.w3", "model.layers.60.block_sparse_moe.experts.169.w3", "model.layers.60.block_sparse_moe.experts.170.w3", "model.layers.60.block_sparse_moe.experts.171.w3", "model.layers.60.block_sparse_moe.experts.172.w3", "model.layers.60.block_sparse_moe.experts.173.w3", "model.layers.60.block_sparse_moe.experts.174.w3", "model.layers.60.block_sparse_moe.experts.175.w3", "model.layers.60.block_sparse_moe.experts.176.w3", "model.layers.60.block_sparse_moe.experts.177.w3", "model.layers.60.block_sparse_moe.experts.178.w3", "model.layers.60.block_sparse_moe.experts.179.w3", "model.layers.60.block_sparse_moe.experts.180.w3", "model.layers.60.block_sparse_moe.experts.181.w3", "model.layers.60.block_sparse_moe.experts.182.w3", "model.layers.60.block_sparse_moe.experts.183.w3", "model.layers.60.block_sparse_moe.experts.184.w3", "model.layers.60.block_sparse_moe.experts.185.w3", "model.layers.60.block_sparse_moe.experts.186.w3", "model.layers.60.block_sparse_moe.experts.187.w3", "model.layers.60.block_sparse_moe.experts.188.w3", "model.layers.60.block_sparse_moe.experts.189.w3", "model.layers.60.block_sparse_moe.experts.190.w3", "model.layers.60.block_sparse_moe.experts.191.w3", "model.layers.60.block_sparse_moe.experts.192.w3", "model.layers.60.block_sparse_moe.experts.193.w3", "model.layers.60.block_sparse_moe.experts.194.w3", "model.layers.60.block_sparse_moe.experts.195.w3", "model.layers.60.block_sparse_moe.experts.196.w3", "model.layers.60.block_sparse_moe.experts.197.w3", "model.layers.60.block_sparse_moe.experts.198.w3", "model.layers.60.block_sparse_moe.experts.199.w3", "model.layers.60.block_sparse_moe.experts.200.w3", "model.layers.60.block_sparse_moe.experts.201.w3", "model.layers.60.block_sparse_moe.experts.202.w3", "model.layers.60.block_sparse_moe.experts.203.w3", "model.layers.60.block_sparse_moe.experts.204.w3", "model.layers.60.block_sparse_moe.experts.205.w3", "model.layers.60.block_sparse_moe.experts.206.w3", "model.layers.60.block_sparse_moe.experts.207.w3", "model.layers.60.block_sparse_moe.experts.208.w3", "model.layers.60.block_sparse_moe.experts.209.w3", "model.layers.60.block_sparse_moe.experts.210.w3", "model.layers.60.block_sparse_moe.experts.211.w3", "model.layers.60.block_sparse_moe.experts.212.w3", "model.layers.60.block_sparse_moe.experts.213.w3", "model.layers.60.block_sparse_moe.experts.214.w3", "model.layers.60.block_sparse_moe.experts.215.w3", "model.layers.60.block_sparse_moe.experts.216.w3", "model.layers.60.block_sparse_moe.experts.217.w3", "model.layers.60.block_sparse_moe.experts.218.w3", "model.layers.60.block_sparse_moe.experts.219.w3", "model.layers.60.block_sparse_moe.experts.220.w3", "model.layers.60.block_sparse_moe.experts.221.w3", "model.layers.60.block_sparse_moe.experts.222.w3", "model.layers.60.block_sparse_moe.experts.223.w3", "model.layers.60.block_sparse_moe.experts.224.w3", "model.layers.60.block_sparse_moe.experts.225.w3", "model.layers.60.block_sparse_moe.experts.226.w3", "model.layers.60.block_sparse_moe.experts.227.w3", "model.layers.60.block_sparse_moe.experts.228.w3", "model.layers.60.block_sparse_moe.experts.229.w3", "model.layers.60.block_sparse_moe.experts.230.w3", "model.layers.60.block_sparse_moe.experts.231.w3", "model.layers.60.block_sparse_moe.experts.232.w3", "model.layers.60.block_sparse_moe.experts.233.w3", "model.layers.60.block_sparse_moe.experts.234.w3", "model.layers.60.block_sparse_moe.experts.235.w3", "model.layers.60.block_sparse_moe.experts.236.w3", "model.layers.60.block_sparse_moe.experts.237.w3", "model.layers.60.block_sparse_moe.experts.238.w3", "model.layers.60.block_sparse_moe.experts.239.w3", "model.layers.60.block_sparse_moe.experts.240.w3", "model.layers.60.block_sparse_moe.experts.241.w3", "model.layers.60.block_sparse_moe.experts.242.w3", "model.layers.60.block_sparse_moe.experts.243.w3", "model.layers.60.block_sparse_moe.experts.244.w3", "model.layers.60.block_sparse_moe.experts.245.w3", "model.layers.60.block_sparse_moe.experts.246.w3", "model.layers.60.block_sparse_moe.experts.247.w3", "model.layers.60.block_sparse_moe.experts.248.w3", "model.layers.60.block_sparse_moe.experts.249.w3", "model.layers.60.block_sparse_moe.experts.250.w3", "model.layers.60.block_sparse_moe.experts.251.w3", "model.layers.60.block_sparse_moe.experts.252.w3", "model.layers.60.block_sparse_moe.experts.253.w3", "model.layers.60.block_sparse_moe.experts.254.w3", "model.layers.60.block_sparse_moe.experts.255.w3", "model.layers.60.block_sparse_moe.experts.0.w2", "model.layers.60.block_sparse_moe.experts.1.w2", "model.layers.60.block_sparse_moe.experts.2.w2", "model.layers.60.block_sparse_moe.experts.3.w2", "model.layers.60.block_sparse_moe.experts.4.w2", "model.layers.60.block_sparse_moe.experts.5.w2", "model.layers.60.block_sparse_moe.experts.6.w2", "model.layers.60.block_sparse_moe.experts.7.w2", "model.layers.60.block_sparse_moe.experts.8.w2", "model.layers.60.block_sparse_moe.experts.9.w2", "model.layers.60.block_sparse_moe.experts.10.w2", "model.layers.60.block_sparse_moe.experts.11.w2", "model.layers.60.block_sparse_moe.experts.12.w2", "model.layers.60.block_sparse_moe.experts.13.w2", "model.layers.60.block_sparse_moe.experts.14.w2", "model.layers.60.block_sparse_moe.experts.15.w2", "model.layers.60.block_sparse_moe.experts.16.w2", "model.layers.60.block_sparse_moe.experts.17.w2", "model.layers.60.block_sparse_moe.experts.18.w2", "model.layers.60.block_sparse_moe.experts.19.w2", "model.layers.60.block_sparse_moe.experts.20.w2", "model.layers.60.block_sparse_moe.experts.21.w2", "model.layers.60.block_sparse_moe.experts.22.w2", "model.layers.60.block_sparse_moe.experts.23.w2", "model.layers.60.block_sparse_moe.experts.24.w2", "model.layers.60.block_sparse_moe.experts.25.w2", "model.layers.60.block_sparse_moe.experts.26.w2", "model.layers.60.block_sparse_moe.experts.27.w2", "model.layers.60.block_sparse_moe.experts.28.w2", "model.layers.60.block_sparse_moe.experts.29.w2", "model.layers.60.block_sparse_moe.experts.30.w2", "model.layers.60.block_sparse_moe.experts.31.w2", "model.layers.60.block_sparse_moe.experts.32.w2", "model.layers.60.block_sparse_moe.experts.33.w2", "model.layers.60.block_sparse_moe.experts.34.w2", "model.layers.60.block_sparse_moe.experts.35.w2", "model.layers.60.block_sparse_moe.experts.36.w2", "model.layers.60.block_sparse_moe.experts.37.w2", "model.layers.60.block_sparse_moe.experts.38.w2", "model.layers.60.block_sparse_moe.experts.39.w2", "model.layers.60.block_sparse_moe.experts.40.w2", "model.layers.60.block_sparse_moe.experts.41.w2", "model.layers.60.block_sparse_moe.experts.42.w2", "model.layers.60.block_sparse_moe.experts.43.w2", "model.layers.60.block_sparse_moe.experts.44.w2", "model.layers.60.block_sparse_moe.experts.45.w2", "model.layers.60.block_sparse_moe.experts.46.w2", "model.layers.60.block_sparse_moe.experts.47.w2", "model.layers.60.block_sparse_moe.experts.48.w2", "model.layers.60.block_sparse_moe.experts.49.w2", "model.layers.60.block_sparse_moe.experts.50.w2", "model.layers.60.block_sparse_moe.experts.51.w2", "model.layers.60.block_sparse_moe.experts.52.w2", "model.layers.60.block_sparse_moe.experts.53.w2", "model.layers.60.block_sparse_moe.experts.54.w2", "model.layers.60.block_sparse_moe.experts.55.w2", "model.layers.60.block_sparse_moe.experts.56.w2", "model.layers.60.block_sparse_moe.experts.57.w2", "model.layers.60.block_sparse_moe.experts.58.w2", "model.layers.60.block_sparse_moe.experts.59.w2", "model.layers.60.block_sparse_moe.experts.60.w2", "model.layers.60.block_sparse_moe.experts.61.w2", "model.layers.60.block_sparse_moe.experts.62.w2", "model.layers.60.block_sparse_moe.experts.63.w2", "model.layers.60.block_sparse_moe.experts.64.w2", "model.layers.60.block_sparse_moe.experts.65.w2", "model.layers.60.block_sparse_moe.experts.66.w2", "model.layers.60.block_sparse_moe.experts.67.w2", "model.layers.60.block_sparse_moe.experts.68.w2", "model.layers.60.block_sparse_moe.experts.69.w2", "model.layers.60.block_sparse_moe.experts.70.w2", "model.layers.60.block_sparse_moe.experts.71.w2", "model.layers.60.block_sparse_moe.experts.72.w2", "model.layers.60.block_sparse_moe.experts.73.w2", "model.layers.60.block_sparse_moe.experts.74.w2", "model.layers.60.block_sparse_moe.experts.75.w2", "model.layers.60.block_sparse_moe.experts.76.w2", "model.layers.60.block_sparse_moe.experts.77.w2", "model.layers.60.block_sparse_moe.experts.78.w2", "model.layers.60.block_sparse_moe.experts.79.w2", "model.layers.60.block_sparse_moe.experts.80.w2", "model.layers.60.block_sparse_moe.experts.81.w2", "model.layers.60.block_sparse_moe.experts.82.w2", "model.layers.60.block_sparse_moe.experts.83.w2", "model.layers.60.block_sparse_moe.experts.84.w2", "model.layers.60.block_sparse_moe.experts.85.w2", "model.layers.60.block_sparse_moe.experts.86.w2", "model.layers.60.block_sparse_moe.experts.87.w2", "model.layers.60.block_sparse_moe.experts.88.w2", "model.layers.60.block_sparse_moe.experts.89.w2", "model.layers.60.block_sparse_moe.experts.90.w2", "model.layers.60.block_sparse_moe.experts.91.w2", "model.layers.60.block_sparse_moe.experts.92.w2", "model.layers.60.block_sparse_moe.experts.93.w2", "model.layers.60.block_sparse_moe.experts.94.w2", "model.layers.60.block_sparse_moe.experts.95.w2", "model.layers.60.block_sparse_moe.experts.96.w2", "model.layers.60.block_sparse_moe.experts.97.w2", "model.layers.60.block_sparse_moe.experts.98.w2", "model.layers.60.block_sparse_moe.experts.99.w2", "model.layers.60.block_sparse_moe.experts.100.w2", "model.layers.60.block_sparse_moe.experts.101.w2", "model.layers.60.block_sparse_moe.experts.102.w2", "model.layers.60.block_sparse_moe.experts.103.w2", "model.layers.60.block_sparse_moe.experts.104.w2", "model.layers.60.block_sparse_moe.experts.105.w2", "model.layers.60.block_sparse_moe.experts.106.w2", "model.layers.60.block_sparse_moe.experts.107.w2", "model.layers.60.block_sparse_moe.experts.108.w2", "model.layers.60.block_sparse_moe.experts.109.w2", "model.layers.60.block_sparse_moe.experts.110.w2", "model.layers.60.block_sparse_moe.experts.111.w2", "model.layers.60.block_sparse_moe.experts.112.w2", "model.layers.60.block_sparse_moe.experts.113.w2", "model.layers.60.block_sparse_moe.experts.114.w2", "model.layers.60.block_sparse_moe.experts.115.w2", "model.layers.60.block_sparse_moe.experts.116.w2", "model.layers.60.block_sparse_moe.experts.117.w2", "model.layers.60.block_sparse_moe.experts.118.w2", "model.layers.60.block_sparse_moe.experts.119.w2", "model.layers.60.block_sparse_moe.experts.120.w2", "model.layers.60.block_sparse_moe.experts.121.w2", "model.layers.60.block_sparse_moe.experts.122.w2", "model.layers.60.block_sparse_moe.experts.123.w2", "model.layers.60.block_sparse_moe.experts.124.w2", "model.layers.60.block_sparse_moe.experts.125.w2", "model.layers.60.block_sparse_moe.experts.126.w2", "model.layers.60.block_sparse_moe.experts.127.w2", "model.layers.60.block_sparse_moe.experts.128.w2", "model.layers.60.block_sparse_moe.experts.129.w2", "model.layers.60.block_sparse_moe.experts.130.w2", "model.layers.60.block_sparse_moe.experts.131.w2", "model.layers.60.block_sparse_moe.experts.132.w2", "model.layers.60.block_sparse_moe.experts.133.w2", "model.layers.60.block_sparse_moe.experts.134.w2", "model.layers.60.block_sparse_moe.experts.135.w2", "model.layers.60.block_sparse_moe.experts.136.w2", "model.layers.60.block_sparse_moe.experts.137.w2", "model.layers.60.block_sparse_moe.experts.138.w2", "model.layers.60.block_sparse_moe.experts.139.w2", "model.layers.60.block_sparse_moe.experts.140.w2", "model.layers.60.block_sparse_moe.experts.141.w2", "model.layers.60.block_sparse_moe.experts.142.w2", "model.layers.60.block_sparse_moe.experts.143.w2", "model.layers.60.block_sparse_moe.experts.144.w2", "model.layers.60.block_sparse_moe.experts.145.w2", "model.layers.60.block_sparse_moe.experts.146.w2", "model.layers.60.block_sparse_moe.experts.147.w2", "model.layers.60.block_sparse_moe.experts.148.w2", "model.layers.60.block_sparse_moe.experts.149.w2", "model.layers.60.block_sparse_moe.experts.150.w2", "model.layers.60.block_sparse_moe.experts.151.w2", "model.layers.60.block_sparse_moe.experts.152.w2", "model.layers.60.block_sparse_moe.experts.153.w2", "model.layers.60.block_sparse_moe.experts.154.w2", "model.layers.60.block_sparse_moe.experts.155.w2", "model.layers.60.block_sparse_moe.experts.156.w2", "model.layers.60.block_sparse_moe.experts.157.w2", "model.layers.60.block_sparse_moe.experts.158.w2", "model.layers.60.block_sparse_moe.experts.159.w2", "model.layers.60.block_sparse_moe.experts.160.w2", "model.layers.60.block_sparse_moe.experts.161.w2", "model.layers.60.block_sparse_moe.experts.162.w2", "model.layers.60.block_sparse_moe.experts.163.w2", "model.layers.60.block_sparse_moe.experts.164.w2", "model.layers.60.block_sparse_moe.experts.165.w2", "model.layers.60.block_sparse_moe.experts.166.w2", "model.layers.60.block_sparse_moe.experts.167.w2", "model.layers.60.block_sparse_moe.experts.168.w2", "model.layers.60.block_sparse_moe.experts.169.w2", "model.layers.60.block_sparse_moe.experts.170.w2", "model.layers.60.block_sparse_moe.experts.171.w2", "model.layers.60.block_sparse_moe.experts.172.w2", "model.layers.60.block_sparse_moe.experts.173.w2", "model.layers.60.block_sparse_moe.experts.174.w2", "model.layers.60.block_sparse_moe.experts.175.w2", "model.layers.60.block_sparse_moe.experts.176.w2", "model.layers.60.block_sparse_moe.experts.177.w2", "model.layers.60.block_sparse_moe.experts.178.w2", "model.layers.60.block_sparse_moe.experts.179.w2", "model.layers.60.block_sparse_moe.experts.180.w2", "model.layers.60.block_sparse_moe.experts.181.w2", "model.layers.60.block_sparse_moe.experts.182.w2", "model.layers.60.block_sparse_moe.experts.183.w2", "model.layers.60.block_sparse_moe.experts.184.w2", "model.layers.60.block_sparse_moe.experts.185.w2", "model.layers.60.block_sparse_moe.experts.186.w2", "model.layers.60.block_sparse_moe.experts.187.w2", "model.layers.60.block_sparse_moe.experts.188.w2", "model.layers.60.block_sparse_moe.experts.189.w2", "model.layers.60.block_sparse_moe.experts.190.w2", "model.layers.60.block_sparse_moe.experts.191.w2", "model.layers.60.block_sparse_moe.experts.192.w2", "model.layers.60.block_sparse_moe.experts.193.w2", "model.layers.60.block_sparse_moe.experts.194.w2", "model.layers.60.block_sparse_moe.experts.195.w2", "model.layers.60.block_sparse_moe.experts.196.w2", "model.layers.60.block_sparse_moe.experts.197.w2", "model.layers.60.block_sparse_moe.experts.198.w2", "model.layers.60.block_sparse_moe.experts.199.w2", "model.layers.60.block_sparse_moe.experts.200.w2", "model.layers.60.block_sparse_moe.experts.201.w2", "model.layers.60.block_sparse_moe.experts.202.w2", "model.layers.60.block_sparse_moe.experts.203.w2", "model.layers.60.block_sparse_moe.experts.204.w2", "model.layers.60.block_sparse_moe.experts.205.w2", "model.layers.60.block_sparse_moe.experts.206.w2", "model.layers.60.block_sparse_moe.experts.207.w2", "model.layers.60.block_sparse_moe.experts.208.w2", "model.layers.60.block_sparse_moe.experts.209.w2", "model.layers.60.block_sparse_moe.experts.210.w2", "model.layers.60.block_sparse_moe.experts.211.w2", "model.layers.60.block_sparse_moe.experts.212.w2", "model.layers.60.block_sparse_moe.experts.213.w2", "model.layers.60.block_sparse_moe.experts.214.w2", "model.layers.60.block_sparse_moe.experts.215.w2", "model.layers.60.block_sparse_moe.experts.216.w2", "model.layers.60.block_sparse_moe.experts.217.w2", "model.layers.60.block_sparse_moe.experts.218.w2", "model.layers.60.block_sparse_moe.experts.219.w2", "model.layers.60.block_sparse_moe.experts.220.w2", "model.layers.60.block_sparse_moe.experts.221.w2", "model.layers.60.block_sparse_moe.experts.222.w2", "model.layers.60.block_sparse_moe.experts.223.w2", "model.layers.60.block_sparse_moe.experts.224.w2", "model.layers.60.block_sparse_moe.experts.225.w2", "model.layers.60.block_sparse_moe.experts.226.w2", "model.layers.60.block_sparse_moe.experts.227.w2", "model.layers.60.block_sparse_moe.experts.228.w2", "model.layers.60.block_sparse_moe.experts.229.w2", "model.layers.60.block_sparse_moe.experts.230.w2", "model.layers.60.block_sparse_moe.experts.231.w2", "model.layers.60.block_sparse_moe.experts.232.w2", "model.layers.60.block_sparse_moe.experts.233.w2", "model.layers.60.block_sparse_moe.experts.234.w2", "model.layers.60.block_sparse_moe.experts.235.w2", "model.layers.60.block_sparse_moe.experts.236.w2", "model.layers.60.block_sparse_moe.experts.237.w2", "model.layers.60.block_sparse_moe.experts.238.w2", "model.layers.60.block_sparse_moe.experts.239.w2", "model.layers.60.block_sparse_moe.experts.240.w2", "model.layers.60.block_sparse_moe.experts.241.w2", "model.layers.60.block_sparse_moe.experts.242.w2", "model.layers.60.block_sparse_moe.experts.243.w2", "model.layers.60.block_sparse_moe.experts.244.w2", "model.layers.60.block_sparse_moe.experts.245.w2", "model.layers.60.block_sparse_moe.experts.246.w2", "model.layers.60.block_sparse_moe.experts.247.w2", "model.layers.60.block_sparse_moe.experts.248.w2", "model.layers.60.block_sparse_moe.experts.249.w2", "model.layers.60.block_sparse_moe.experts.250.w2", "model.layers.60.block_sparse_moe.experts.251.w2", "model.layers.60.block_sparse_moe.experts.252.w2", "model.layers.60.block_sparse_moe.experts.253.w2", "model.layers.60.block_sparse_moe.experts.254.w2", "model.layers.60.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 4.574581980704151e-05, "dbits": 3623878656 } ] }, { "idx": 122, "layers": [ "model.layers.61.self_attn.q_proj", "model.layers.61.self_attn.k_proj", "model.layers.61.self_attn.v_proj", "model.layers.61.self_attn.o_proj" ], "candidates": [ { "dkld": 0.00036310553550722435, "dbits": 44040192 } ] }, { "idx": 123, "layers": [ "model.layers.61.block_sparse_moe.experts.0.w1", "model.layers.61.block_sparse_moe.experts.1.w1", "model.layers.61.block_sparse_moe.experts.2.w1", "model.layers.61.block_sparse_moe.experts.3.w1", "model.layers.61.block_sparse_moe.experts.4.w1", "model.layers.61.block_sparse_moe.experts.5.w1", "model.layers.61.block_sparse_moe.experts.6.w1", "model.layers.61.block_sparse_moe.experts.7.w1", "model.layers.61.block_sparse_moe.experts.8.w1", "model.layers.61.block_sparse_moe.experts.9.w1", "model.layers.61.block_sparse_moe.experts.10.w1", "model.layers.61.block_sparse_moe.experts.11.w1", "model.layers.61.block_sparse_moe.experts.12.w1", "model.layers.61.block_sparse_moe.experts.13.w1", "model.layers.61.block_sparse_moe.experts.14.w1", "model.layers.61.block_sparse_moe.experts.15.w1", "model.layers.61.block_sparse_moe.experts.16.w1", "model.layers.61.block_sparse_moe.experts.17.w1", "model.layers.61.block_sparse_moe.experts.18.w1", "model.layers.61.block_sparse_moe.experts.19.w1", "model.layers.61.block_sparse_moe.experts.20.w1", "model.layers.61.block_sparse_moe.experts.21.w1", "model.layers.61.block_sparse_moe.experts.22.w1", "model.layers.61.block_sparse_moe.experts.23.w1", "model.layers.61.block_sparse_moe.experts.24.w1", "model.layers.61.block_sparse_moe.experts.25.w1", "model.layers.61.block_sparse_moe.experts.26.w1", "model.layers.61.block_sparse_moe.experts.27.w1", "model.layers.61.block_sparse_moe.experts.28.w1", "model.layers.61.block_sparse_moe.experts.29.w1", "model.layers.61.block_sparse_moe.experts.30.w1", "model.layers.61.block_sparse_moe.experts.31.w1", "model.layers.61.block_sparse_moe.experts.32.w1", "model.layers.61.block_sparse_moe.experts.33.w1", "model.layers.61.block_sparse_moe.experts.34.w1", "model.layers.61.block_sparse_moe.experts.35.w1", "model.layers.61.block_sparse_moe.experts.36.w1", "model.layers.61.block_sparse_moe.experts.37.w1", "model.layers.61.block_sparse_moe.experts.38.w1", "model.layers.61.block_sparse_moe.experts.39.w1", "model.layers.61.block_sparse_moe.experts.40.w1", "model.layers.61.block_sparse_moe.experts.41.w1", "model.layers.61.block_sparse_moe.experts.42.w1", "model.layers.61.block_sparse_moe.experts.43.w1", "model.layers.61.block_sparse_moe.experts.44.w1", "model.layers.61.block_sparse_moe.experts.45.w1", "model.layers.61.block_sparse_moe.experts.46.w1", "model.layers.61.block_sparse_moe.experts.47.w1", "model.layers.61.block_sparse_moe.experts.48.w1", "model.layers.61.block_sparse_moe.experts.49.w1", "model.layers.61.block_sparse_moe.experts.50.w1", "model.layers.61.block_sparse_moe.experts.51.w1", "model.layers.61.block_sparse_moe.experts.52.w1", "model.layers.61.block_sparse_moe.experts.53.w1", "model.layers.61.block_sparse_moe.experts.54.w1", "model.layers.61.block_sparse_moe.experts.55.w1", "model.layers.61.block_sparse_moe.experts.56.w1", "model.layers.61.block_sparse_moe.experts.57.w1", "model.layers.61.block_sparse_moe.experts.58.w1", "model.layers.61.block_sparse_moe.experts.59.w1", "model.layers.61.block_sparse_moe.experts.60.w1", "model.layers.61.block_sparse_moe.experts.61.w1", "model.layers.61.block_sparse_moe.experts.62.w1", "model.layers.61.block_sparse_moe.experts.63.w1", "model.layers.61.block_sparse_moe.experts.64.w1", "model.layers.61.block_sparse_moe.experts.65.w1", "model.layers.61.block_sparse_moe.experts.66.w1", "model.layers.61.block_sparse_moe.experts.67.w1", "model.layers.61.block_sparse_moe.experts.68.w1", "model.layers.61.block_sparse_moe.experts.69.w1", "model.layers.61.block_sparse_moe.experts.70.w1", "model.layers.61.block_sparse_moe.experts.71.w1", "model.layers.61.block_sparse_moe.experts.72.w1", "model.layers.61.block_sparse_moe.experts.73.w1", "model.layers.61.block_sparse_moe.experts.74.w1", "model.layers.61.block_sparse_moe.experts.75.w1", "model.layers.61.block_sparse_moe.experts.76.w1", "model.layers.61.block_sparse_moe.experts.77.w1", "model.layers.61.block_sparse_moe.experts.78.w1", "model.layers.61.block_sparse_moe.experts.79.w1", "model.layers.61.block_sparse_moe.experts.80.w1", "model.layers.61.block_sparse_moe.experts.81.w1", "model.layers.61.block_sparse_moe.experts.82.w1", "model.layers.61.block_sparse_moe.experts.83.w1", "model.layers.61.block_sparse_moe.experts.84.w1", "model.layers.61.block_sparse_moe.experts.85.w1", "model.layers.61.block_sparse_moe.experts.86.w1", "model.layers.61.block_sparse_moe.experts.87.w1", "model.layers.61.block_sparse_moe.experts.88.w1", "model.layers.61.block_sparse_moe.experts.89.w1", "model.layers.61.block_sparse_moe.experts.90.w1", "model.layers.61.block_sparse_moe.experts.91.w1", "model.layers.61.block_sparse_moe.experts.92.w1", "model.layers.61.block_sparse_moe.experts.93.w1", "model.layers.61.block_sparse_moe.experts.94.w1", "model.layers.61.block_sparse_moe.experts.95.w1", "model.layers.61.block_sparse_moe.experts.96.w1", "model.layers.61.block_sparse_moe.experts.97.w1", "model.layers.61.block_sparse_moe.experts.98.w1", "model.layers.61.block_sparse_moe.experts.99.w1", "model.layers.61.block_sparse_moe.experts.100.w1", "model.layers.61.block_sparse_moe.experts.101.w1", "model.layers.61.block_sparse_moe.experts.102.w1", "model.layers.61.block_sparse_moe.experts.103.w1", "model.layers.61.block_sparse_moe.experts.104.w1", "model.layers.61.block_sparse_moe.experts.105.w1", "model.layers.61.block_sparse_moe.experts.106.w1", "model.layers.61.block_sparse_moe.experts.107.w1", "model.layers.61.block_sparse_moe.experts.108.w1", "model.layers.61.block_sparse_moe.experts.109.w1", "model.layers.61.block_sparse_moe.experts.110.w1", "model.layers.61.block_sparse_moe.experts.111.w1", "model.layers.61.block_sparse_moe.experts.112.w1", "model.layers.61.block_sparse_moe.experts.113.w1", "model.layers.61.block_sparse_moe.experts.114.w1", "model.layers.61.block_sparse_moe.experts.115.w1", "model.layers.61.block_sparse_moe.experts.116.w1", "model.layers.61.block_sparse_moe.experts.117.w1", "model.layers.61.block_sparse_moe.experts.118.w1", "model.layers.61.block_sparse_moe.experts.119.w1", "model.layers.61.block_sparse_moe.experts.120.w1", "model.layers.61.block_sparse_moe.experts.121.w1", "model.layers.61.block_sparse_moe.experts.122.w1", "model.layers.61.block_sparse_moe.experts.123.w1", "model.layers.61.block_sparse_moe.experts.124.w1", "model.layers.61.block_sparse_moe.experts.125.w1", "model.layers.61.block_sparse_moe.experts.126.w1", "model.layers.61.block_sparse_moe.experts.127.w1", "model.layers.61.block_sparse_moe.experts.128.w1", "model.layers.61.block_sparse_moe.experts.129.w1", "model.layers.61.block_sparse_moe.experts.130.w1", "model.layers.61.block_sparse_moe.experts.131.w1", "model.layers.61.block_sparse_moe.experts.132.w1", "model.layers.61.block_sparse_moe.experts.133.w1", "model.layers.61.block_sparse_moe.experts.134.w1", "model.layers.61.block_sparse_moe.experts.135.w1", "model.layers.61.block_sparse_moe.experts.136.w1", "model.layers.61.block_sparse_moe.experts.137.w1", "model.layers.61.block_sparse_moe.experts.138.w1", "model.layers.61.block_sparse_moe.experts.139.w1", "model.layers.61.block_sparse_moe.experts.140.w1", "model.layers.61.block_sparse_moe.experts.141.w1", "model.layers.61.block_sparse_moe.experts.142.w1", "model.layers.61.block_sparse_moe.experts.143.w1", "model.layers.61.block_sparse_moe.experts.144.w1", "model.layers.61.block_sparse_moe.experts.145.w1", "model.layers.61.block_sparse_moe.experts.146.w1", "model.layers.61.block_sparse_moe.experts.147.w1", "model.layers.61.block_sparse_moe.experts.148.w1", "model.layers.61.block_sparse_moe.experts.149.w1", "model.layers.61.block_sparse_moe.experts.150.w1", "model.layers.61.block_sparse_moe.experts.151.w1", "model.layers.61.block_sparse_moe.experts.152.w1", "model.layers.61.block_sparse_moe.experts.153.w1", "model.layers.61.block_sparse_moe.experts.154.w1", "model.layers.61.block_sparse_moe.experts.155.w1", "model.layers.61.block_sparse_moe.experts.156.w1", "model.layers.61.block_sparse_moe.experts.157.w1", "model.layers.61.block_sparse_moe.experts.158.w1", "model.layers.61.block_sparse_moe.experts.159.w1", "model.layers.61.block_sparse_moe.experts.160.w1", "model.layers.61.block_sparse_moe.experts.161.w1", "model.layers.61.block_sparse_moe.experts.162.w1", "model.layers.61.block_sparse_moe.experts.163.w1", "model.layers.61.block_sparse_moe.experts.164.w1", "model.layers.61.block_sparse_moe.experts.165.w1", "model.layers.61.block_sparse_moe.experts.166.w1", "model.layers.61.block_sparse_moe.experts.167.w1", "model.layers.61.block_sparse_moe.experts.168.w1", "model.layers.61.block_sparse_moe.experts.169.w1", "model.layers.61.block_sparse_moe.experts.170.w1", "model.layers.61.block_sparse_moe.experts.171.w1", "model.layers.61.block_sparse_moe.experts.172.w1", "model.layers.61.block_sparse_moe.experts.173.w1", "model.layers.61.block_sparse_moe.experts.174.w1", "model.layers.61.block_sparse_moe.experts.175.w1", "model.layers.61.block_sparse_moe.experts.176.w1", "model.layers.61.block_sparse_moe.experts.177.w1", "model.layers.61.block_sparse_moe.experts.178.w1", "model.layers.61.block_sparse_moe.experts.179.w1", "model.layers.61.block_sparse_moe.experts.180.w1", "model.layers.61.block_sparse_moe.experts.181.w1", "model.layers.61.block_sparse_moe.experts.182.w1", "model.layers.61.block_sparse_moe.experts.183.w1", "model.layers.61.block_sparse_moe.experts.184.w1", "model.layers.61.block_sparse_moe.experts.185.w1", "model.layers.61.block_sparse_moe.experts.186.w1", "model.layers.61.block_sparse_moe.experts.187.w1", "model.layers.61.block_sparse_moe.experts.188.w1", "model.layers.61.block_sparse_moe.experts.189.w1", "model.layers.61.block_sparse_moe.experts.190.w1", "model.layers.61.block_sparse_moe.experts.191.w1", "model.layers.61.block_sparse_moe.experts.192.w1", "model.layers.61.block_sparse_moe.experts.193.w1", "model.layers.61.block_sparse_moe.experts.194.w1", "model.layers.61.block_sparse_moe.experts.195.w1", "model.layers.61.block_sparse_moe.experts.196.w1", "model.layers.61.block_sparse_moe.experts.197.w1", "model.layers.61.block_sparse_moe.experts.198.w1", "model.layers.61.block_sparse_moe.experts.199.w1", "model.layers.61.block_sparse_moe.experts.200.w1", "model.layers.61.block_sparse_moe.experts.201.w1", "model.layers.61.block_sparse_moe.experts.202.w1", "model.layers.61.block_sparse_moe.experts.203.w1", "model.layers.61.block_sparse_moe.experts.204.w1", "model.layers.61.block_sparse_moe.experts.205.w1", "model.layers.61.block_sparse_moe.experts.206.w1", "model.layers.61.block_sparse_moe.experts.207.w1", "model.layers.61.block_sparse_moe.experts.208.w1", "model.layers.61.block_sparse_moe.experts.209.w1", "model.layers.61.block_sparse_moe.experts.210.w1", "model.layers.61.block_sparse_moe.experts.211.w1", "model.layers.61.block_sparse_moe.experts.212.w1", "model.layers.61.block_sparse_moe.experts.213.w1", "model.layers.61.block_sparse_moe.experts.214.w1", "model.layers.61.block_sparse_moe.experts.215.w1", "model.layers.61.block_sparse_moe.experts.216.w1", "model.layers.61.block_sparse_moe.experts.217.w1", "model.layers.61.block_sparse_moe.experts.218.w1", "model.layers.61.block_sparse_moe.experts.219.w1", "model.layers.61.block_sparse_moe.experts.220.w1", "model.layers.61.block_sparse_moe.experts.221.w1", "model.layers.61.block_sparse_moe.experts.222.w1", "model.layers.61.block_sparse_moe.experts.223.w1", "model.layers.61.block_sparse_moe.experts.224.w1", "model.layers.61.block_sparse_moe.experts.225.w1", "model.layers.61.block_sparse_moe.experts.226.w1", "model.layers.61.block_sparse_moe.experts.227.w1", "model.layers.61.block_sparse_moe.experts.228.w1", "model.layers.61.block_sparse_moe.experts.229.w1", "model.layers.61.block_sparse_moe.experts.230.w1", "model.layers.61.block_sparse_moe.experts.231.w1", "model.layers.61.block_sparse_moe.experts.232.w1", "model.layers.61.block_sparse_moe.experts.233.w1", "model.layers.61.block_sparse_moe.experts.234.w1", "model.layers.61.block_sparse_moe.experts.235.w1", "model.layers.61.block_sparse_moe.experts.236.w1", "model.layers.61.block_sparse_moe.experts.237.w1", "model.layers.61.block_sparse_moe.experts.238.w1", "model.layers.61.block_sparse_moe.experts.239.w1", "model.layers.61.block_sparse_moe.experts.240.w1", "model.layers.61.block_sparse_moe.experts.241.w1", "model.layers.61.block_sparse_moe.experts.242.w1", "model.layers.61.block_sparse_moe.experts.243.w1", "model.layers.61.block_sparse_moe.experts.244.w1", "model.layers.61.block_sparse_moe.experts.245.w1", "model.layers.61.block_sparse_moe.experts.246.w1", "model.layers.61.block_sparse_moe.experts.247.w1", "model.layers.61.block_sparse_moe.experts.248.w1", "model.layers.61.block_sparse_moe.experts.249.w1", "model.layers.61.block_sparse_moe.experts.250.w1", "model.layers.61.block_sparse_moe.experts.251.w1", "model.layers.61.block_sparse_moe.experts.252.w1", "model.layers.61.block_sparse_moe.experts.253.w1", "model.layers.61.block_sparse_moe.experts.254.w1", "model.layers.61.block_sparse_moe.experts.255.w1", "model.layers.61.block_sparse_moe.experts.0.w3", "model.layers.61.block_sparse_moe.experts.1.w3", "model.layers.61.block_sparse_moe.experts.2.w3", "model.layers.61.block_sparse_moe.experts.3.w3", "model.layers.61.block_sparse_moe.experts.4.w3", "model.layers.61.block_sparse_moe.experts.5.w3", "model.layers.61.block_sparse_moe.experts.6.w3", "model.layers.61.block_sparse_moe.experts.7.w3", "model.layers.61.block_sparse_moe.experts.8.w3", "model.layers.61.block_sparse_moe.experts.9.w3", "model.layers.61.block_sparse_moe.experts.10.w3", "model.layers.61.block_sparse_moe.experts.11.w3", "model.layers.61.block_sparse_moe.experts.12.w3", "model.layers.61.block_sparse_moe.experts.13.w3", "model.layers.61.block_sparse_moe.experts.14.w3", "model.layers.61.block_sparse_moe.experts.15.w3", "model.layers.61.block_sparse_moe.experts.16.w3", "model.layers.61.block_sparse_moe.experts.17.w3", "model.layers.61.block_sparse_moe.experts.18.w3", "model.layers.61.block_sparse_moe.experts.19.w3", "model.layers.61.block_sparse_moe.experts.20.w3", "model.layers.61.block_sparse_moe.experts.21.w3", "model.layers.61.block_sparse_moe.experts.22.w3", "model.layers.61.block_sparse_moe.experts.23.w3", "model.layers.61.block_sparse_moe.experts.24.w3", "model.layers.61.block_sparse_moe.experts.25.w3", "model.layers.61.block_sparse_moe.experts.26.w3", "model.layers.61.block_sparse_moe.experts.27.w3", "model.layers.61.block_sparse_moe.experts.28.w3", "model.layers.61.block_sparse_moe.experts.29.w3", "model.layers.61.block_sparse_moe.experts.30.w3", "model.layers.61.block_sparse_moe.experts.31.w3", "model.layers.61.block_sparse_moe.experts.32.w3", "model.layers.61.block_sparse_moe.experts.33.w3", "model.layers.61.block_sparse_moe.experts.34.w3", "model.layers.61.block_sparse_moe.experts.35.w3", "model.layers.61.block_sparse_moe.experts.36.w3", "model.layers.61.block_sparse_moe.experts.37.w3", "model.layers.61.block_sparse_moe.experts.38.w3", "model.layers.61.block_sparse_moe.experts.39.w3", "model.layers.61.block_sparse_moe.experts.40.w3", "model.layers.61.block_sparse_moe.experts.41.w3", "model.layers.61.block_sparse_moe.experts.42.w3", "model.layers.61.block_sparse_moe.experts.43.w3", "model.layers.61.block_sparse_moe.experts.44.w3", "model.layers.61.block_sparse_moe.experts.45.w3", "model.layers.61.block_sparse_moe.experts.46.w3", "model.layers.61.block_sparse_moe.experts.47.w3", "model.layers.61.block_sparse_moe.experts.48.w3", "model.layers.61.block_sparse_moe.experts.49.w3", "model.layers.61.block_sparse_moe.experts.50.w3", "model.layers.61.block_sparse_moe.experts.51.w3", "model.layers.61.block_sparse_moe.experts.52.w3", "model.layers.61.block_sparse_moe.experts.53.w3", "model.layers.61.block_sparse_moe.experts.54.w3", "model.layers.61.block_sparse_moe.experts.55.w3", "model.layers.61.block_sparse_moe.experts.56.w3", "model.layers.61.block_sparse_moe.experts.57.w3", "model.layers.61.block_sparse_moe.experts.58.w3", "model.layers.61.block_sparse_moe.experts.59.w3", "model.layers.61.block_sparse_moe.experts.60.w3", "model.layers.61.block_sparse_moe.experts.61.w3", "model.layers.61.block_sparse_moe.experts.62.w3", "model.layers.61.block_sparse_moe.experts.63.w3", "model.layers.61.block_sparse_moe.experts.64.w3", "model.layers.61.block_sparse_moe.experts.65.w3", "model.layers.61.block_sparse_moe.experts.66.w3", "model.layers.61.block_sparse_moe.experts.67.w3", "model.layers.61.block_sparse_moe.experts.68.w3", "model.layers.61.block_sparse_moe.experts.69.w3", "model.layers.61.block_sparse_moe.experts.70.w3", "model.layers.61.block_sparse_moe.experts.71.w3", "model.layers.61.block_sparse_moe.experts.72.w3", "model.layers.61.block_sparse_moe.experts.73.w3", "model.layers.61.block_sparse_moe.experts.74.w3", "model.layers.61.block_sparse_moe.experts.75.w3", "model.layers.61.block_sparse_moe.experts.76.w3", "model.layers.61.block_sparse_moe.experts.77.w3", "model.layers.61.block_sparse_moe.experts.78.w3", "model.layers.61.block_sparse_moe.experts.79.w3", "model.layers.61.block_sparse_moe.experts.80.w3", "model.layers.61.block_sparse_moe.experts.81.w3", "model.layers.61.block_sparse_moe.experts.82.w3", "model.layers.61.block_sparse_moe.experts.83.w3", "model.layers.61.block_sparse_moe.experts.84.w3", "model.layers.61.block_sparse_moe.experts.85.w3", "model.layers.61.block_sparse_moe.experts.86.w3", "model.layers.61.block_sparse_moe.experts.87.w3", "model.layers.61.block_sparse_moe.experts.88.w3", "model.layers.61.block_sparse_moe.experts.89.w3", "model.layers.61.block_sparse_moe.experts.90.w3", "model.layers.61.block_sparse_moe.experts.91.w3", "model.layers.61.block_sparse_moe.experts.92.w3", "model.layers.61.block_sparse_moe.experts.93.w3", "model.layers.61.block_sparse_moe.experts.94.w3", "model.layers.61.block_sparse_moe.experts.95.w3", "model.layers.61.block_sparse_moe.experts.96.w3", "model.layers.61.block_sparse_moe.experts.97.w3", "model.layers.61.block_sparse_moe.experts.98.w3", "model.layers.61.block_sparse_moe.experts.99.w3", "model.layers.61.block_sparse_moe.experts.100.w3", "model.layers.61.block_sparse_moe.experts.101.w3", "model.layers.61.block_sparse_moe.experts.102.w3", "model.layers.61.block_sparse_moe.experts.103.w3", "model.layers.61.block_sparse_moe.experts.104.w3", "model.layers.61.block_sparse_moe.experts.105.w3", "model.layers.61.block_sparse_moe.experts.106.w3", "model.layers.61.block_sparse_moe.experts.107.w3", "model.layers.61.block_sparse_moe.experts.108.w3", "model.layers.61.block_sparse_moe.experts.109.w3", "model.layers.61.block_sparse_moe.experts.110.w3", "model.layers.61.block_sparse_moe.experts.111.w3", "model.layers.61.block_sparse_moe.experts.112.w3", "model.layers.61.block_sparse_moe.experts.113.w3", "model.layers.61.block_sparse_moe.experts.114.w3", "model.layers.61.block_sparse_moe.experts.115.w3", "model.layers.61.block_sparse_moe.experts.116.w3", "model.layers.61.block_sparse_moe.experts.117.w3", "model.layers.61.block_sparse_moe.experts.118.w3", "model.layers.61.block_sparse_moe.experts.119.w3", "model.layers.61.block_sparse_moe.experts.120.w3", "model.layers.61.block_sparse_moe.experts.121.w3", "model.layers.61.block_sparse_moe.experts.122.w3", "model.layers.61.block_sparse_moe.experts.123.w3", "model.layers.61.block_sparse_moe.experts.124.w3", "model.layers.61.block_sparse_moe.experts.125.w3", "model.layers.61.block_sparse_moe.experts.126.w3", "model.layers.61.block_sparse_moe.experts.127.w3", "model.layers.61.block_sparse_moe.experts.128.w3", "model.layers.61.block_sparse_moe.experts.129.w3", "model.layers.61.block_sparse_moe.experts.130.w3", "model.layers.61.block_sparse_moe.experts.131.w3", "model.layers.61.block_sparse_moe.experts.132.w3", "model.layers.61.block_sparse_moe.experts.133.w3", "model.layers.61.block_sparse_moe.experts.134.w3", "model.layers.61.block_sparse_moe.experts.135.w3", "model.layers.61.block_sparse_moe.experts.136.w3", "model.layers.61.block_sparse_moe.experts.137.w3", "model.layers.61.block_sparse_moe.experts.138.w3", "model.layers.61.block_sparse_moe.experts.139.w3", "model.layers.61.block_sparse_moe.experts.140.w3", "model.layers.61.block_sparse_moe.experts.141.w3", "model.layers.61.block_sparse_moe.experts.142.w3", "model.layers.61.block_sparse_moe.experts.143.w3", "model.layers.61.block_sparse_moe.experts.144.w3", "model.layers.61.block_sparse_moe.experts.145.w3", "model.layers.61.block_sparse_moe.experts.146.w3", "model.layers.61.block_sparse_moe.experts.147.w3", "model.layers.61.block_sparse_moe.experts.148.w3", "model.layers.61.block_sparse_moe.experts.149.w3", "model.layers.61.block_sparse_moe.experts.150.w3", "model.layers.61.block_sparse_moe.experts.151.w3", "model.layers.61.block_sparse_moe.experts.152.w3", "model.layers.61.block_sparse_moe.experts.153.w3", "model.layers.61.block_sparse_moe.experts.154.w3", "model.layers.61.block_sparse_moe.experts.155.w3", "model.layers.61.block_sparse_moe.experts.156.w3", "model.layers.61.block_sparse_moe.experts.157.w3", "model.layers.61.block_sparse_moe.experts.158.w3", "model.layers.61.block_sparse_moe.experts.159.w3", "model.layers.61.block_sparse_moe.experts.160.w3", "model.layers.61.block_sparse_moe.experts.161.w3", "model.layers.61.block_sparse_moe.experts.162.w3", "model.layers.61.block_sparse_moe.experts.163.w3", "model.layers.61.block_sparse_moe.experts.164.w3", "model.layers.61.block_sparse_moe.experts.165.w3", "model.layers.61.block_sparse_moe.experts.166.w3", "model.layers.61.block_sparse_moe.experts.167.w3", "model.layers.61.block_sparse_moe.experts.168.w3", "model.layers.61.block_sparse_moe.experts.169.w3", "model.layers.61.block_sparse_moe.experts.170.w3", "model.layers.61.block_sparse_moe.experts.171.w3", "model.layers.61.block_sparse_moe.experts.172.w3", "model.layers.61.block_sparse_moe.experts.173.w3", "model.layers.61.block_sparse_moe.experts.174.w3", "model.layers.61.block_sparse_moe.experts.175.w3", "model.layers.61.block_sparse_moe.experts.176.w3", "model.layers.61.block_sparse_moe.experts.177.w3", "model.layers.61.block_sparse_moe.experts.178.w3", "model.layers.61.block_sparse_moe.experts.179.w3", "model.layers.61.block_sparse_moe.experts.180.w3", "model.layers.61.block_sparse_moe.experts.181.w3", "model.layers.61.block_sparse_moe.experts.182.w3", "model.layers.61.block_sparse_moe.experts.183.w3", "model.layers.61.block_sparse_moe.experts.184.w3", "model.layers.61.block_sparse_moe.experts.185.w3", "model.layers.61.block_sparse_moe.experts.186.w3", "model.layers.61.block_sparse_moe.experts.187.w3", "model.layers.61.block_sparse_moe.experts.188.w3", "model.layers.61.block_sparse_moe.experts.189.w3", "model.layers.61.block_sparse_moe.experts.190.w3", "model.layers.61.block_sparse_moe.experts.191.w3", "model.layers.61.block_sparse_moe.experts.192.w3", "model.layers.61.block_sparse_moe.experts.193.w3", "model.layers.61.block_sparse_moe.experts.194.w3", "model.layers.61.block_sparse_moe.experts.195.w3", "model.layers.61.block_sparse_moe.experts.196.w3", "model.layers.61.block_sparse_moe.experts.197.w3", "model.layers.61.block_sparse_moe.experts.198.w3", "model.layers.61.block_sparse_moe.experts.199.w3", "model.layers.61.block_sparse_moe.experts.200.w3", "model.layers.61.block_sparse_moe.experts.201.w3", "model.layers.61.block_sparse_moe.experts.202.w3", "model.layers.61.block_sparse_moe.experts.203.w3", "model.layers.61.block_sparse_moe.experts.204.w3", "model.layers.61.block_sparse_moe.experts.205.w3", "model.layers.61.block_sparse_moe.experts.206.w3", "model.layers.61.block_sparse_moe.experts.207.w3", "model.layers.61.block_sparse_moe.experts.208.w3", "model.layers.61.block_sparse_moe.experts.209.w3", "model.layers.61.block_sparse_moe.experts.210.w3", "model.layers.61.block_sparse_moe.experts.211.w3", "model.layers.61.block_sparse_moe.experts.212.w3", "model.layers.61.block_sparse_moe.experts.213.w3", "model.layers.61.block_sparse_moe.experts.214.w3", "model.layers.61.block_sparse_moe.experts.215.w3", "model.layers.61.block_sparse_moe.experts.216.w3", "model.layers.61.block_sparse_moe.experts.217.w3", "model.layers.61.block_sparse_moe.experts.218.w3", "model.layers.61.block_sparse_moe.experts.219.w3", "model.layers.61.block_sparse_moe.experts.220.w3", "model.layers.61.block_sparse_moe.experts.221.w3", "model.layers.61.block_sparse_moe.experts.222.w3", "model.layers.61.block_sparse_moe.experts.223.w3", "model.layers.61.block_sparse_moe.experts.224.w3", "model.layers.61.block_sparse_moe.experts.225.w3", "model.layers.61.block_sparse_moe.experts.226.w3", "model.layers.61.block_sparse_moe.experts.227.w3", "model.layers.61.block_sparse_moe.experts.228.w3", "model.layers.61.block_sparse_moe.experts.229.w3", "model.layers.61.block_sparse_moe.experts.230.w3", "model.layers.61.block_sparse_moe.experts.231.w3", "model.layers.61.block_sparse_moe.experts.232.w3", "model.layers.61.block_sparse_moe.experts.233.w3", "model.layers.61.block_sparse_moe.experts.234.w3", "model.layers.61.block_sparse_moe.experts.235.w3", "model.layers.61.block_sparse_moe.experts.236.w3", "model.layers.61.block_sparse_moe.experts.237.w3", "model.layers.61.block_sparse_moe.experts.238.w3", "model.layers.61.block_sparse_moe.experts.239.w3", "model.layers.61.block_sparse_moe.experts.240.w3", "model.layers.61.block_sparse_moe.experts.241.w3", "model.layers.61.block_sparse_moe.experts.242.w3", "model.layers.61.block_sparse_moe.experts.243.w3", "model.layers.61.block_sparse_moe.experts.244.w3", "model.layers.61.block_sparse_moe.experts.245.w3", "model.layers.61.block_sparse_moe.experts.246.w3", "model.layers.61.block_sparse_moe.experts.247.w3", "model.layers.61.block_sparse_moe.experts.248.w3", "model.layers.61.block_sparse_moe.experts.249.w3", "model.layers.61.block_sparse_moe.experts.250.w3", "model.layers.61.block_sparse_moe.experts.251.w3", "model.layers.61.block_sparse_moe.experts.252.w3", "model.layers.61.block_sparse_moe.experts.253.w3", "model.layers.61.block_sparse_moe.experts.254.w3", "model.layers.61.block_sparse_moe.experts.255.w3", "model.layers.61.block_sparse_moe.experts.0.w2", "model.layers.61.block_sparse_moe.experts.1.w2", "model.layers.61.block_sparse_moe.experts.2.w2", "model.layers.61.block_sparse_moe.experts.3.w2", "model.layers.61.block_sparse_moe.experts.4.w2", "model.layers.61.block_sparse_moe.experts.5.w2", "model.layers.61.block_sparse_moe.experts.6.w2", "model.layers.61.block_sparse_moe.experts.7.w2", "model.layers.61.block_sparse_moe.experts.8.w2", "model.layers.61.block_sparse_moe.experts.9.w2", "model.layers.61.block_sparse_moe.experts.10.w2", "model.layers.61.block_sparse_moe.experts.11.w2", "model.layers.61.block_sparse_moe.experts.12.w2", "model.layers.61.block_sparse_moe.experts.13.w2", "model.layers.61.block_sparse_moe.experts.14.w2", "model.layers.61.block_sparse_moe.experts.15.w2", "model.layers.61.block_sparse_moe.experts.16.w2", "model.layers.61.block_sparse_moe.experts.17.w2", "model.layers.61.block_sparse_moe.experts.18.w2", "model.layers.61.block_sparse_moe.experts.19.w2", "model.layers.61.block_sparse_moe.experts.20.w2", "model.layers.61.block_sparse_moe.experts.21.w2", "model.layers.61.block_sparse_moe.experts.22.w2", "model.layers.61.block_sparse_moe.experts.23.w2", "model.layers.61.block_sparse_moe.experts.24.w2", "model.layers.61.block_sparse_moe.experts.25.w2", "model.layers.61.block_sparse_moe.experts.26.w2", "model.layers.61.block_sparse_moe.experts.27.w2", "model.layers.61.block_sparse_moe.experts.28.w2", "model.layers.61.block_sparse_moe.experts.29.w2", "model.layers.61.block_sparse_moe.experts.30.w2", "model.layers.61.block_sparse_moe.experts.31.w2", "model.layers.61.block_sparse_moe.experts.32.w2", "model.layers.61.block_sparse_moe.experts.33.w2", "model.layers.61.block_sparse_moe.experts.34.w2", "model.layers.61.block_sparse_moe.experts.35.w2", "model.layers.61.block_sparse_moe.experts.36.w2", "model.layers.61.block_sparse_moe.experts.37.w2", "model.layers.61.block_sparse_moe.experts.38.w2", "model.layers.61.block_sparse_moe.experts.39.w2", "model.layers.61.block_sparse_moe.experts.40.w2", "model.layers.61.block_sparse_moe.experts.41.w2", "model.layers.61.block_sparse_moe.experts.42.w2", "model.layers.61.block_sparse_moe.experts.43.w2", "model.layers.61.block_sparse_moe.experts.44.w2", "model.layers.61.block_sparse_moe.experts.45.w2", "model.layers.61.block_sparse_moe.experts.46.w2", "model.layers.61.block_sparse_moe.experts.47.w2", "model.layers.61.block_sparse_moe.experts.48.w2", "model.layers.61.block_sparse_moe.experts.49.w2", "model.layers.61.block_sparse_moe.experts.50.w2", "model.layers.61.block_sparse_moe.experts.51.w2", "model.layers.61.block_sparse_moe.experts.52.w2", "model.layers.61.block_sparse_moe.experts.53.w2", "model.layers.61.block_sparse_moe.experts.54.w2", "model.layers.61.block_sparse_moe.experts.55.w2", "model.layers.61.block_sparse_moe.experts.56.w2", "model.layers.61.block_sparse_moe.experts.57.w2", "model.layers.61.block_sparse_moe.experts.58.w2", "model.layers.61.block_sparse_moe.experts.59.w2", "model.layers.61.block_sparse_moe.experts.60.w2", "model.layers.61.block_sparse_moe.experts.61.w2", "model.layers.61.block_sparse_moe.experts.62.w2", "model.layers.61.block_sparse_moe.experts.63.w2", "model.layers.61.block_sparse_moe.experts.64.w2", "model.layers.61.block_sparse_moe.experts.65.w2", "model.layers.61.block_sparse_moe.experts.66.w2", "model.layers.61.block_sparse_moe.experts.67.w2", "model.layers.61.block_sparse_moe.experts.68.w2", "model.layers.61.block_sparse_moe.experts.69.w2", "model.layers.61.block_sparse_moe.experts.70.w2", "model.layers.61.block_sparse_moe.experts.71.w2", "model.layers.61.block_sparse_moe.experts.72.w2", "model.layers.61.block_sparse_moe.experts.73.w2", "model.layers.61.block_sparse_moe.experts.74.w2", "model.layers.61.block_sparse_moe.experts.75.w2", "model.layers.61.block_sparse_moe.experts.76.w2", "model.layers.61.block_sparse_moe.experts.77.w2", "model.layers.61.block_sparse_moe.experts.78.w2", "model.layers.61.block_sparse_moe.experts.79.w2", "model.layers.61.block_sparse_moe.experts.80.w2", "model.layers.61.block_sparse_moe.experts.81.w2", "model.layers.61.block_sparse_moe.experts.82.w2", "model.layers.61.block_sparse_moe.experts.83.w2", "model.layers.61.block_sparse_moe.experts.84.w2", "model.layers.61.block_sparse_moe.experts.85.w2", "model.layers.61.block_sparse_moe.experts.86.w2", "model.layers.61.block_sparse_moe.experts.87.w2", "model.layers.61.block_sparse_moe.experts.88.w2", "model.layers.61.block_sparse_moe.experts.89.w2", "model.layers.61.block_sparse_moe.experts.90.w2", "model.layers.61.block_sparse_moe.experts.91.w2", "model.layers.61.block_sparse_moe.experts.92.w2", "model.layers.61.block_sparse_moe.experts.93.w2", "model.layers.61.block_sparse_moe.experts.94.w2", "model.layers.61.block_sparse_moe.experts.95.w2", "model.layers.61.block_sparse_moe.experts.96.w2", "model.layers.61.block_sparse_moe.experts.97.w2", "model.layers.61.block_sparse_moe.experts.98.w2", "model.layers.61.block_sparse_moe.experts.99.w2", "model.layers.61.block_sparse_moe.experts.100.w2", "model.layers.61.block_sparse_moe.experts.101.w2", "model.layers.61.block_sparse_moe.experts.102.w2", "model.layers.61.block_sparse_moe.experts.103.w2", "model.layers.61.block_sparse_moe.experts.104.w2", "model.layers.61.block_sparse_moe.experts.105.w2", "model.layers.61.block_sparse_moe.experts.106.w2", "model.layers.61.block_sparse_moe.experts.107.w2", "model.layers.61.block_sparse_moe.experts.108.w2", "model.layers.61.block_sparse_moe.experts.109.w2", "model.layers.61.block_sparse_moe.experts.110.w2", "model.layers.61.block_sparse_moe.experts.111.w2", "model.layers.61.block_sparse_moe.experts.112.w2", "model.layers.61.block_sparse_moe.experts.113.w2", "model.layers.61.block_sparse_moe.experts.114.w2", "model.layers.61.block_sparse_moe.experts.115.w2", "model.layers.61.block_sparse_moe.experts.116.w2", "model.layers.61.block_sparse_moe.experts.117.w2", "model.layers.61.block_sparse_moe.experts.118.w2", "model.layers.61.block_sparse_moe.experts.119.w2", "model.layers.61.block_sparse_moe.experts.120.w2", "model.layers.61.block_sparse_moe.experts.121.w2", "model.layers.61.block_sparse_moe.experts.122.w2", "model.layers.61.block_sparse_moe.experts.123.w2", "model.layers.61.block_sparse_moe.experts.124.w2", "model.layers.61.block_sparse_moe.experts.125.w2", "model.layers.61.block_sparse_moe.experts.126.w2", "model.layers.61.block_sparse_moe.experts.127.w2", "model.layers.61.block_sparse_moe.experts.128.w2", "model.layers.61.block_sparse_moe.experts.129.w2", "model.layers.61.block_sparse_moe.experts.130.w2", "model.layers.61.block_sparse_moe.experts.131.w2", "model.layers.61.block_sparse_moe.experts.132.w2", "model.layers.61.block_sparse_moe.experts.133.w2", "model.layers.61.block_sparse_moe.experts.134.w2", "model.layers.61.block_sparse_moe.experts.135.w2", "model.layers.61.block_sparse_moe.experts.136.w2", "model.layers.61.block_sparse_moe.experts.137.w2", "model.layers.61.block_sparse_moe.experts.138.w2", "model.layers.61.block_sparse_moe.experts.139.w2", "model.layers.61.block_sparse_moe.experts.140.w2", "model.layers.61.block_sparse_moe.experts.141.w2", "model.layers.61.block_sparse_moe.experts.142.w2", "model.layers.61.block_sparse_moe.experts.143.w2", "model.layers.61.block_sparse_moe.experts.144.w2", "model.layers.61.block_sparse_moe.experts.145.w2", "model.layers.61.block_sparse_moe.experts.146.w2", "model.layers.61.block_sparse_moe.experts.147.w2", "model.layers.61.block_sparse_moe.experts.148.w2", "model.layers.61.block_sparse_moe.experts.149.w2", "model.layers.61.block_sparse_moe.experts.150.w2", "model.layers.61.block_sparse_moe.experts.151.w2", "model.layers.61.block_sparse_moe.experts.152.w2", "model.layers.61.block_sparse_moe.experts.153.w2", "model.layers.61.block_sparse_moe.experts.154.w2", "model.layers.61.block_sparse_moe.experts.155.w2", "model.layers.61.block_sparse_moe.experts.156.w2", "model.layers.61.block_sparse_moe.experts.157.w2", "model.layers.61.block_sparse_moe.experts.158.w2", "model.layers.61.block_sparse_moe.experts.159.w2", "model.layers.61.block_sparse_moe.experts.160.w2", "model.layers.61.block_sparse_moe.experts.161.w2", "model.layers.61.block_sparse_moe.experts.162.w2", "model.layers.61.block_sparse_moe.experts.163.w2", "model.layers.61.block_sparse_moe.experts.164.w2", "model.layers.61.block_sparse_moe.experts.165.w2", "model.layers.61.block_sparse_moe.experts.166.w2", "model.layers.61.block_sparse_moe.experts.167.w2", "model.layers.61.block_sparse_moe.experts.168.w2", "model.layers.61.block_sparse_moe.experts.169.w2", "model.layers.61.block_sparse_moe.experts.170.w2", "model.layers.61.block_sparse_moe.experts.171.w2", "model.layers.61.block_sparse_moe.experts.172.w2", "model.layers.61.block_sparse_moe.experts.173.w2", "model.layers.61.block_sparse_moe.experts.174.w2", "model.layers.61.block_sparse_moe.experts.175.w2", "model.layers.61.block_sparse_moe.experts.176.w2", "model.layers.61.block_sparse_moe.experts.177.w2", "model.layers.61.block_sparse_moe.experts.178.w2", "model.layers.61.block_sparse_moe.experts.179.w2", "model.layers.61.block_sparse_moe.experts.180.w2", "model.layers.61.block_sparse_moe.experts.181.w2", "model.layers.61.block_sparse_moe.experts.182.w2", "model.layers.61.block_sparse_moe.experts.183.w2", "model.layers.61.block_sparse_moe.experts.184.w2", "model.layers.61.block_sparse_moe.experts.185.w2", "model.layers.61.block_sparse_moe.experts.186.w2", "model.layers.61.block_sparse_moe.experts.187.w2", "model.layers.61.block_sparse_moe.experts.188.w2", "model.layers.61.block_sparse_moe.experts.189.w2", "model.layers.61.block_sparse_moe.experts.190.w2", "model.layers.61.block_sparse_moe.experts.191.w2", "model.layers.61.block_sparse_moe.experts.192.w2", "model.layers.61.block_sparse_moe.experts.193.w2", "model.layers.61.block_sparse_moe.experts.194.w2", "model.layers.61.block_sparse_moe.experts.195.w2", "model.layers.61.block_sparse_moe.experts.196.w2", "model.layers.61.block_sparse_moe.experts.197.w2", "model.layers.61.block_sparse_moe.experts.198.w2", "model.layers.61.block_sparse_moe.experts.199.w2", "model.layers.61.block_sparse_moe.experts.200.w2", "model.layers.61.block_sparse_moe.experts.201.w2", "model.layers.61.block_sparse_moe.experts.202.w2", "model.layers.61.block_sparse_moe.experts.203.w2", "model.layers.61.block_sparse_moe.experts.204.w2", "model.layers.61.block_sparse_moe.experts.205.w2", "model.layers.61.block_sparse_moe.experts.206.w2", "model.layers.61.block_sparse_moe.experts.207.w2", "model.layers.61.block_sparse_moe.experts.208.w2", "model.layers.61.block_sparse_moe.experts.209.w2", "model.layers.61.block_sparse_moe.experts.210.w2", "model.layers.61.block_sparse_moe.experts.211.w2", "model.layers.61.block_sparse_moe.experts.212.w2", "model.layers.61.block_sparse_moe.experts.213.w2", "model.layers.61.block_sparse_moe.experts.214.w2", "model.layers.61.block_sparse_moe.experts.215.w2", "model.layers.61.block_sparse_moe.experts.216.w2", "model.layers.61.block_sparse_moe.experts.217.w2", "model.layers.61.block_sparse_moe.experts.218.w2", "model.layers.61.block_sparse_moe.experts.219.w2", "model.layers.61.block_sparse_moe.experts.220.w2", "model.layers.61.block_sparse_moe.experts.221.w2", "model.layers.61.block_sparse_moe.experts.222.w2", "model.layers.61.block_sparse_moe.experts.223.w2", "model.layers.61.block_sparse_moe.experts.224.w2", "model.layers.61.block_sparse_moe.experts.225.w2", "model.layers.61.block_sparse_moe.experts.226.w2", "model.layers.61.block_sparse_moe.experts.227.w2", "model.layers.61.block_sparse_moe.experts.228.w2", "model.layers.61.block_sparse_moe.experts.229.w2", "model.layers.61.block_sparse_moe.experts.230.w2", "model.layers.61.block_sparse_moe.experts.231.w2", "model.layers.61.block_sparse_moe.experts.232.w2", "model.layers.61.block_sparse_moe.experts.233.w2", "model.layers.61.block_sparse_moe.experts.234.w2", "model.layers.61.block_sparse_moe.experts.235.w2", "model.layers.61.block_sparse_moe.experts.236.w2", "model.layers.61.block_sparse_moe.experts.237.w2", "model.layers.61.block_sparse_moe.experts.238.w2", "model.layers.61.block_sparse_moe.experts.239.w2", "model.layers.61.block_sparse_moe.experts.240.w2", "model.layers.61.block_sparse_moe.experts.241.w2", "model.layers.61.block_sparse_moe.experts.242.w2", "model.layers.61.block_sparse_moe.experts.243.w2", "model.layers.61.block_sparse_moe.experts.244.w2", "model.layers.61.block_sparse_moe.experts.245.w2", "model.layers.61.block_sparse_moe.experts.246.w2", "model.layers.61.block_sparse_moe.experts.247.w2", "model.layers.61.block_sparse_moe.experts.248.w2", "model.layers.61.block_sparse_moe.experts.249.w2", "model.layers.61.block_sparse_moe.experts.250.w2", "model.layers.61.block_sparse_moe.experts.251.w2", "model.layers.61.block_sparse_moe.experts.252.w2", "model.layers.61.block_sparse_moe.experts.253.w2", "model.layers.61.block_sparse_moe.experts.254.w2", "model.layers.61.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0012129440903663746, "dbits": 3623878656 } ] } ], "base_kld": 0.2642178900539875, "arch_string": "MiniMaxM2ForCausalLM" }