class Transformer(nn.Module):
    """One post-LayerNorm Transformer encoder layer.

    Multi-head self-attention followed by a position-wise feed-forward
    network, each wrapped in dropout + residual + LayerNorm.
    """

    def __init__(self, d_head, n_head, d_model, d_ff, act=F.gelu):
        super().__init__()
        self.d = d_head
        self.h = n_head
        width = d_head * n_head
        # Fused projections for all heads at once: query, key, value, output.
        self.Q = nn.Linear(width, width)
        self.K = nn.Linear(width, width)
        self.V = nn.Linear(width, width)
        self.O = nn.Linear(width, width)
        self.LayerNorm_1 = nn.LayerNorm(width)
        self.LayerNorm_2 = nn.LayerNorm(width)
        # NOTE(review): the FFN is sized by d_model while the attention /
        # LayerNorm width is d_head * n_head, so forward() only type-checks
        # when d_model == d_head * n_head — confirm with callers.
        self.W_1 = nn.Linear(d_model, d_ff)
        self.W_2 = nn.Linear(d_ff, d_model)
        self.act = act
        self.dropout = nn.Dropout(0.2)

    def attn(self, x, mask):
        """Scaled dot-product multi-head self-attention.

        `mask` is added to the pre-softmax scores, so disallowed positions
        are expected to carry large negative values.
        """
        queries = self.Q(x)
        keys = self.K(x)
        values = self.V(x)
        # Split the fused head dimension apart; keys are laid out transposed
        # so that queries @ keys directly yields [B H L L] scores.
        queries = einops.rearrange(queries, 'B L (H D) -> B H L D', H=self.h)
        keys = einops.rearrange(keys, 'B L (H D) -> B H D L', H=self.h)
        values = einops.rearrange(values, 'B L (H D) -> B H L D', H=self.h)
        scores = queries @ keys / math.sqrt(self.d) + mask  # [B H L L]
        weights = torch.softmax(scores, dim=-1)
        # Merge the heads back together and apply the output projection.
        mixed = einops.rearrange(weights @ values, 'B H L D -> B L (H D)')
        return self.O(mixed)

    def ffn(self, x):
        """Two-layer position-wise feed-forward network.

        Dropout is applied after the activation and again after the second
        projection.
        """
        hidden = self.dropout(self.act(self.W_1(x)))
        return self.dropout(self.W_2(hidden))

    def forward(self, x, mask):
        """Post-LN residual layer: attention sublayer, then FFN sublayer.

        NOTE(review): ffn() already drops out its own output, so the FFN
        branch passes through dropout twice here — confirm intentional.
        """
        x = self.LayerNorm_1(x + self.dropout(self.attn(x, mask)))
        x = self.LayerNorm_2(x + self.dropout(self.ffn(x)))
        return x
# The k smallest numbers (最小的 k 个数)
class Solution:
    def GetLeastNumbers_Solution(self, tinput, k):
        """Return the k smallest numbers of *tinput*.

        Uses quickselect: repeatedly partition around a pivot and descend
        only into the side containing index k-1, so after the loop the
        first k slots hold the k smallest values (in unspecified order,
        matching the original). The input list is partially reordered in
        place, as before. Average O(n) time.

        Fixes over the original:
        - iterative instead of recursive, so sorted or duplicate-heavy
          input can no longer blow the recursion limit (recursion depth
          was O(n) in the worst case);
        - explicit guards for empty input, k <= 0, and k >= len(tinput)
          instead of doing wasted partition work.

        :param tinput: list of comparable numbers (mutated in place)
        :param k: how many smallest elements to return
        :return: list of the k smallest elements; [] if k <= 0 or input empty
        """
        if not tinput or k <= 0:
            return []
        if k >= len(tinput):
            # Every element qualifies; return a copy like tinput[:k] did.
            return tinput[:]
        lo, hi = 0, len(tinput) - 1
        while lo < hi:
            mid = self._partition(tinput, lo, hi)
            if mid == k - 1:
                break  # first k slots now hold the k smallest
            if mid > k - 1:
                hi = mid - 1  # k-th smallest is in the left part
            else:
                lo = mid + 1  # k-th smallest is in the right part
        return tinput[:k]

    def _partition(self, a, lo, hi):
        """Hoare-style partition with pivot a[lo]; return the pivot's
        final index. Elements <= pivot end up left of it, >= right of it."""
        p = a[lo]
        l, r = lo, hi
        while l < r:
            while l < r and a[r] >= p:
                r -= 1
            while l < r and a[l] <= p:
                l += 1
            a[l], a[r] = a[r], a[l]
        a[lo], a[l] = a[l], a[lo]
        return l