https://blog.csdn.net/m0_37769093/article/details/107732606
softmax 函数如下所示:
$$y_{i} = \frac{\exp(x_{i})}{\sum_{j=1}^{n}\exp(x_{j})}$$
softmax求导如下:
$i = j$ 的情况:
 
      
       
        
         
          
          
$$\frac{\partial y_{i}}{\partial x_{i}} = \frac{\exp(x_{i})}{\sum_{j=1}^{n}\exp(x_{j})} - \frac{\left(\exp(x_{i})\right)^{2}}{\left(\sum_{j=1}^{n}\exp(x_{j})\right)^{2}}$$
  
      
       
        
         
          
          
代入 softmax 的定义 $y_{i}$,上式可化简为:

$$\frac{\partial y_{i}}{\partial x_{i}} = y_{i} - (y_{i})^{2}$$
$i \neq j$ 的情况:
 
      
       
        
         
          
          
$$\frac{\partial y_{i}}{\partial x_{j}} = -\frac{\exp(x_{i})\times\exp(x_{j})}{\left(\sum_{k=1}^{n}\exp(x_{k})\right)^{2}}$$
  
      
       
        
         
          
          
代入 softmax 的定义 $y_{i}$、$y_{j}$,上式可化简为:

$$\frac{\partial y_{i}}{\partial x_{j}} = -y_{i}y_{j}$$










