Skip to content

Commit 809ef0b

Browse files
Adds supplier/license info to pip components. (microsoft#938)
* Adds supplier/license info to pip components. * Rename GetReleasesAsync to GetProjectAsync * Address feedback --------- Co-authored-by: Sebastian Gomez <[email protected]>
1 parent 710273b commit 809ef0b

File tree

7 files changed

+226
-44
lines changed

7 files changed

+226
-44
lines changed

src/Microsoft.ComponentDetection.Contracts/TypedComponent/PipComponent.cs

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
namespace Microsoft.ComponentDetection.Contracts.TypedComponent;
22

33
using System.Diagnostics.CodeAnalysis;
4+
using Newtonsoft.Json;
45
using PackageUrl;
56

67
public class PipComponent : TypedComponent
@@ -10,16 +11,26 @@ private PipComponent()
1011
/* Reserved for deserialization */
1112
}
1213

13-
public PipComponent(string name, string version)
14+
public PipComponent(string name, string version, string author = null, string license = null)
1415
{
1516
this.Name = this.ValidateRequiredInput(name, nameof(this.Name), nameof(ComponentType.Pip));
1617
this.Version = this.ValidateRequiredInput(version, nameof(this.Version), nameof(ComponentType.Pip));
18+
this.Author = author;
19+
this.License = license;
1720
}
1821

1922
public string Name { get; set; }
2023

2124
public string Version { get; set; }
2225

26+
#nullable enable
27+
[JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
28+
public string? Author { get; set; }
29+
30+
[JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
31+
public string? License { get; set; }
32+
#nullable disable
33+
2334
public override ComponentType Type => ComponentType.Pip;
2435

2536
[SuppressMessage("Usage", "CA1308:Normalize String to Uppercase", Justification = "Casing cannot be overwritten.")]

src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs

+10-6
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public interface IPyPiClient
2424
{
2525
Task<IList<PipDependencySpecification>> FetchPackageDependenciesAsync(string name, string version, PythonProjectRelease release);
2626

27-
Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetReleasesAsync(PipDependencySpecification spec);
27+
Task<PythonProject> GetProjectAsync(PipDependencySpecification spec);
2828
}
2929

3030
public sealed class PyPiClient : IPyPiClient, IDisposable
@@ -134,7 +134,7 @@ public async Task<IList<PipDependencySpecification>> FetchPackageDependenciesAsy
134134
return dependencies;
135135
}
136136

137-
public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetReleasesAsync(PipDependencySpecification spec)
137+
public async Task<PythonProject> GetProjectAsync(PipDependencySpecification spec)
138138
{
139139
var requestUri = new Uri($"https://pypi.org/pypi/{spec.Name}/json");
140140

@@ -183,7 +183,7 @@ public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetRele
183183

184184
this.logger.LogWarning($"Call to pypi.org failed, but no more retries allowed!");
185185

186-
return new SortedDictionary<string, IList<PythonProjectRelease>>();
186+
return new PythonProject();
187187
}
188188

189189
if (!request.IsSuccessStatusCode)
@@ -192,12 +192,16 @@ public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetRele
192192

193193
this.logger.LogWarning("Received {StatusCode} {ReasonPhrase} from {RequestUri}", request.StatusCode, request.ReasonPhrase, requestUri);
194194

195-
return new SortedDictionary<string, IList<PythonProjectRelease>>();
195+
return new PythonProject();
196196
}
197197

198198
var response = await request.Content.ReadAsStringAsync();
199199
var project = JsonConvert.DeserializeObject<PythonProject>(response);
200-
var versions = new SortedDictionary<string, IList<PythonProjectRelease>>(new PythonVersionComparer());
200+
var versions = new PythonProject
201+
{
202+
Info = project.Info,
203+
Releases = new SortedDictionary<string, IList<PythonProjectRelease>>(new PythonVersionComparer()),
204+
};
201205

202206
foreach (var release in project.Releases)
203207
{
@@ -208,7 +212,7 @@ public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetRele
208212
parsedVersion.Valid && parsedVersion.IsReleasedPackage &&
209213
PythonVersionUtilities.VersionValidForSpec(release.Key, spec.DependencySpecifiers))
210214
{
211-
versions.Add(release.Key, release.Value);
215+
versions.Releases.Add(release.Key, release.Value);
212216
}
213217
}
214218
catch (ArgumentException ae)

src/Microsoft.ComponentDetection.Detectors/pip/PythonProject.cs

+4-1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,8 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
77
/// </summary>
88
public class PythonProject
99
{
10-
public Dictionary<string, IList<PythonProjectRelease>> Releases { get; set; }
10+
public SortedDictionary<string, IList<PythonProjectRelease>> Releases { get; set; }
11+
12+
#nullable enable
13+
public PythonProjectInfo? Info { get; set; }
1114
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
namespace Microsoft.ComponentDetection.Detectors.Pip;
2+
3+
using System.Collections.Generic;
4+
using Newtonsoft.Json;
5+
6+
/// <summary>
/// Subset of the "info" object of a Python project's JSON metadata.
/// Only the fields used for supplier and license reporting are mapped.
/// </summary>
public class PythonProjectInfo
{
    /// <summary>Gets or sets the author value of the project.</summary>
    public string Author { get; set; }

    /// <summary>Gets or sets the value of the "author_email" field.</summary>
    [JsonProperty(PropertyName = "author_email")]
    public string AuthorEmail { get; set; }

    /// <summary>Gets or sets the classifier strings declared by the project.</summary>
    public List<string> Classifiers { get; set; }

    /// <summary>Gets or sets the license value of the project.</summary>
    public string License { get; set; }

    /// <summary>Gets or sets the maintainer value of the project.</summary>
    public string Maintainer { get; set; }

    /// <summary>Gets or sets the value of the "maintainer_email" field.</summary>
    [JsonProperty(PropertyName = "maintainer_email")]
    public string MaintainerEmail { get; set; }

    // Add other properties from the "info" object as needed
}

src/Microsoft.ComponentDetection.Detectors/pip/PythonResolver.cs

+62-7
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ public class PythonResolver : IPythonResolver
1313
private readonly IPyPiClient pypiClient;
1414
private readonly ILogger<PythonResolver> logger;
1515

16+
private readonly int maxLicenseFieldLength = 100;
17+
private readonly string classifierFieldSeparator = " :: ";
18+
private readonly string classifierFieldLicensePrefix = "License";
19+
1620
public PythonResolver(IPyPiClient pypiClient, ILogger<PythonResolver> logger)
1721
{
1822
this.pypiClient = pypiClient;
@@ -35,7 +39,9 @@ public async Task<IList<PipGraphNode>> ResolveRootsAsync(ISingleFileComponentRec
3539
// If we have it, we probably just want to skip at this phase as this indicates duplicates
3640
if (!state.ValidVersionMap.TryGetValue(rootPackage.Name, out _))
3741
{
38-
var result = await this.pypiClient.GetReleasesAsync(rootPackage);
42+
var project = await this.pypiClient.GetProjectAsync(rootPackage);
43+
44+
var result = project.Releases;
3945

4046
if (result.Keys.Any())
4147
{
@@ -45,7 +51,7 @@ public async Task<IList<PipGraphNode>> ResolveRootsAsync(ISingleFileComponentRec
4551
var candidateVersion = state.ValidVersionMap[rootPackage.Name].Keys.Any()
4652
? state.ValidVersionMap[rootPackage.Name].Keys.Last() : null;
4753

48-
var node = new PipGraphNode(new PipComponent(rootPackage.Name, candidateVersion));
54+
var node = new PipGraphNode(new PipComponent(rootPackage.Name, candidateVersion, license: this.GetLicenseFromProject(project), author: this.GetSupplierFromProject(project)));
4955

5056
state.NodeReferences[rootPackage.Name] = node;
5157

@@ -103,15 +109,17 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
103109
else
104110
{
105111
// We haven't encountered this package before, so let's fetch it and find a candidate
106-
var result = await this.pypiClient.GetReleasesAsync(dependencyNode);
112+
var project = await this.pypiClient.GetProjectAsync(dependencyNode);
113+
114+
var result = project.Releases;
107115

108116
if (result.Keys.Any())
109117
{
110118
state.ValidVersionMap[dependencyNode.Name] = result;
111119
var candidateVersion = state.ValidVersionMap[dependencyNode.Name].Keys.Any()
112120
? state.ValidVersionMap[dependencyNode.Name].Keys.Last() : null;
113121

114-
this.AddGraphNode(state, state.NodeReferences[currentNode.Name], dependencyNode.Name, candidateVersion);
122+
this.AddGraphNode(state, state.NodeReferences[currentNode.Name], dependencyNode.Name, candidateVersion, license: this.GetLicenseFromProject(project), author: this.GetSupplierFromProject(project));
115123

116124
state.ProcessingQueue.Enqueue((root, dependencyNode));
117125
}
@@ -155,7 +163,7 @@ private async Task<bool> InvalidateAndReprocessAsync(
155163

156164
var candidateVersion = state.ValidVersionMap[pipComponent.Name].Keys.Any() ? state.ValidVersionMap[pipComponent.Name].Keys.Last() : null;
157165

158-
node.Value = new PipComponent(pipComponent.Name, candidateVersion);
166+
node.Value = new PipComponent(pipComponent.Name, candidateVersion, license: pipComponent.License, author: pipComponent.Author);
159167

160168
var dependencies = (await this.FetchPackageDependenciesAsync(state, newSpec)).ToDictionary(x => x.Name, x => x);
161169

@@ -201,7 +209,7 @@ private async Task<IList<PipDependencySpecification>> FetchPackageDependenciesAs
201209
return await this.pypiClient.FetchPackageDependenciesAsync(spec.Name, candidateVersion, packageToFetch);
202210
}
203211

204-
private void AddGraphNode(PythonResolverState state, PipGraphNode parent, string name, string version)
212+
private void AddGraphNode(PythonResolverState state, PipGraphNode parent, string name, string version, string license = null, string author = null)
205213
{
206214
if (state.NodeReferences.TryGetValue(name, out var value))
207215
{
@@ -210,10 +218,57 @@ private void AddGraphNode(PythonResolverState state, PipGraphNode parent, string
210218
}
211219
else
212220
{
213-
var node = new PipGraphNode(new PipComponent(name, version));
221+
var node = new PipGraphNode(new PipComponent(name, version, license: license, author: author));
214222
state.NodeReferences[name] = node;
215223
parent.Children.Add(node);
216224
node.Parents.Add(parent);
217225
}
218226
}
227+
228+
/// <summary>
/// Chooses the supplier to report for a project from its metadata.
/// </summary>
/// <param name="project">The project whose <c>Info</c> is inspected.</param>
/// <returns>
/// The first value that is not null, empty or whitespace among, in order of preference:
/// maintainer, maintainer e-mail, author, author e-mail. Returns <c>null</c> when the project
/// has no info or none of those values is populated.
/// </returns>
private string GetSupplierFromProject(PythonProject project)
{
    var info = project.Info;
    if (info == null)
    {
        return null;
    }

    // Ordered by preference: maintainer details win over author details, names win over e-mails.
    var candidates = new[]
    {
        info.Maintainer,
        info.MaintainerEmail,
        info.Author,
        info.AuthorEmail,
    };

    // FirstOrDefault yields null when every candidate is blank.
    return candidates.FirstOrDefault(candidate => !string.IsNullOrWhiteSpace(candidate));
}
253+
254+
/// <summary>
/// Chooses the license to report for a project from its metadata.
/// </summary>
/// <param name="project">The project whose <c>Info</c> is inspected.</param>
/// <returns>
/// The value of the license field when it is populated and shorter than <c>maxLicenseFieldLength</c>
/// characters; otherwise the last segment of every license classifier, joined with ", ".
/// Returns <c>null</c> when neither source yields a license.
/// </returns>
private string GetLicenseFromProject(PythonProject project)
{
    var info = project.Info;
    if (info == null)
    {
        return null;
    }

    // There are cases where the actual license text is found in the license field, so only values shorter
    // than maxLicenseFieldLength are accepted. Blank values are rejected as well; otherwise an empty
    // license field would be returned as-is and the classifiers below would never be consulted.
    if (!string.IsNullOrWhiteSpace(info.License) && info.License.Length < this.maxLicenseFieldLength)
    {
        return info.License;
    }

    if (info.Classifiers == null)
    {
        return null;
    }

    // Keep only the license classifiers, then split each one on the " :: " separator and take the last part.
    // The prefix is matched ordinally because classifiers are identifiers, not linguistic text.
    var licenses = info.Classifiers
        .Where(x => !string.IsNullOrWhiteSpace(x) && x.StartsWith(this.classifierFieldLicensePrefix, System.StringComparison.Ordinal))
        .Select(x => x.Split(this.classifierFieldSeparator).Last())
        .ToList();

    // Report "no license" as null rather than as an empty string so callers can tell the two apart.
    return licenses.Count > 0 ? string.Join(", ", licenses) : null;
}
219274
}

0 commit comments

Comments
 (0)