22 * PubMed Article Details Adapter
33 *
44 * Get detailed information about a specific PubMed article by PMID.
5- * Uses ESummary API to retrieve metadata (ESummary returns JSON, EFetch returns XML).
5+ * Uses EFetch API (XML) for full article details including abstract,
6+ * MeSH terms, keywords, and author affiliations.
67 *
78 * API Documentation:
8- * - ESummary : https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESummary
9+ * - EFetch : https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch
910 */
1011
1112import { cli , Strategy } from '@jackwener/opencli/registry' ;
1213import { CliError } from '@jackwener/opencli/errors' ;
1314import {
14- eutilsFetch ,
15- extractAuthors ,
16- extractFirstAuthor ,
17- extractCorrespondingAuthor ,
18- extractDoi ,
19- extractPmcId ,
15+ eutilsFetchText ,
2016 buildPubMedUrl ,
2117 truncateText ,
22- formatArticleType ,
2318} from './utils.js' ;
2419
/**
 * Parse an EFetch (PubMed, retmode=xml) response into structured article details.
 *
 * The parser is regex-based and reads a single article: when the response
 * contains several `<PubmedArticle>` records only the first one is used.
 * Cited references (`<ReferenceList>`) are ignored so that their identifiers
 * are never mistaken for the article's own.
 *
 * @param xml  Raw XML text returned by EFetch.
 * @param pmid PubMed ID the XML was requested for; echoed into the result
 *             and used to build the article URL.
 * @returns Article metadata grouped into `authors`, `journal`, `publication`,
 *          `ids` and `classification`. Fields that are absent from the XML
 *          are returned as empty strings / empty arrays.
 */
function parseEFetchXml(xml: string, pmid: string) {
  const NAMED_ENTITIES: Record<string, string> = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
  };

  // Decode the predefined XML entities and numeric character references in a
  // single pass, so that "&amp;lt;" correctly becomes "&lt;" rather than "<".
  const decodeEntities = (text: string): string =>
    text.replace(/&(#x[0-9a-f]+|#\d+|amp|lt|gt|quot|apos);/gi, (whole: string, body: string) => {
      if (body[0] !== '#') return NAMED_ENTITIES[body.toLowerCase()] ?? whole;
      const isHex = body[1] === 'x' || body[1] === 'X';
      const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      return codePoint >= 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    });

  // Match <tag> or <tag attr="..."> exactly. The tag name must be followed by
  // whitespace or '>' so 'Author' does not match <AuthorList>, and the
  // attribute part may not end in '/' so self-closing elements are skipped
  // instead of being paired with a later closing tag.
  const tagRegex = (tag: string, flags: string): RegExp =>
    new RegExp(`<${tag}(?:\\s(?:[^>]*[^/>])?)?>([\\s\\S]*?)</${tag}>`, flags);

  // Inner XML of the first / every occurrence of an element.
  const rawFirst = (src: string, tag: string): string => src.match(tagRegex(tag, 'i'))?.[1] ?? '';
  const rawAll = (src: string, tag: string): string[] =>
    Array.from(src.matchAll(tagRegex(tag, 'gi')), m => m[1] ?? '');

  // Strip inline markup (<i>, <sup>, ...) first, then decode entities, so a
  // decoded "<" is never mistaken for the start of a tag.
  const clean = (fragment: string): string =>
    decodeEntities(fragment.replace(/<[^>]+>/g, '')).trim();

  const getTag = (src: string, tag: string): string => clean(rawFirst(src, tag));
  const getAllTags = (src: string, tag: string): string[] => rawAll(src, tag).map(clean);

  // Work on the first article record only, with cited references removed.
  // The greedy match deliberately spans nested/multiple reference lists.
  const record = (rawFirst(xml, 'PubmedArticle') || xml).replace(
    /<ReferenceList(?:\s[^>]*)?>[\s\S]*<\/ReferenceList>/gi,
    ''
  );

  // Abstract - may have multiple AbstractText sections (structured abstract)
  const abstract = getAllTags(record, 'AbstractText').join(' ').replace(/\s+/g, ' ').trim();

  // Title
  const title = getTag(record, 'ArticleTitle');

  // Journal - scoped to the <Journal> element when present
  const journalXml = rawFirst(record, 'Journal') || record;
  const journalTitle = getTag(journalXml, 'Title');
  const isoAbbreviation = getTag(journalXml, 'ISOAbbreviation');
  const volume = getTag(journalXml, 'Volume');
  const issue = getTag(journalXml, 'Issue');
  const pagination = getTag(record, 'MedlinePgn');

  // Publication date - read from <PubDate> (falling back to <ArticleDate>).
  // <DateCompleted>/<DateRevised> appear earlier in the record and also hold
  // <Year>/<Month>/<Day>, so an unscoped lookup would return indexing dates.
  const pubDateXml = rawFirst(journalXml, 'PubDate') || rawFirst(record, 'ArticleDate');
  const medlineDate = getTag(pubDateXml, 'MedlineDate');
  const year = getTag(pubDateXml, 'Year') || (medlineDate.match(/\d{4}/)?.[0] ?? '');
  const month = getTag(pubDateXml, 'Month');
  const day = getTag(pubDateXml, 'Day');
  const fullDate = [year, month, day].filter(Boolean).join(' ');

  // Authors and (first listed) affiliation of each author
  const authors: Array<{ name: string; affiliation: string }> = rawAll(record, 'Author').map(block => {
    const lastName = getTag(block, 'LastName');
    const foreName = getTag(block, 'ForeName') || getTag(block, 'Initials');
    const collectiveName = getTag(block, 'CollectiveName');
    const name = collectiveName || `${lastName} ${foreName}`.trim();
    const affiliation = getTag(block, 'Affiliation');
    return { name, affiliation };
  });

  const allAuthors = authors.map(a => a.name);
  const firstAuthor = allAuthors[0] || '';
  // NOTE: heuristic - the XML does not flag a corresponding author, so the
  // last listed author is reported.
  const correspondingAuthor = allAuthors[allAuthors.length - 1] || '';

  // Unique affiliations, in order of first appearance
  const affiliations = [...new Set(authors.map(a => a.affiliation).filter(Boolean))];

  // MeSH terms (descriptor names only, capped at 10)
  const meshTerms = rawAll(record, 'MeshHeading')
    .map(block => getTag(block, 'DescriptorName'))
    .filter(Boolean)
    .slice(0, 10);

  // Keywords (capped at 10)
  const keywords = getAllTags(record, 'Keyword').filter(Boolean).slice(0, 10);

  // Article type
  const pubTypes = getAllTags(record, 'PublicationType').filter(Boolean);
  const articleType = pubTypes[0] || 'Journal Article';

  // Language
  const language = getTag(record, 'Language');

  // IDs: value of the article's own <ArticleId IdType="..."> element
  const articleId = (idType: string): string => {
    const m = record.match(
      new RegExp(`<ArticleId\\s[^>]*\\bIdType="${idType}"[^>]*>([^<]+)</ArticleId>`, 'i')
    );
    return m ? decodeEntities(m[1] ?? '').trim() : '';
  };

  // Some records carry the DOI only as <ELocationID EIdType="doi">.
  const eLocationDoi = record.match(
    /<ELocationID\s[^>]*\bEIdType="doi"[^>]*>([^<]+)<\/ELocationID>/i
  );
  const doi = articleId('doi') || (eLocationDoi ? decodeEntities(eLocationDoi[1] ?? '').trim() : '');
  const pmcId = articleId('pmc');

  return {
    pmid,
    title,
    abstract,
    authors: {
      list: allAuthors,
      all: allAuthors.slice(0, 10).join(', ') + (allAuthors.length > 10 ? ', et al.' : ''),
      first: firstAuthor,
      corresponding: correspondingAuthor,
      count: allAuthors.length,
      affiliations,
    },
    journal: {
      title: journalTitle,
      isoAbbreviation,
      volume,
      issue,
      pagination,
    },
    publication: {
      year,
      fullDate,
    },
    ids: {
      pmid,
      doi,
      pmc: pmcId,
    },
    classification: {
      articleType,
      pubTypes,
      language,
      meshTerms,
      keywords,
    },
    url: buildPubMedUrl(pmid),
  };
}
142+
25143cli ( {
26144 site : 'pubmed' ,
27145 name : 'article' ,
28- description : 'Get detailed information about a PubMed article by PMID' ,
146+ description : 'Get detailed information about a PubMed article by PMID (full abstract, MeSH terms, affiliations) ' ,
29147 strategy : Strategy . PUBLIC ,
30148 browser : false ,
31149 args : [
34152 type : 'string' ,
35153 required : true ,
36154 positional : true ,
37- help : 'PubMed ID (e.g., "37780221", "37158692" )' ,
155+ help : 'PubMed ID (e.g., "37780221")' ,
38156 } ,
39157 {
40158 name : 'output' ,
@@ -43,14 +161,10 @@ cli({
43161 help : 'Output format: table (summary) or json (full details)' ,
44162 } ,
45163 ] ,
46- columns : [
47- 'field' ,
48- 'value' ,
49- ] ,
164+ columns : [ 'field' , 'value' ] ,
50165 func : async ( _page , args ) => {
51166 const pmid = args . pmid . trim ( ) ;
52167
53- // Validate PMID format
54168 if ( ! / ^ \d + $ / . test ( pmid ) ) {
55169 throw new CliError (
56170 'INVALID_ARGUMENT' ,
@@ -59,125 +173,52 @@ cli({
59173 ) ;
60174 }
61175
62- // Use ESummary to get article details (returns JSON, unlike EFetch which returns XML )
63- const esummaryResult = await eutilsFetch ( 'esummary ', {
176+ // Use EFetch to get full article details (XML includes abstract, MeSH, affiliations )
177+ const xml = await eutilsFetchText ( 'efetch ', {
64178 id : pmid ,
179+ rettype : 'abstract' ,
180+ retmode : 'xml' ,
65181 } ) ;
66182
67- const article = esummaryResult . result ?. [ pmid ] ;
68- if ( ! article ) {
183+ if ( ! xml || xml . includes ( '<ERROR>' ) || ! xml . includes ( '<PubmedArticle>' ) ) {
69184 throw new CliError (
70185 'NOT_FOUND' ,
71186 `Article with PMID ${ pmid } not found` ,
72187 'Check the PMID and try again'
73188 ) ;
74189 }
75190
76- // Extract basic info
77- const title = article . title || '' ;
78- const abstract = article . abstract || '' ;
79- const abstractText = typeof abstract === 'string' ? abstract : '' ;
191+ const article = parseEFetchXml ( xml , pmid ) ;
80192
81- // Extract authors
82- const authorList = article . authors || [ ] ;
83- const allAuthors = extractAuthors ( authorList , 10 ) ;
84- const firstAuthor = extractFirstAuthor ( authorList ) ;
85- const correspondingAuthor = extractCorrespondingAuthor ( authorList ) ;
86-
87- // Extract journal info
88- const journalTitle = article . fulljournalname || article . source || '' ;
89- const isoAbbreviation = article . source || '' ;
90-
91- // Extract publication date
92- const pubDate = article . pubdate || '' ;
93- const year = pubDate . split ( ' ' ) [ 0 ] || '' ;
94- const fullDate = pubDate ;
95-
96- // Extract volume, issue, pages
97- const volume = article . volume || '' ;
98- const issue = article . issue || '' ;
99- const pagination = article . pages || '' ;
100-
101- // Extract article IDs
102- const articleIds = article . articleids || [ ] ;
103- const doi = extractDoi ( articleIds ) ;
104- const pmcId = extractPmcId ( articleIds ) ;
105-
106- // Extract MeSH terms and keywords (from ESummary these may not be available)
107- const meshTerms : string [ ] = [ ] ;
108- const keywords : string [ ] = [ ] ;
109-
110- // Extract article type
111- const pubTypeList = article . pubtype || [ ] ;
112- const articleType = formatArticleType ( pubTypeList ) ;
113-
114- // Extract language
115- const language = article . lang ?. [ 0 ] || '' ;
116-
117- // If JSON format requested, return full structured data
118193 if ( args . output === 'json' ) {
119194 return [ {
120195 field : 'data' ,
121- value : JSON . stringify ( {
122- pmid,
123- title,
124- abstract : abstractText ,
125- authors : {
126- all : allAuthors ,
127- first : firstAuthor ,
128- corresponding : correspondingAuthor ,
129- count : authorList ?. length || 0 ,
130- } ,
131- journal : {
132- title : journalTitle ,
133- isoAbbreviation,
134- volume,
135- issue,
136- pagination,
137- } ,
138- publication : {
139- year,
140- fullDate,
141- } ,
142- ids : {
143- pmid,
144- doi,
145- pmc : pmcId ,
146- } ,
147- classification : {
148- articleType,
149- language,
150- meshTerms,
151- keywords,
152- } ,
153- url : buildPubMedUrl ( pmid ) ,
154- } , null , 2 ) ,
196+ value : JSON . stringify ( article , null , 2 ) ,
155197 } ] ;
156198 }
157199
158- // Table format - return key-value pairs
200+ // Table format
159201 const rows : Array < { field : string ; value : string } > = [
160- { field : 'PMID' , value : pmid } ,
161- { field : 'Title' , value : title } ,
162- { field : 'First Author' , value : firstAuthor } ,
163- { field : 'Corresponding Author' , value : correspondingAuthor } ,
164- { field : 'All Authors' , value : truncateText ( allAuthors , 100 ) } ,
165- { field : 'Journal' , value : journalTitle } ,
166- { field : 'Year' , value : year } ,
167- { field : 'Volume/Issue' , value : `${ volume } ${ issue ? `(${ issue } )` : '' } ` } ,
168- { field : 'Pages' , value : pagination } ,
169- { field : 'DOI' , value : doi || 'N/A' } ,
170- { field : 'PMC ID' , value : pmcId || 'N/A' } ,
171- { field : 'Article Type' , value : articleType } ,
172- { field : 'Language' , value : language } ,
173- { field : 'MeSH Terms' , value : meshTerms . join ( ', ' ) || 'N/A' } ,
174- { field : 'Keywords' , value : keywords . join ( ', ' ) || 'N/A' } ,
175- { field : 'Abstract' , value : truncateText ( abstractText , 300 ) || 'N/A' } ,
176- { field : 'URL' , value : buildPubMedUrl ( pmid ) } ,
202+ { field : 'PMID' , value : article . pmid } ,
203+ { field : 'Title' , value : article . title } ,
204+ { field : 'First Author' , value : article . authors . first } ,
205+ { field : 'Corresponding Author' , value : article . authors . corresponding } ,
206+ { field : 'All Authors' , value : truncateText ( article . authors . all , 120 ) } ,
207+ { field : 'Affiliations' , value : truncateText ( article . authors . affiliations [ 0 ] || 'N/A' , 120 ) } ,
208+ { field : 'Journal' , value : article . journal . title || article . journal . isoAbbreviation } ,
209+ { field : 'Year' , value : article . publication . year } ,
210+ { field : 'Volume/Issue' , value : `${ article . journal . volume } ${ article . journal . issue ? `(${ article . journal . issue } )` : '' } ` } ,
211+ { field : 'Pages' , value : article . journal . pagination } ,
212+ { field : 'DOI' , value : article . ids . doi || 'N/A' } ,
213+ { field : 'PMC ID' , value : article . ids . pmc || 'N/A' } ,
214+ { field : 'Article Type' , value : article . classification . articleType } ,
215+ { field : 'Language' , value : article . classification . language } ,
216+ { field : 'MeSH Terms' , value : article . classification . meshTerms . join ( ', ' ) || 'N/A' } ,
217+ { field : 'Keywords' , value : article . classification . keywords . join ( ', ' ) || 'N/A' } ,
218+ { field : 'Abstract' , value : truncateText ( article . abstract , 400 ) || 'N/A' } ,
219+ { field : 'URL' , value : article . url } ,
177220 ] ;
178221
179-
180-
181222 return rows ;
182223 } ,
183224} ) ;
0 commit comments