Python zip() function

Description

The zip() function takes one or more iterables as arguments, packs their corresponding elements into tuples, and returns those tuples: as a list in Python 2, and as a lazy zip iterator in Python 3.

If the iterables have different lengths, the result is only as long as the shortest one. A zipped result can be unpacked ("unzipped") back into the original sequences by passing it to zip() again with the * operator.

The difference between zip() in Python 2 and Python 3: to save memory, zip() in Python 3.x returns an iterator object instead of a list. To display the result as a list, convert it explicitly with list().

Syntax

zip syntax:

zip([iterable, ...])

Parameter Description:

iterable – one or more iterables;

Return value

Returns a list of tuples in Python 2.x; in Python 3.x, returns an iterator that yields the tuples.

Example

The following two examples demonstrate the use of Python2.x and Python3.x zip respectively:

Example (Python 2.0 +)

>>> a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = zip(a,b) # Packed into a list of tuples
>>> zipped
[(1, 4), (2, 5), (3, 6)]
>>> zip(a,c) #The number of elements is consistent with the shortest list
[(1, 4), (2, 5), (3, 6)]
>>> zip(*zipped) # The inverse of zip: *zipped unpacks the tuples, so this "unzips" them back into the original sequences (a transposed matrix)
[(1, 2, 3), (4, 5, 6)]

Example (Python 3.0 +)

>>> a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = zip(a,b) # Return an object
>>> zipped
<zip object at 0x103abc288>
>>> list(zipped) # list() convert to list
[(1, 4), (2, 5), (3, 6)]
>>> list(zip(a,c)) # The number of elements is consistent with the shortest list
[(1, 4), (2, 5), (3, 6)]

>>> a1, a2 = zip(*zip(a,b)) # The inverse of zip: zip(*...) "unzips" the tuples back into the original sequences (a transposed matrix)
>>> list(a1)
[1, 2, 3]
>>> list(a2)
[4, 5, 6]
>>>

Using the zip() function at work

def mutation_drug_and_no_drug(self):
    """Split mutations into those with drug evidence and those without.

    The frame returned by ``self.new_gene_mutation_class_df(gene_class='')``
    is grouped by (genesymbol, exonintron, mutationtype_ch, chgvs, phgvs,
    vaf). For every group, three sets of ``"<drug_ch> (<grade>)"`` labels
    are built with ``zip()`` over the ``drug_ch`` and ``level_grade1``
    columns, where ``<grade>`` is the last whitespace-separated token of
    ``level_grade1``.

    Returns:
        A ``(have_drug, no_drug)`` tuple of lists of dicts.

        * ``have_drug`` gets one entry per group that has at least one row
          in any of the three drug categories. Keys: ``genesymbol``,
          ``fix_mutation``, ``current_sensitive``, ``current_negative``,
          ``maybe_benefit``; the last three are newline-joined labels or a
          placeholder string when empty.
        * ``no_drug`` gets one entry per group that has no drug rows but
          does have a ``SOM`` row whose ``mtype`` is ``snv`` or
          ``skipping``. It is kept sorted by gene symbol and exon/intron
          number via ``chinese_multi_sort``.
    """
    df = self.new_gene_mutation_class_df(gene_class='').copy()
    df['mutationtype_ch'] = df['mutationtype_ch'].fillna('--')

    def make_labels(frame):
        """Return unsorted "<drug_ch> (<grade>)" labels for *frame*'s rows."""
        return [
            f"{drug_ch} ({level.split()[-1]})"
            for drug_ch, level in zip(frame['drug_ch'], frame['level_grade1'])
        ]

    def sort_labels(labels, sort_key):
        """Sort *labels* with chinese_multi_sort and return plain strings.

        The helper is called with lists of dicts everywhere else in this
        method, so each label is wrapped in a single-key dict for the call
        and unwrapped afterwards.
        """
        records = [{sort_key: label} for label in labels]
        records = chinese_multi_sort(records, key=[sort_key], chinese_first=True)
        return [record[sort_key] for record in records]

    group_columns = ['genesymbol', 'exonintron', 'mutationtype_ch', 'chgvs', 'phgvs', 'vaf']

    have_drug = []
    no_drug = []
    for by, group in df.groupby(group_columns):
        # 'current_tumor' is used directly as a row mask
        # (presumably a boolean column - confirm against the producer).
        current_mut = group[group['current_tumor']]
        no_current_mut = group[~group['current_tumor']]

        # Current cancer type: sensitive drugs.
        current_sensitive = current_mut[current_mut['drugefficacy'] == 'sensitive']
        # Current cancer type: drug-resistant.
        # NOTE(review): the compared value equals the column name, which
        # looks like a mistranslated literal - confirm the real value.
        current_negative = current_mut[current_mut['drugefficacy'] == 'drugefficacy']

        # Current cancer type: possibly sensitive, non-preclinical evidence.
        current_maybe_sen = current_mut[
            (current_mut['drugefficacy'] == 'Possibly sensitive')
            & (current_mut['level_grade1'].isin(['Level B', 'Level C', 'Level D']))
            & (current_mut['Evidence_level'] != 'preclinical')
        ]

        # Other cancer types: sensitive with guideline/agency-level evidence.
        no_current_sen = no_current_mut[
            (no_current_mut['drugefficacy'] == 'sensitive')
            & (no_current_mut['Evidence_level'].isin(['FDA', 'NMPA', 'NCCN', 'CSCO']))
        ]
        # Potentially beneficial drugs.
        maybe_benefit = pd.concat([current_maybe_sen, no_current_sen])

        current_sensitive_list = sort_labels(make_labels(current_sensitive), 'level_grade_new')
        current_negative_list = sort_labels(make_labels(current_negative), 'level_grade_new')
        # Labels already reported as sensitive are not repeated as "maybe".
        # The set difference discards order, so sorting happens afterwards.
        maybe_benefit_list = sort_labels(
            set(make_labels(maybe_benefit)) - set(current_sensitive_list),
            'drug_ch',
        )

        if current_sensitive.empty and current_negative.empty and maybe_benefit.empty:
            no_drug_group = group[
                (group['somger'] == 'SOM')
                & group['mtype'].isin(['snv', 'skipping'])
            ]
            if not no_drug_group.empty:
                # Every field read below is a groupby key, so all rows of
                # the group agree on it; the last row is used, as before.
                row = no_drug_group.iloc[-1]
                exonintron = row['exonintron']

                exonintron_sort_exon = 0
                exonintron_sort_intron = 0
                if 'exon' in exonintron:
                    exonintron_sort_exon = int(exonintron.split('exon')[-1])
                elif 'intron' in exonintron:
                    exonintron_sort_intron = int(exonintron.split('intron')[-1])

                no_drug.append({
                    'genesymbol': row['genesymbol'],
                    'exonintron': exonintron,
                    'mutationtype_ch': row['mutationtype_ch'],
                    'chgvs': row['chgvs'],
                    'phgvs': row['phgvs'],
                    'vaf': row['vaf'],
                    'exonintron_sort_exon': exonintron_sort_exon,
                    'exonintron_sort_intron': exonintron_sort_intron,
                })
                no_drug = chinese_multi_sort(
                    no_drug,
                    key=['genesymbol', 'exonintron_sort_exon', 'exonintron_sort_intron'],
                    chinese_first=True,
                )
        else:
            fix_mutation_values = group['fix_mutation'].values
            # NOTE(review): the empty placeholders differ in case
            # ('None' vs 'none'); kept as-is because consumers may
            # compare against these exact strings.
            have_drug.append({
                'genesymbol': by[0],
                'fix_mutation': fix_mutation_values[0] if len(fix_mutation_values) else '',
                'current_sensitive': '\n'.join(current_sensitive_list) if current_sensitive_list else 'None',
                'current_negative': '\n'.join(current_negative_list) if current_negative_list else 'None',
                'maybe_benefit': '\n'.join(maybe_benefit_list) if maybe_benefit_list else 'none',
            })
    return have_drug, no_drug