# Order-dependent DEFINE statements (rules).
#
# Each DEFINE binds a name to a SMARTS fragment. Later statements reuse it as
# $name (e.g. Cthiocarbonyl references $Scarbonyl), which is why the order of
# these statements matters: keep every definition above the statements that
# reference it.
#
# Naming: the leading capital letter appears to name the element of the atom
# the pattern is anchored on (C, N, O, S, P; X for a halogen), e.g. Ccarbonyl
# starts with C and Ocarbonyl starts with O.

# --- Basic atom classes ---
DEFINE aryl [a]
DEFINE arene [c]
DEFINE Caryl [#6;a]
DEFINE Calkyne [C;$(C#C)]
DEFINE Calkene [C;$(C=C)]
DEFINE Calkyl [C;!$(C=[!#6])]
DEFINE Cstd [#6;!$(*=[!#6])]
DEFINE hydroxyl [$([OD1]-*)]
DEFINE mercapto [$([SD1]-*)]
DEFINE halide [!#6;$([F,Cl,Br,I])]
DEFINE iodine [I]
DEFINE lg_halide [Br,I]
DEFINE hetatm [!#6;$([N,O,S,F,Cl,Br,I,P])]

# --- Carbonyl / thiocarbonyl and sulfur oxidation states ---
DEFINE Scarbonyl [S;D1;$(S=C)]
DEFINE Cthiocarbonyl [C;$(C=[$Scarbonyl])]
DEFINE Ocarbonyl [O;D1;$(O=C)]
DEFINE Ccarbonyl [C;$(C=[$Ocarbonyl])]
DEFINE Ssulfoxide [S;D3;$(S(=O)([#6])[#6])]
DEFINE Ssulfone [S;$(S(=O)(=O)([#6])[#6])]
DEFINE Ssulfide [S;D2;$(S([#6])[#6])]

# --- Amines ---
# pseudo_amine only excludes N next to an atom double-bonded to a non-carbon;
# amine additionally excludes N bonded to non-carbon, N on an aromatic atom,
# and N with a double or triple bond.
DEFINE pseudo_amine [N;!$(N*=[!#6])]
DEFINE amine [N;!$(N*=[!#6]);!$(N~[!#6]);!$(Na);!$(N#*);!$(N=*)]
DEFINE ring [R]
DEFINE Ntertiary_amine [$amine;D3]
DEFINE Nsecondary_amine [$amine;D2]
DEFINE Nprimary_amine [$amine;D1]

# --- Oxygen / nitrogen functional groups ---
DEFINE Ophenol [$hydroxyl;$(Oc)]
DEFINE Operoxide [O;$(O[$hydroxyl])]
DEFINE Nnitro [N;+0,+1;$(N(=O)~[OD1])]
DEFINE Cnitrile [C;$(C#[N;D1])]
DEFINE Nnitrile [N;$(N#[$Cnitrile])]
DEFINE Nisothiocyanate [N;$(N(=C=S)*)]
DEFINE Cisothiocyanate [C;$(C=[$Nisothiocyanate])]
DEFINE Nisocyanate [N;$(N(=C=O)*)]
DEFINE Cisocyanate [C;$(C=[$Nisocyanate])]
DEFINE Nhydrazone [N;$(N[N;D2]=C)]
DEFINE hydroxylamine [$pseudo_amine;$(N[$hydroxyl]);!$(N=*)]
DEFINE Nhydrazine [N;$(N-[N;D1]);!$(N=C)]
DEFINE Cepoxide [C;$(C1CO1)]
DEFINE Oepoxide [O;$(O([$Cepoxide])[$Cepoxide])]

# --- Phosphorus and sulfur acids / esters ---
# NOTE(review): both phosphonic patterns require two P=O double bonds,
# mirroring the sulfonic patterns below. A phosphonic acid/ester normally has
# a single P=O, so these may never match real input -- confirm the intent.
DEFINE Pphosphoric_ester [P;$(P(=O)(O)O*)]
DEFINE Pphosphoric_acid [P;$(P(=O)(O)[$hydroxyl])]
DEFINE Pphosphonic_ester [P;$(P(=O)(=O)O*)]
DEFINE Pphosphonic_acid [P;$(P(=O)(=O)[$hydroxyl])]
DEFINE Ssulfonic_ester [S;$(S(=O)(=O)O*)]
DEFINE Ssulfonic_acid [S;$(S(=O)(=O)[$hydroxyl])]

# --- Carbonyl derivatives ---
# NOTE(review): [#6,#1] only admits a hydrogen neighbour when that hydrogen is
# an explicit atom in the molecule -- confirm how the consuming tool treats
# implicit hydrogens (affects formates / formyl halides / formic acid).
DEFINE Cketone [$Ccarbonyl;$(C([#6])[#6])]
DEFINE Caldehyde [$Ccarbonyl;$([H1,H2]);!$(C-[$hetatm])]
DEFINE Cester [$Ccarbonyl;$(C(=O)O[#6]);$(C[#6,#1])]
DEFINE Cthioester [$Cthiocarbonyl;$(C(=S)O[#6]);$(C[#6,#1])]
DEFINE Clactone [$Cester;R]
DEFINE Cacid_halide [$Ccarbonyl;$(C[$halide]);$(C[#6,#1])]
DEFINE Cacid_chloride [$Cacid_halide;$(CCl)]
DEFINE Ccarboxylic_acid [$Ccarbonyl;$(C[$hydroxyl]);$(C[#6,#1])]
DEFINE Ocarboxylic_acid [$hydroxyl;$(O[$Ccarboxylic_acid])]
DEFINE Cthiourea [$Cthiocarbonyl;$(C(=S)(N)N)]
DEFINE Nthiourea [N;$(N[$Cthiourea])]
DEFINE Ccarbonate [$Ccarbonyl;$(C(=O)(O)O)]
DEFINE Ocarbonate [O;$(O[$Ccarbonate])]
DEFINE Ccarbamate [$Ccarbonyl;$(C(=O)(O)N)]
DEFINE Ncarbamate [N;$(N[$Ccarbamate])]
DEFINE Ocarbamate [O;$(O[$Ccarbamate])]
DEFINE Cthiocarbamate [$Cthiocarbonyl;$(C(=S)(O)N)]
DEFINE Nthiocarbamate [N;$(N[$Cthiocarbamate])]
DEFINE Othiocarbamate [O;$(O[$Cthiocarbamate])]
DEFINE Ccarbamic_acid [$Ccarbamate;$(C[$hydroxyl])]

# --- N=N / N-N=N / azide nitrogens ---
DEFINE Nazo [N;D2;$(N=[N;D2]);!$(N(=N)~[$hetatm]);!$(N=N~[$hetatm])]
DEFINE N12triazine [N;$(N-N=N);D2,D3]
DEFINE N1triazine [N;$(N=N-N);D2]
DEFINE Nazide [N;$(N=[N+]=[N-])]

# --- Anhydrides, amides, lactams ---
DEFINE Canhydride [$Ccarbonyl;$(CO[$Ccarbonyl])]
DEFINE Camide [$Ccarbonyl;$(CN);!$(C(N)(=O)[!#6])]
DEFINE Namide [N;$(N[$Camide])]
DEFINE Cthioamide [$Cthiocarbonyl;$(CN);!$(C(N)(=S)[!#6])]
DEFINE Nthioamide [N;$(N[$Cthioamide])]
DEFINE Clactam [$Camide;R]
DEFINE Nlactam [$Namide;R]

# --- Thiols, alcohols, ureas, ethers, enamines ---
DEFINE Sthiol [$mercapto;$(S[#6;!$(C=[!#6])])]
DEFINE Oalcohol [$hydroxyl;$(O[C;!$(C=[!#6])])]
DEFINE Curea [$Ccarbonyl;$(C(=O)(N)N)]
DEFINE Nurea [N;$(N[$Curea])]
DEFINE Oether [O;$(O([$Cstd])[$Cstd])]
DEFINE Cenol_ether [C;$(C=C[$Oether])]
DEFINE Oenol_ether [O;$(OC=[$Cenol_ether])]
DEFINE Cenamine [C;$(C=C[N;!$Nnitro])]
DEFINE Coxalyl [$Ccarbonyl;$(C[$Ccarbonyl])]
DEFINE Ooxalyl [O;$(O=[$Coxalyl])]

# --- Organometallics, dithioacetals, oximes, imines, sulfonyl groups ---
# Corganometallic is a carbon matching any one of the comma-separated
# alternatives (B, Mg-halide, Li, Cu-Li, or Ag on an alkyne carbon).
DEFINE Corganometallic [C;$(C[BD1]),$(C[Mg][$halide]),$(C[Li]),$(C[Cu][Li]),$(C([Ag])#C)]
DEFINE Cdithioacetal [C;$(C1[SD2]CCC[SD2]1)]
DEFINE Sdithioacetal [S;$(S[$Cdithioacetal])]
DEFINE Coxime [C;$(C=N[$hydroxyl])]
DEFINE Noxime [N;$(N=[$Coxime])]
DEFINE Ooxime [O;$(O[$Noxime])]
DEFINE Cimino [C;$(C=[N;!$(N~[$hetatm])])]
DEFINE Nimino [N;$(N=[$Cimino])]
DEFINE Ssulfonyl_halide [S;$(S(=O)(=O)[$halide])]
DEFINE Ssulfonamide [S;$(S(=O)(=O)N)]

# --- Hemiketals / hemiacetals / ketals / acetals ---
# The "ketal" variants require a four-connected carbon (CD4); the "acetal"
# variants require CD3H1 or CD2H2.
DEFINE Chemiketal [CD4;$(C(O[$Calkyl])[$hydroxyl])]
DEFINE Chemiacetal [CD3H1,CD2H2;$(C(O[$Calkyl])[$hydroxyl])]
DEFINE OHhemiketal [O;$hydroxyl;$(O[$Chemiketal])]
DEFINE OEhemiacetal [O;$Oether;$(O[$Chemiacetal])]
DEFINE Cketal [CD4;$(C(O[$Calkyl])O[$Calkyl])]
DEFINE Oketal [O;$(O[$Cketal])]
DEFINE Cacetal [CD3H1,CD2H2;$(C(O[$Calkyl])O[$Calkyl])]
DEFINE Oacetal [O;$(O[$Cacetal])]

# --- Thioethers, halides on carbon, disulfides ---
DEFINE Sthioether [SD2;$(S([$Calkyl])[$Calkyl])]
DEFINE Xaryl_halide [$halide;$(*[$aryl])]
DEFINE Xalkyl_halide [$halide;$(*[$Calkyl])]
DEFINE Xalkylating_agent [$lg_halide;$(*[$Calkyl])]
DEFINE Sdisulfide [S;$(S([$Calkyl])S[$Calkyl])]

# --- Unprefixed aliases ---
# Each name below simply wraps one element-prefixed definition from above.
DEFINE enol_ether [$Cenol_ether]
DEFINE enamine [$Cenamine]
DEFINE oxalyl [$Coxalyl]
DEFINE organometallic [$Corganometallic]
DEFINE dithioacetal [$Cdithioacetal]
DEFINE oxime [$Coxime]
DEFINE imino [$Cimino]
DEFINE sulfonyl_halide [$Ssulfonyl_halide]
DEFINE sulfonamide [$Ssulfonamide]
DEFINE hemiketal [$Chemiketal]
DEFINE hemiacetal [$Chemiacetal]
DEFINE ketal [$Cketal]
DEFINE acetal [$Cacetal]
DEFINE thioether [$Sthioether]
DEFINE ether [$Oether]
DEFINE aryl_halide [$Xaryl_halide]
DEFINE alkyl_halide [$Xalkyl_halide]
DEFINE alkylating_agent [$Xalkylating_agent]
DEFINE disulfide [$Sdisulfide]
DEFINE sulfoxide [$Ssulfoxide]
DEFINE sulfone [$Ssulfone]
DEFINE sulfide [$Ssulfide]
DEFINE tertiary_amine [$Ntertiary_amine]
DEFINE secondary_amine [$Nsecondary_amine]
DEFINE primary_amine [$Nprimary_amine]
DEFINE phenol [$Ophenol]
DEFINE peroxide [$Operoxide]
DEFINE nitro [$Nnitro]
DEFINE nitrile [$Cnitrile]
DEFINE isothiocyanate [$Nisothiocyanate]
DEFINE isocyanate [$Nisocyanate]
DEFINE hydrazone [$Nhydrazone]
DEFINE hydrazine [$Nhydrazine]
DEFINE epoxide [$Cepoxide]
DEFINE phosphoric_ester [$Pphosphoric_ester]
DEFINE phosphoric_acid [$Pphosphoric_acid]
DEFINE phosphonic_ester [$Pphosphonic_ester]
DEFINE phosphonic_acid [$Pphosphonic_acid]
DEFINE sulfonic_ester [$Ssulfonic_ester]
DEFINE sulfonic_acid [$Ssulfonic_acid]
DEFINE ketone [$Cketone]
# Unprefixed aliases: each name wraps one or more element-prefixed
# definitions made by earlier DEFINE statements. Where two names are
# separated by a comma, an atom matching either one is accepted.
DEFINE aldehyde [$Caldehyde]
DEFINE lactone [$Clactone]
DEFINE ester [$Cester]
DEFINE thioester [$Cthioester]
DEFINE acid_chloride [$Cacid_chloride]
DEFINE acid_halide [$Cacid_halide]
DEFINE carboxylic_acid [$Ccarboxylic_acid]
DEFINE thiocarbonyl [$Cthiocarbonyl]
DEFINE carbonyl [$Ccarbonyl]
DEFINE thiourea [$Nthiourea]
DEFINE carbonate [$Ocarbonate]
DEFINE carbamic_acid [$Ccarbamic_acid]
DEFINE carbamate [$Ocarbamate,$Ncarbamate]
DEFINE thiocarbamate [$Othiocarbamate,$Nthiocarbamate]
DEFINE azo [$Nazo]
DEFINE triazine [$N1triazine,$N12triazine]
DEFINE azide [$Nazide]
# Anilines are built on $pseudo_amine rather than $amine: the N must sit on an
# aromatic atom and have no non-carbon neighbour; the "unsubstituted" variant
# additionally requires a single connection (D1).
DEFINE aniline_unsubstituted [$pseudo_amine;D1;$(N[$aryl]);!$(N~[!#6])]
DEFINE aniline [$pseudo_amine;$(N[$aryl]);!$(N~[!#6])]
DEFINE anhydride [$Canhydride]
DEFINE thioamide [$Cthioamide]
DEFINE amide [$Camide]
DEFINE lactam [$Clactam]
DEFINE alkyne [$Calkyne]
DEFINE alkene [$Calkene]
DEFINE thiol [$Sthiol]
DEFINE alcohol [$Oalcohol]
DEFINE urea [$Nurea]
DEFINE aryl_mono_BrI [c;$(c[Br,I])]
# Explicit alkyne & alkene needed to break symmetry.
DEFINE mono_alkyne [$alkyne;!$(*.C#C)]
DEFINE mono_alkene [$alkene;!$(*.C=C)]
DEFINE alkyl [$Calkyl]

# --- Composite / property-style definitions ---
# nucleophile: any of the listed groups; primary and secondary amines are
# excluded when the atom also matches $enamine.
DEFINE nucleophile [$alcohol,$primary_amine&!$([$enamine]),$secondary_amine&!$([$enamine]),$aniline,$phenol,$azide,$hydrazine,$hydroxylamine,$peroxide,$thiol,$oxime]
DEFINE malonic [CD3H1,CD2H2;$(C([$Ccarbonyl])[$Ccarbonyl])]
DEFINE base [nD2,N&!D4&!$(N*=[!#6])&!$([ND3v4])&!$(N#*)]
# acid: an atom that has both a double bond and a hydroxyl neighbour, or a
# malonic carbon.
DEFINE acid [*&$(*=*)&$(*[$hydroxyl]),$malonic]
DEFINE charge [$acid,$base]
# long_aliphatic_chain: eight consecutive non-ring aliphatic atoms.
DEFINE long_aliphatic_chain [$([A!r]~[A!r]~[A!r]~[A!r]~[A!r]~[A!r]~[A!r]~[A!r])]
DEFINE michael_acceptor [$([CH1,CH2]=CC=O)]
DEFINE beta_halo_carbonyl [$([$halide]CCC=O)]
DEFINE triphenyl_phosphene [#15;$(P(c)(c)c)]
DEFINE unbranched_chain [$([AD2!r]~[AD2!r]~[AD2!r]~[AD2!r,AD1!r])]
DEFINE amino_acid [$(NCC(=O)[O,N])]
DEFINE di_peptide [$(NCC(=O)NCC(=O)[O,N])]
DEFINE hetero_hetero [$([$hetatm;D2]-[$hetatm;D2])]
DEFINE halopyrimidine [$halide;$(*c1ncccn1)]
DEFINE methyl_ketone [$Cketone;$(*-[CD1])]
DEFINE perhalo_ketone [$Cketone;$(*-C([$halide])([$halide])[$halide])]
DEFINE aziridine [$(C1NC1)]
DEFINE imine [$(C=[ND2])]

# SMARTS rules generated to eliminate all of the compounds with "colors"
# in their names from a set of available compounds. This list is
# fairly complete, but is empirical and may not remove all possible dyes.
# NOTE: these are not recursive, so they cannot be arbitrarily combined with other SMARTS.
# (Unlike the definitions above, each dyeN is a plain multi-atom SMARTS, not a
# single bracketed atom expression.)
DEFINE dye1 O=CC(N)=C(N)C=O
DEFINE dye2 OC-C=C-CO
DEFINE dye3 [n,c]1c2ccccc2[o,s,n]c3ccccc13
DEFINE dye4 O=C1c2ccccc2C(=O)c3ccccc13
DEFINE dye5 a-N=N-[#6;$(a),$(C(N)N),$(C=*),$(C*=*)]
DEFINE dye6 aC1C(=O)a(a23)aaaa2aaaa3C1=O
DEFINE dye7 a-[C,N]=C(C=C1)C=CC1=[N,O]
DEFINE dye8 CN1C(=O)c2cccc(c23)cccc3C1=O
DEFINE dye9 a-C(=O)[R0]=[R0]-a
DEFINE dye10 a-N#N
DEFINE dye11 aC(a)a
DEFINE dye12 O=c1ncc(cc2)c3c4c2cnc(=O)c4ccc13
DEFINE dye13 O=S1(=O)Oc2ccccc2C1(a)[#6]
DEFINE dye14 O=C(O1)c2ccccc2C1(a)[#6]
DEFINE dye15 O=S(=O)(O1)c2ccccc2C1(a)[#6]
DEFINE dye16 Nc(cc1)ccc1Nc(cc2)ccc2[N,O]
DEFINE dye17 O=C1c2ccccc2C(=O)C3C=CC=CC13
DEFINE dye18 n1c2CC=CCc2cc3CC=CC=c13
DEFINE dye19 N=C-N-C(C=N)=CN
DEFINE dye20 O~S(~O)(~O)aa-[R0]=[R0]-aaS(~O)(~O)~O
DEFINE dye21 O~S(~O)(~O)c(cc1)ccc1N(C2=O)N=CC2-C=C
DEFINE dye22 NS(=O)(=O)aaaN
DEFINE dye23 CC1(C)c2ccccc2N=C1-C=[R0]
DEFINE dye24 aC(aaaa-N)(aaaa-N)
DEFINE dye25 na-C=C-a
DEFINE dye26 Cc1n[$(nn-a)]nn1
DEFINE dye27 O=C1c2ccccc2SC1=C3Sc4ccccc4C3=O
DEFINE dye28 Clc1c2Oc3ccccc3N=c2c(Cl)c4Oc5ccccc5N=c14
# dye29 accepts a nitro nitrogen written either as N(=O)=O or as [N+](=O)[O-].
DEFINE dye29 [$(N(=O)=O),$([N+](=O)[O-]);$(N-aa-N),$(N-aaaa-N)]
DEFINE dye30 a-N-N=*-a
DEFINE dye31 O=C1c2cccc(ccc3)c2c3-c4ccccc14
DEFINE dye32 O=C1c2ccccc2C(=O)C1-[$(an),$(aan)]

# Functional group PATTERN statements.
# Each PATTERN gives a group name followed by the SMARTS for it, usually just a
# $reference to a DEFINE. Presumably these are the entries the consuming tool
# actually reports or filters on -- confirm against the tool's documentation.
PATTERN aryl [a]
PATTERN arene [c]
PATTERN hydroxyl [$hydroxyl]
PATTERN mercapto [$mercapto]
PATTERN halide [$halide]
PATTERN iodine [$iodine]
PATTERN hetatm [$hetatm]
PATTERN pseudo_amine [$pseudo_amine]
PATTERN amine [$amine]
PATTERN ring [R]
PATTERN hydroxylamine [$hydroxylamine]
PATTERN enol_ether [$Cenol_ether]
# Functional-group PATTERN statements: each one pairs a group name with a
# single-atom SMARTS that references a named definition via $name.
PATTERN enamine [$Cenamine]
PATTERN oxalyl [$Coxalyl]
PATTERN organometallic [$Corganometallic]
PATTERN dithioacetal [$Cdithioacetal]
PATTERN oxime [$Coxime]
PATTERN imino [$Cimino]
PATTERN sulfonyl_halide [$Ssulfonyl_halide]
PATTERN sulfonamide [$Ssulfonamide]
PATTERN hemiketal [$Chemiketal]
PATTERN hemiacetal [$Chemiacetal]
PATTERN ketal [$Cketal]
PATTERN acetal [$Cacetal]
PATTERN thioether [$Sthioether]
PATTERN ether [$Oether]
PATTERN aryl_halide [$Xaryl_halide]
PATTERN alkyl_halide [$Xalkyl_halide]
PATTERN alkylating_agent [$Xalkylating_agent]
PATTERN disulfide [$Sdisulfide]
PATTERN sulfoxide [$Ssulfoxide]
PATTERN sulfone [$Ssulfone]
PATTERN sulfide [$Ssulfide]
PATTERN tertiary_amine [$Ntertiary_amine]
PATTERN secondary_amine [$Nsecondary_amine]
PATTERN primary_amine [$Nprimary_amine]
PATTERN phenol [$Ophenol]
PATTERN peroxide [$Operoxide]
PATTERN nitro [$Nnitro]
PATTERN nitrile [$Cnitrile]
PATTERN isothiocyanate [$Nisothiocyanate]
PATTERN isocyanate [$Nisocyanate]
PATTERN hydrazone [$Nhydrazone]
PATTERN hydrazine [$Nhydrazine]
PATTERN epoxide [$Cepoxide]
PATTERN phosphoric_ester [$Pphosphoric_ester]
PATTERN phosphoric_acid [$Pphosphoric_acid]
PATTERN phosphonic_ester [$Pphosphonic_ester]
PATTERN phosphonic_acid [$Pphosphonic_acid]
PATTERN sulfonic_ester [$Ssulfonic_ester]
PATTERN sulfonic_acid [$Ssulfonic_acid]
PATTERN ketone [$Cketone]
PATTERN aldehyde [$Caldehyde]
PATTERN lactone [$Clactone]
PATTERN ester [$Cester]
PATTERN thioester [$Cthioester]
PATTERN acid_chloride [$Cacid_chloride]
PATTERN acid_halide [$Cacid_halide]
PATTERN carboxylic_acid [$Ccarboxylic_acid]
PATTERN thiocarbonyl [$Cthiocarbonyl]
PATTERN carbonyl [$Ccarbonyl]
PATTERN thiourea [$Nthiourea]
PATTERN carbonate [$Ocarbonate]
PATTERN carbamic_acid [$Ccarbamic_acid]
# Comma-separated references: an atom matching either definition is accepted.
PATTERN carbamate [$Ocarbamate,$Ncarbamate]
PATTERN thiocarbamate [$Othiocarbamate,$Nthiocarbamate]
PATTERN azo [$Nazo]
PATTERN triazine [$N1triazine,$N12triazine]
PATTERN azide [$Nazide]
PATTERN aniline_unsubstituted [$aniline_unsubstituted]
PATTERN aniline [$aniline]
PATTERN anhydride [$Canhydride]
PATTERN thioamide [$Cthioamide]
PATTERN amide [$Camide]
PATTERN lactam [$Clactam]
PATTERN alkyne [$Calkyne]
PATTERN alkene [$Calkene]
PATTERN thiol [$Sthiol]
PATTERN alcohol [$Oalcohol]
PATTERN urea [$Nurea]
PATTERN aryl_mono_BrI [$aryl_mono_BrI]
PATTERN mono_alkyne [$mono_alkyne]
PATTERN mono_alkene [$mono_alkene]
PATTERN alkyl [$Calkyl]
PATTERN nucleophile [$nucleophile]
PATTERN malonic [$malonic]
PATTERN base [$base]
PATTERN acid [$acid]
PATTERN charge [$charge]
PATTERN long_aliphatic_chain [$long_aliphatic_chain]
PATTERN michael_acceptor [$michael_acceptor]
PATTERN triphenyl_phosphene [$triphenyl_phosphene]
PATTERN unbranched_chain [$unbranched_chain]
PATTERN amino_acid [$amino_acid]
PATTERN di_peptide [$di_peptide]
PATTERN beta_halo_carbonyl [$beta_halo_carbonyl]
PATTERN hetero_hetero [$hetero_hetero]
PATTERN halopyrimidine [$halopyrimidine]
PATTERN methyl_ketone [$methyl_ketone]
PATTERN perhalo_ketone [$perhalo_ketone]
PATTERN aziridine [$aziridine]
PATTERN imine [$imine]

# SMARTS rules generated to eliminate all of the compounds with "colors"
# in their names from a set of available compounds. This list is
# fairly complete, but is empirical and may not remove all possible dyes.
# dye: matches when any one of the $dye1 .. $dye32 references matches
# (comma-separated alternatives inside a single atom expression).
PATTERN dye [$dye1,$dye2,$dye3,$dye4,$dye5,$dye6,$dye7,$dye8,$dye9,$dye10,$dye11,$dye12,$dye13,$dye14,$dye15,$dye16,$dye17,$dye18,$dye19,$dye20,$dye21,$dye22,$dye23,$dye24,$dye25,$dye26,$dye27,$dye28,$dye29,$dye30,$dye31,$dye32]

# Specific undesirable functional groups not built from general pieces.
# NOTE: these do not return a single atom as match (each SMARTS below spans
# several atoms instead of being one bracketed atom expression).
PATTERN Carbazides O=CN=[N+]=[N-]
PATTERN Acid_anhydrides C(=O)OC(=O)
PATTERN Pentafluorophenyl_esters C(=O)Oc1c(F)c(F)c(F)c(F)c1(F)
# NOTE(review): the nitro group is written only in the pentavalent form
# N(=O)=O; a charge-separated [N+](=O)[O-] nitro would not match as written
# unless the consuming tool normalises nitro groups first -- confirm.
PATTERN Paranitrophenyl_esters C(=O)Oc1ccc(N(=O)=O)cc1
PATTERN HOBT_esters C(=O)Onnn
PATTERN Triflates OS(=O)(=O)C(F)(F)F
PATTERN Lawesson_s_reagent P(=S)(S)S
PATTERN Phosphoramides NP(=O)(N)N
PATTERN Aromatic_azides cN=[N+]=[N-]
PATTERN Beta_carbonyl_quart_nitrogen C(=O)C[N+,n+]
PATTERN Acylhydrazide [!a][N;R0][N;R0]C(=O)
PATTERN Quarternary_C_Cl_I_P_or_S [C+,Cl+,I+,P+,S+]
PATTERN Phosphoranes C=P
PATTERN Chloramidines [Cl]C([C&R0])=N
PATTERN Nitroso [N&D2](=O)
PATTERN P_S_Halides [P,S][Cl,Br,F,I]
PATTERN Carbodiimide N=C=N
PATTERN Isonitrile [N+]#[C-]
PATTERN Triacyloxime C(=O)N(C(C=O))OC(=O)
PATTERN Cyanohydrins N#CC[OH]
PATTERN Acyl_cyanides N#CC(=O)
PATTERN Sulfonyl_cyanides S(=O)(=O)C#N
PATTERN Cyanophosphonates P(OCC)(OCC)(=O)C#N
PATTERN Azocyanamides [N;R0]=[N;R0]C#N
PATTERN Azoalkanals [N;R0]=[N;R0]CC=O
PATTERN Polyenes C=CC=CC=CC=C

# Scaffold patterns named after natural-product families.
PATTERN Saponin_derivatives O1CCCCC1OC2CCC3CCCCC3C2
PATTERN Cytochalasin_derivatives O=C1NCC2CCCCC21
PATTERN Cycloheximide_derivatives O=C1CCCC(N1)=O
PATTERN Monensin_derivatives O1CCCCC1C2CCCO2
PATTERN Cyanidin_derivatives [OH]c1cc([OH])cc2[O+]=C(C([OH])=Cc21)c3cc([OH])c([OH])cc3
PATTERN Squalestatin_derivatives C12OCCC(O1)CC2